import stst

# Define Model
gb = stst.Classifier(stst.GradientBoostingRegression())
model = stst.Model('S1-gb', gb)

# Add features to the Model
model.add(stst.WeightednGramMatchFeature(type='lemma'))
model.add(stst.BOWFeature(stopwords=False))
model.add(stst.AlignmentFeature())
model.add(stst.IdfAlignmentFeature())
model.add(stst.NegativeFeature())

# data files (STS Benchmark splits)
train_file = './data/stsbenchmark/sts-train.csv'
dev_file = './data/stsbenchmark/sts-dev.csv'
test_file = './data/stsbenchmark/sts-test.csv'

# connect to a running Stanford CoreNLP server at the given address
nlp = stst.StanfordNLP('http://localhost:9000')

# parse data
train_instances = stst.load_parse_data(train_file, nlp)
dev_instances = stst.load_parse_data(dev_file, nlp)

# train and test
model.train(train_instances, train_file)
model.test(dev_instances, dev_file)

# evaluation
dev_pearsonr = stst.eval_output_file(model.output_file)
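
# A minimal follow-up sketch: test_file is defined above but never used, so the
# lines below assume the same load_parse_data / test / eval_output_file API to
# score the held-out test split as well.
test_instances = stst.load_parse_data(test_file, nlp)
model.test(test_instances, test_file)
test_pearsonr = stst.eval_output_file(model.output_file)
print('dev pearson: %.4f, test pearson: %.4f' % (dev_pearsonr, test_pearsonr))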
Example #2
from __future__ import print_function
import stst
from features.features_unigram import UnigramFeature, FuckFeature, TFFeature, BigramFeature
from features.features_unigram import MinAvgMaxEmbeddingFeature
from stst import config
from main_tools import *
# Define Model
lr = stst.Classifier(stst.LIB_LINEAR_LR())
svm = stst.Classifier(stst.skLearn_svm())
xgb = stst.Classifier(stst.XGBOOST_prob())
boosting = stst.Classifier(stst.sklearn_GradientBoosting())

model = stst.Model('S1-gb', lr)
model.add(FuckFeature(load=False))


def train_model_best(is_training=False, model_name='S2-gb', classifier=lr):
    model_best = stst.Model(model_name, classifier)
    model_best.add(TFFeature(type='word', convey='count', load=True))
    model_best.add(TFFeature(type='char', convey='count', load=True))
    model_best.add(BigramFeature(type='word', convey='count', load=True))
    model_best.add(BigramFeature(type='char', convey='count', load=True))

    emb_wd_50_file = config.EMB_WORD_DIR + '/embedding.50'
    emb_wd_100_file = config.EMB_WORD_DIR + '/embedding.100'
    emb_wd_200_file = config.EMB_WORD_DIR + '/embedding.200'
    emb_wd_300_file = config.EMB_WORD_DIR + '/embedding.300'

    headlines_vec = config.EMB_WORD_DIR + '/headlines.vec'

    model_best.add(MinAvgMaxEmbeddingFeature('headlines', 100, headlines_vec, pooling_type='avg', load=True))
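
# A minimal, illustrative sketch of using the model defined at the top of this
# example, assuming the same parse/train/test API as Example #1; the CoreNLP
# address and CSV paths below are placeholders, not values from the original project.
nlp = stst.StanfordNLP('http://localhost:9000')
train_instances = stst.load_parse_data('./data/train.csv', nlp)
dev_instances = stst.load_parse_data('./data/dev.csv', nlp)
model.train(train_instances, './data/train.csv')
model.test(dev_instances, './data/dev.csv')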
Example #3
# coding: utf8

from input import data
import stst
from stst import config  # config.* file paths are used below; assumed to be stst's config module, as in Example #2
from features.pmi_feature import *
from features.warrant_feature import *
from metric import evaluation

classifier = stst.Classifier(stst.LIB_LINEAR_LR())
model = stst.Model('NLP', classifier)

model.add(Warrant_Feature(load=False))
# model.add(BowFeature(load=False))
# model.add(BI_feature(load=False))

train_file = config.train_file
train_instances = data.load_parse_data(train_file)

dev_file = config.dev_file
dev_instances = data.load_parse_data(dev_file)

test_file = config.test_file
test_instances = data.load_parse_data(test_file)

model.train(train_instances, train_file)
acc = evaluation.Evaluation(train_file, model.output_file)
print(acc)

model.test(dev_instances, dev_file)
acc = evaluation.Evaluation(dev_file, model.output_file)
print(acc)
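
# A minimal sketch of scoring the test split too: test_instances is loaded above
# but never used, and the same test/Evaluation calls are assumed to apply
# (provided the test file carries gold labels).
model.test(test_instances, test_file)
acc = evaluation.Evaluation(test_file, model.output_file)
print(acc)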
Example #4
import stst
# data and config are used below; assumed to come from the same modules as in Example #3
from input import data
from stst import config
from features.pmi_feature import *
from features.warrant_feature import *
from features.nn_features import *
from metric import evaluation

train_file = config.train_file
train_instances = data.load_parse_data(train_file)

dev_file = config.dev_file
dev_instances = data.load_parse_data(dev_file)

test_file = config.test_file
test_instances = data.load_parse_data(test_file)

lr = stst.Classifier(stst.LIB_LINEAR_LR())
nlp_model = stst.Model('lr', lr)
nlp_model.add(Warrant_Feature())

nlp_model.train(train_instances, train_file)
nlp_model.test(dev_instances, dev_file)
nlp_model.test(test_instances, test_file)

vote = stst.Classifier(stst.VoteEnsemble())
model1 = stst.Model('vote1', vote)

# model1.add(NNAVGFeature('intra_attention_cnn_margin', config.NN_RUN_DIR + '/run_intra_attention_cnn_margin_0121_19_04', load=False))  # 0.5
# model1.add(NNAVGFeature('intra_attention_cnn', config.NN_RUN_DIR + '/run_intra_attention_cnn_0121_19_03', load=False)) # 0.46
# model1.add(NNFeature('intra_attention_i', config.NN_RUN_DIR + '/run_intra_attention_i_0121_19_03', load=False)) # 0.5
# model1.add(NNAVGFeature('run_intra_attention_cnn_negclaim', config.NN_RUN_DIR + '/run_intra_attention_cnn_negclaim_0121_20_37', load=False))
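
# A minimal sketch of running the vote ensemble, assuming at least one of the
# NN features above is re-enabled first and that model1 exposes the same
# train/test interface used for nlp_model.
model1.train(train_instances, train_file)
model1.test(dev_instances, dev_file)
model1.test(test_instances, test_file)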