示例#1
0
def loadData(opt):
    """Load a text-classification dataset and return ``(train_iter, test_iter)``.

    When ``opt.from_torchtext`` is False, loading is delegated to the
    project-local ``dataHelper`` module.  Otherwise the requested torchtext
    dataset (``imdb`` / ``sst`` / ``trec``) is tokenized, a GloVe-initialised
    vocabulary is built, and bucketed iterators are returned.

    Side effects: sets ``opt.label_size``, ``opt.vocab_size``,
    ``opt.embedding_dim`` and ``opt.embeddings`` from the built vocabulary.

    Raises:
        ValueError: if ``opt.dataset`` is not one of the supported names.
    """
    if not opt.from_torchtext:
        import dataHelper as helper
        return helper.loadData(opt)

    # Legacy torchtext device convention: GPU ordinal, or -1 for CPU.
    device = 0 if torch.cuda.is_available() else -1

    TEXT = data.Field(lower=True,
                      include_lengths=True,
                      batch_first=True,
                      fix_length=opt.max_seq_len)
    LABEL = data.Field(sequential=False)

    if opt.dataset == "imdb":
        train, test = datasets.IMDB.splits(TEXT, LABEL)
    elif opt.dataset == "sst":
        # NOTE(review): the SST validation split is currently discarded.
        train, val, test = datasets.SST.splits(
            TEXT,
            LABEL,
            fine_grained=True,
            train_subtrees=True,
            filter_pred=lambda ex: ex.label != 'neutral')
    elif opt.dataset == "trec":
        train, test = datasets.TREC.splits(TEXT, LABEL, fine_grained=True)
    else:
        # Fail fast: previously this only printed a warning and then crashed
        # later with a NameError on the undefined `train`/`test`.
        raise ValueError("unsupported dataset: %r" % opt.dataset)

    TEXT.build_vocab(train, vectors=GloVe(name='6B', dim=300))
    LABEL.build_vocab(train)
    # Print vocab information for debugging.
    print('len(TEXT.vocab)', len(TEXT.vocab))
    print('TEXT.vocab.vectors.size()', TEXT.vocab.vectors.size())

    train_iter, test_iter = data.BucketIterator.splits(
        (train, test),
        batch_size=opt.batch_size,
        device=device,
        repeat=False,
        shuffle=True)

    # Expose dataset statistics / pretrained vectors to model construction.
    opt.label_size = len(LABEL.vocab)
    opt.vocab_size = len(TEXT.vocab)
    opt.embedding_dim = TEXT.vocab.vectors.size()[1]
    opt.embeddings = TEXT.vocab.vectors

    return train_iter, test_iter
from_torchtext = True

opt = opts.parse_opt()

# Respect an externally-set device mask; otherwise use the configured GPU.
if "CUDA_VISIBLE_DEVICES" not in os.environ.keys():
    os.environ["CUDA_VISIBLE_DEVICES"] = opt.gpu

if from_torchtext:
    train_iter, test_iter = utils.loadData(opt)
else:
    import dataHelper as helper
    # BUG FIX: the module is bound as `helper`, not `dataHelper` —
    # the previous `dataHelper.loadData(opt)` raised NameError.
    train_iter, test_iter = helper.loadData(opt)

opt.lstm_layers = 2

print('Print loading models')
# Evaluate a previously-saved checkpoint before training a fresh model.
model2 = models.setup(opt)
model2.load_state_dict(torch.load('saved_models/lstm_test.pt'))
if torch.cuda.is_available():  # consistent with the guard on `model` below
    model2.cuda()
percision = utils.evaluation(model2, test_iter, from_torchtext)  # (sic) name kept for compatibility
print("After iteration with model 2 Test Acc %.4f" % (percision))
ipdb.set_trace()  # NOTE(review): leftover debugger breakpoint — remove before unattended runs

model = models.setup(opt)
if torch.cuda.is_available():
    model.cuda()
model.train()
print("# parameters:", sum(param.numel() for param in model.parameters() if param.requires_grad))
示例#3
0
from_torchtext = False

opt = opts.parse_opt()

# Respect an externally-set device mask; otherwise use the configured GPU.
if "CUDA_VISIBLE_DEVICES" not in os.environ.keys():
    os.environ["CUDA_VISIBLE_DEVICES"] = opt.gpu

if from_torchtext:
    train_iter, test_iter = utils.loadData(opt)
else:
    import dataHelper as helper
    # BUG FIX: the module is bound as `helper`, not `dataHelper` —
    # the previous `dataHelper.loadData(opt)` raised NameError.
    train_iter, dev_iter, test_iter = helper.loadData(opt)

opt.lstm_layers = 2

# Fixed seed for reproducible weight initialisation.
torch.manual_seed(0)
model = models.setup(opt)
if torch.cuda.is_available():
    model.cuda()
model.train()
print(
    "# parameters:",
    sum(param.numel() for param in model.parameters() if param.requires_grad))
# Only optimise trainable parameters (frozen embeddings are excluded).
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                       lr=opt.learning_rate)
optimizer.zero_grad()
loss_fun = F.cross_entropy
示例#4
0
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import SGDClassifier
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
import numpy as np
import opts
import dataHelper
# Classic bag-of-words baseline; approach follows https://zhuanlan.zhihu.com/p/26729228
opt = opts.parse_opt()
import dataHelper as helper
# With embedding=False, dataHelper returns raw (texts, labels) pairs
# instead of embedded iterators — TODO confirm against dataHelper.loadData.
train_iter, test_iter = dataHelper.loadData(opt,embedding=False)
#categories = ['good', 'bad', 'mid']
x_train,y_train=train_iter
x_test,y_test = test_iter

#opt.model ="haha"
if opt.model == "bayes":
    """ Naive Bayes classifier """
    # sklearn's Pipeline chains count vectorisation, tf-idf weighting and the
    # classifier into a single fit/predict estimator.
    bayes_clf = Pipeline([('vect', CountVectorizer()), 
                          ('tfidf', TfidfTransformer()),
                          ('clf', MultinomialNB()) 
                          ])
    bayes_clf.fit(x_train, y_train)
    """ Predict the test dataset using Naive Bayes"""
    predicted = bayes_clf.predict(x_test)