Example #1
import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

import config_char
import dataset
import model


def run(args):
    # data loader
    train_set = dataset.TextDataset(config=config_char.dataset_config,
                                    mode='train')
    train_loader = DataLoader(train_set,
                              batch_size=config_char.train_config['batch'],
                              shuffle=True,
                              num_workers=1)

    eval_set = dataset.TextDataset(config=config_char.dataset_config,
                                   mode='eval')
    eval_loader = DataLoader(eval_set,
                             batch_size=config_char.train_config['batch'],
                             shuffle=True,
                             num_workers=1)

    # model
    net = model.CharCNN(config=config_char.model_config)
    # net = model.NaiveNN(config=config_char.model_config)

    # optimizer
    optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9)

    # criterion
    criterion = nn.NLLLoss()

    # train
    for epoch in range(config_char.train_config['epochs']):
        # Training
        print('Training')
        net.train()  # ensure dropout/batch-norm layers are in training mode

        running_loss = 0

        tbar = tqdm.tqdm(total=len(train_loader))  # progress bar over training batches

        for batch_idx, sample in enumerate(train_loader):

            tbar.update(1)

            feature, target = sample['feature'], sample['target']
            feature, target = feature.float(), target.long()

            optimizer.zero_grad()

            # 2. forward
            output = net(feature)

            # 3. loss
            loss = criterion(output, target)

            # 4. backward
            loss.backward()

            # 5. optimize
            optimizer.step()

            running_loss += loss.item()

        tbar.close()

        print(running_loss)

        # Testing
        print('Testing')
        net.eval()  # put dropout/batch-norm layers into inference mode

        positive = 0
        negative = 0

        trump = 0
        hillary = 0

        vbar = tqdm.tqdm(total=len(eval_loader))

        for batch_idx, sample in enumerate(eval_loader):

            vbar.update(1)

            feature, target = sample['feature'], sample['target']
            feature, target = feature.float(), target.long()

            with torch.no_grad():  # no gradients needed during evaluation
                output = net(feature)

            _, index = output.max(1)

            # print(torch.sum(index).data[0], torch.sum(target).data[0])
            # print(torch.sum(index == target), torch.sum(index != target))

            positive += torch.sum(index == target).item()
            negative += torch.sum(index != target).item()

            hillary += torch.sum(target == 1).item()
            trump += torch.sum(target == 0).item()

            # print(positive, negative)

        vbar.close()

        print('acc: ', positive / (positive + negative), 'positive: ',
              positive, 'negative: ', negative)
        print('hillary: ', hillary, 'trump: ', trump)
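
The run() function above pulls every hyper-parameter from a config_char module that the snippet never shows. Below is a minimal sketch of the shape that module is assumed to have, based only on the attributes the code reads (dataset_config, model_config, and the 'batch' and 'epochs' keys of train_config); every concrete value and inner key is a placeholder, not the source project's actual configuration:

# config_char.py -- hypothetical stand-in for the module imported above
dataset_config = {
    # inner keys are assumptions; dataset.TextDataset defines the real ones
    'path': 'data/tweets.csv',
    'max_length': 1014,
}

model_config = {
    # inner keys are assumptions; model.CharCNN defines the real ones
    'num_classes': 2,
}

train_config = {
    'batch': 128,    # read as config_char.train_config['batch']
    'epochs': 10,    # read as config_char.train_config['epochs']
}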
Example #2
    torch.manual_seed(8)
    torch.cuda.manual_seed(8)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if args.model == "charcnn":
        args.datatype = "char"
    elif args.model == "simplernn":
        args.datatype = "word"
    with open('dict/' + str(args.data) + '.info', 'rb') as f:
        info = pickle.load(f)
    word_index = info['word_index']
    index2word = info['index2word']
    classes_list = info['classes_list']
    numclass = len(classes_list)
    # note: assigning to `model` here shadows the imported `model` module
    if args.model == "charcnn":
        model = model.CharCNN(classes=numclass)
    elif args.model == "simplernn":
        model = model.smallRNN(classes=numclass)
    elif args.model == "bilstm":
        model = model.smallRNN(classes=numclass, bidirection=True)

    print(model)

    state = torch.load(args.modelpath)
    model = model.to(device)
    try:
        model.load_state_dict(state['state_dict'])
    except RuntimeError:
        # the checkpoint was saved from a DataParallel wrapper, so its keys
        # carry a "module." prefix; wrap the model, load, then unwrap it again
        model = torch.nn.DataParallel(model)
        model.load_state_dict(state['state_dict'])
        model = model.module
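
The try/except above works because a checkpoint saved from a torch.nn.DataParallel wrapper stores every parameter under a "module."-prefixed key, which the bare model rejects with a RuntimeError. An alternative that avoids re-wrapping the model is to strip that prefix from the state dict before loading; a minimal sketch, reusing the state and model variables from the snippet:

# alternative to the try/except above: strip the "module." prefix that
# DataParallel adds to every parameter key, then load into the bare model
state_dict = state['state_dict']
cleaned = {k[len('module.'):] if k.startswith('module.') else k: v
           for k, v in state_dict.items()}
model.load_state_dict(cleaned)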
print("\nParameters:")
for attr, value in sorted(args.__dict__.items()):
    print("\t{}={}".format(attr.upper(), value))

# model

if args.snapshot is not None:
    print('\nLoading model from {}...'.format(args.snapshot))
    classification_model = torch.load(args.snapshot)
elif args.rnn:
    classification_model = model.RNNClassifier(args.embed_num, args.embed_num,
                                               args.hidden_dim,
                                               args.rnn_layers, args.class_num)
else:
    classification_model = model.CharCNN(args.embed_num, args.embed_dim,
                                         args.class_num, args.kernel_num,
                                         args.kernel_sizes, args.dropout)

if args.cuda:
    torch.cuda.set_device(args.device)
    classification_model = classification_model.cuda()

# train or predict
if args.predict is not None:
    label = predict.predict(args.predict, classification_model, text_field,
                            label_field, args.cuda)
    print('\n[Text]  {}\n[Label] {}\n'.format(args.predict, label))
elif args.test:
    train.eval(dev_iter, classification_model, args)
    # Print index of labels
    for i in range(args.class_num):
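
None of the snippets show how args is built. Below is a hypothetical argparse setup covering only the attributes the snippets actually read; every flag name, choice, and default value is an assumption for illustration, not the source project's real command-line interface:

import argparse

# hypothetical CLI definition; each flag mirrors an args.* attribute read above
parser = argparse.ArgumentParser()
parser.add_argument('--model', default='charcnn',
                    choices=['charcnn', 'simplernn', 'bilstm'])
parser.add_argument('--data', default='ag_news')            # assumed dataset name
parser.add_argument('--modelpath', default='checkpoints/best.pt')
parser.add_argument('--snapshot', default=None)
parser.add_argument('--rnn', action='store_true')
parser.add_argument('--predict', default=None)
parser.add_argument('--test', action='store_true')
parser.add_argument('--cuda', action='store_true')
parser.add_argument('--device', type=int, default=0)
parser.add_argument('--embed_num', type=int, default=70)
parser.add_argument('--embed_dim', type=int, default=128)
parser.add_argument('--hidden_dim', type=int, default=256)
parser.add_argument('--rnn_layers', type=int, default=2)
parser.add_argument('--class_num', type=int, default=2)
parser.add_argument('--kernel_num', type=int, default=100)
parser.add_argument('--kernel_sizes', default='3,4,5')      # format is an assumption
parser.add_argument('--dropout', type=float, default=0.5)
args = parser.parse_args()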