import json

import torch
from torch.nn import Softmax
from torch.utils.data import dataloader
from transformers import BertTokenizer

# BertClassifier and SentimentDataset are project-local modules, assumed importable.


class Predictor(object):
    def __init__(self, args):
        pretrain_name = 'bert-base-cased'
        if args.model_info.bert_path:
            pretrain_name = args.model_info.bert_path
        self.tokenizer = BertTokenizer.from_pretrained(pretrain_name)
        print(f"Tokenizer from: {pretrain_name}")
        train_conf = args.train_info
        model_conf = args.model_info
        self.device = train_conf.device
        self.class_num = model_conf.class_num
        self.model = BertClassifier(model_conf)
        self.model.load_state_dict(
            torch.load(train_conf.model_path, map_location=torch.device(self.device)))
        self.model.to(self.device)
        self.model.eval()  # disable dropout for inference
        self.lr = train_conf.lr
        self.max_len = train_conf.max_seq_len
        self.conf = args
        self.label_map = json.load(open(args.label_map_path))
        self.id2label = {i: label_str for label_str, i in self.label_map.items()}
        self.softmax = Softmax(dim=1)

    def predict(self, sens):
        d_loader = self.sen_2_dl(sens)
        y_pred = list()
        with torch.no_grad():
            for batch in d_loader:
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                logits = self.model(input_ids, attention_mask)
                logits = torch.sigmoid(logits)
                y_pred.append(logits)
        y_pred = torch.cat(y_pred)
        y_pred = y_pred.cpu().numpy()
        res = list()
        for y in y_pred:
            res.append(self._score_2_dict(y))
        return res

    def _score_2_dict(self, single_pred):
        res = dict()
        for i, score in enumerate(single_pred):
            label_str = self.id2label[i]
            res[label_str] = float(score)
        return res

    def sen_2_dl(self, sens):
        texts = [i.strip() for i in sens]
        # Placeholder label: the dataset class requires labels even though they
        # are never used at prediction time.
        labels = [999]
        ds = SentimentDataset(self.tokenizer, texts, labels, self.max_len)
        _loader = dataloader.DataLoader(
            ds, batch_size=self.conf.train_info.batch_size, shuffle=False)
        return _loader
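# Usage sketch (hypothetical): `args` must expose the attributes read in
# __init__ above (model_info, train_info, label_map_path); `load_config` stands
# in for however the project actually builds that object.
if __name__ == '__main__':
    args = load_config('conf/predict.yaml')  # placeholder config loader
    predictor = Predictor(args)
    sentences = ["The movie was surprisingly good.", "Terrible service, never again."]
    for sentence, scores in zip(sentences, predictor.predict(sentences)):
        # `scores` is the {label: score} dict built by _score_2_dict.
        print(sentence, max(scores, key=scores.get))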
# In addition to the imports above, this class relies on os, numpy (as np),
# GPT2Tokenizer from transformers, and the project-local GPT2Classifier,
# GPT2Dataset, metrics, and prepare helpers.


class Evaluator(object):
    def __init__(self, args):
        pretrain_name = 'bert-base-cased'
        if args.model_info.bert_path:
            pretrain_name = args.model_info.bert_path
        print(f"Tokenizer from: {pretrain_name}")
        train_conf = args.train_info
        model_conf = args.model_info
        self.model_type = model_conf.model
        if self.model_type == 'bert_seq':
            self.model = BertClassifier(model_conf)
            self.tokenizer = BertTokenizer.from_pretrained(pretrain_name)
            self.ds = SentimentDataset
        if self.model_type == 'GPT2':
            self.model = GPT2Classifier(model_conf)
            self.tokenizer = GPT2Tokenizer.from_pretrained(pretrain_name)
            self.ds = GPT2Dataset
        self.device = train_conf.device
        self.model.load_state_dict(
            torch.load(train_conf.model_path, map_location=torch.device(self.device)))
        self.class_num = model_conf.class_num
        self.model.to(self.device)
        self.lr = train_conf.lr
        self.max_len = train_conf.max_seq_len
        self.conf = args
        self.label_map = json.load(open(args.label_map_path))
        self.id2label = {i: label_str for label_str, i in self.label_map.items()}

    def run(self, batch_size=64):
        test_path = self.conf.train_info.test_path
        test_loader = self.get_data_loader(test_path, batch_size)
        acc, recall, f1_score, cm, report, res = self.evaluate(test_loader)
        print(f"Accuracy score of the model is {acc}")
        print(f"Recall score of the model is {recall}")
        print(f"F1 score of the model is {f1_score}")
        print(f"Confusion matrix of the model is {cm}")
        print(report)
        # Write misclassified examples to a logs/ directory alongside the data.
        dir_ = os.path.dirname(test_path)
        dir_ = os.path.dirname(dir_)
        dir_ = os.path.split(dir_)[0]
        new_path = os.path.join(dir_, 'logs', 'bad_case.json')
        with open(new_path, 'w') as f:
            for i in res:
                print(json.dumps(i, ensure_ascii=False), file=f)

    def evaluate(self, _loader):
        self.model.eval()
        y_true = list()
        y_pred = list()
        res = []
        with torch.no_grad():
            for batch in _loader:
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                y = batch['labels']
                y = torch.squeeze(y, 1)
                y = y.to(self.device)
                logits = self.model(input_ids, attention_mask)
                y_true.append(y)
                y_pred.append(logits)
                pred_labels = torch.argmax(logits, dim=1)
                preds = pred_labels.cpu().numpy()
                true = batch['labels'].squeeze().numpy()
                if len(true) < 1:
                    continue
                # Collect misclassified samples for error analysis.
                for i, c_y in enumerate(true):
                    if c_y != preds[i]:
                        tmp_dict = {
                            'true_label': self.id2label[c_y],
                            'pred_label': self.id2label[preds[i]],
                            'text': batch['text'][i]
                        }
                        res.append(tmp_dict)
        y_true = torch.cat(y_true)
        y_pred = torch.cat(y_pred)
        cm = metrics.cal_cm(y_true, y_pred)
        acc_score = metrics.cal_accuracy(y_true, y_pred)
        recall = metrics.cal_recall(y_true, y_pred)
        f1_score = metrics.cal_f1(y_true, y_pred)
        label_range = list(range(len(self.label_map)))
        target_name = [
            x[0] for x in sorted(self.label_map.items(), key=lambda x: x[1])
        ]
        report = metrics.get_classification_report(y_true, y_pred, label_range, target_name)
        return acc_score, recall, f1_score, cm, report, res

    def get_data_loader(self, f_path, batch_size):
        np.random.seed(14)
        texts, labels = prepare(f_path, self.label_map)
        ds = self.ds(self.tokenizer, texts, labels, self.max_len)
        return dataloader.DataLoader(ds,
                                     batch_size=batch_size,
                                     num_workers=self.conf.num_workers,
                                     shuffle=True)
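# Usage sketch (hypothetical): running a full evaluation pass. As above,
# `load_config` is a placeholder for the project's real config loading.
if __name__ == '__main__':
    args = load_config('conf/eval.yaml')  # placeholder config loader
    evaluator = Evaluator(args)
    evaluator.run(batch_size=64)  # prints metrics and writes logs/bad_case.json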
# (Tail of the evaluation helper: accumulate batch statistics inside the loop,
#  then report and return the averages.)
        total_count += gt.shape[0]
        total_loss.append(criterion(preds, labels).item())
    loss, acc = np.array(total_loss).mean(), total_correct / total_count
    print("Average Loss: {:.6f}, Accuracy: {:.6f}".format(loss, acc))
    return loss, acc


device = 'cuda' if torch.cuda.is_available() else 'cpu'
epochs = 30
best_acc = 0.0
eval_losses, eval_accs = [], []
train_losses, train_accs = [], []

model = BertClassifier(freeze_bert=False)
model = model.to(device)
# model = nn.DataParallel(model)

train_dataset = EmojiDataset('../../data/train_bert_sentences.npy',
                             '../../data/train_bert_labels.npy')
# Note: training data is usually shuffled; shuffle=False is kept from the original.
train_dataloader = DataLoader(train_dataset,
                              batch_size=64,
                              shuffle=False,
                              collate_fn=collate_fn)
test_dataset = EmojiDataset('../../data/test_bert_sentences.npy',
                            '../../data/test_bert_labels.npy')
test_dataloader = DataLoader(test_dataset,
                             batch_size=128,
                             shuffle=False,
                             collate_fn=collate_fn)
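# Sketch of the loop these objects feed into (assumed, not part of the original
# snippet): `train_epoch` and the full signature of the evaluation helper above
# are placeholders, as are the optimizer and criterion choices.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)

for epoch in range(epochs):
    train_loss, train_acc = train_epoch(model, train_dataloader, criterion, optimizer, device)
    eval_loss, eval_acc = evaluate(model, test_dataloader, criterion, device)
    train_losses.append(train_loss)
    train_accs.append(train_acc)
    eval_losses.append(eval_loss)
    eval_accs.append(eval_acc)
    if eval_acc > best_acc:
        best_acc = eval_acc
        torch.save(model.state_dict(), 'best_model.pt')  # keep the best checkpoint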
import pandas as pd
import torch
from transformers import BertConfig, BertTokenizer

# BertClassifier and get_bert_input are project-local helpers assumed importable.

# Category names of the Chinese news classifier (sports, entertainment, home,
# real estate, education, fashion, politics, games, technology, finance).
labels = ['体育', '娱乐', '家居', '房产', '教育', '时尚', '时政', '游戏', '科技', '财经']

bert_config = BertConfig.from_pretrained('chinese_wwm_pytorch')
bert_config.num_labels = len(labels)
model = BertClassifier(bert_config)
model.load_state_dict(
    torch.load('./best_model_on_trainset.pkl', map_location=torch.device('cpu')))
tokenizer = BertTokenizer(vocab_file='chinese_wwm_pytorch/vocab.txt')

device = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')
model = torch.nn.DataParallel(model, device_ids=[2])
model.to(device)
model.eval()  # disable dropout for inference


def predict_text(text):
    input_id, attention_mask, token_type_id = get_bert_input(text, tokenizer)
    input_id = torch.tensor([input_id], dtype=torch.long)
    attention_mask = torch.tensor([attention_mask], dtype=torch.long)
    token_type_id = torch.tensor([token_type_id], dtype=torch.long)
    with torch.no_grad():
        predicted = model(
            input_id,
            attention_mask,
            token_type_id,
        )
    pred_label = torch.argmax(predicted, dim=1)
    return labels[pred_label.item()]  # map the predicted index back to its category name
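# Example call (hypothetical input sentence, roughly "the central bank announced
# a cut to the reserve requirement ratio"); with the return added above this
# should print one of the category names, likely '财经' (finance).
if __name__ == '__main__':
    print(predict_text('央行宣布下调存款准备金率'))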
# (Tail of the comment_prediction helper: `x` holds the encoded input ids, and
#  zero ids mark padding, so the attention mask is derived from them.)
    attention_mask = (x != 0).float().to(config.DEVICE).long()
    outputs = MODEL(x, attention_mask=attention_mask)
    return outputs.cpu().detach().numpy()


@app.route('/predict')
def predict():
    comment = request.args.get('comment')
    start_time = time.time()
    prediction = comment_prediction(comment)
    # One probability per class, keyed by the class column names.
    response = {
        'response': {
            label: str(prob)
            for label, prob in zip(config.CLASS_COLS, prediction[0])
        }
    }
    response['response']['comment'] = comment
    response['response']['time_taken'] = str(time.time() - start_time)
    return flask.jsonify(response)


if __name__ == '__main__':
    bert_config = BertConfig.from_pretrained(config.BERT_NAME)
    bert_config.num_labels = config.NUM_CLASSES
    MODEL = BertClassifier(bert_config)
    MODEL.load_state_dict(torch.load(config.TRAINED_MODEL_PATH))
    MODEL.to(config.DEVICE)
    MODEL.eval()
    app.run(host=config.HOST, port=config.PORT)
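# Example client call (host/port and the sample comment are placeholders; the
# route above reads the text from the `comment` query parameter).
import requests

resp = requests.get('http://localhost:5000/predict',
                    params={'comment': 'This movie was great!'})
print(resp.json())  # {'response': {<label>: <prob>, ..., 'comment': ..., 'time_taken': ...}}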