#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Dataset loading for the SRL pipeline.

NOTE(review): this chunk was recovered from a whitespace-mangled paste;
the `DataSet` class presumably continues beyond what is visible here.
"""
import codecs
import config
import torch
from vocab import WordVocab, LemmaVocab, TagVocab, PredictVocab, SemTagVocab
from vocab import IllegalSampleError

# Command-line options (optparse-style) for the corpus file paths.
config.add_option('-T', '--train', dest='train_file', type='string',
                  help='train data file', action='store')
config.add_option('-D', '--dev', dest='dev_file', type='string',
                  help='evaluation data file', action='store')


class DataSet(object):
    """A corpus read from ``filename``, indexed through a set of vocabularies."""

    def __init__(self, filename, vocabs=None):
        # Path of the data file; actual reading is not visible in this chunk.
        self._filename = filename
        # Default vocabulary set covers word/lemma/tag/predicate/semtag columns.
        self._vocabs = vocabs if vocabs else [
            WordVocab(), LemmaVocab(), TagVocab(), PredictVocab(), SemTagVocab()
        ]
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Model definitions for the biaffine SRL system.

NOTE(review): this chunk was recovered from a whitespace-mangled paste;
`BaseModel.__init__` presumably continues beyond what is visible here.
"""
import config
import torch
from torch import nn
from torch.autograd import Variable
from biaffine import BiAffineModel

# Command-line options controlling model hyper-parameters.
config.add_option('--dropout_rate', dest='dropout_rate', default=0.,
                  type='float', help='model dropout', action='store')
config.add_option(
    '--activate', dest='activate', default='tanh', type='string',
    help=
    'activate functions (relu|glu|logsigmoid|softsign|log_softmax|sigmoid)',
    action='store')

__all__ = ['BiAffineSrlModel']


class BaseModel(nn.Module):
    """Common base class: keeps the vocabularies addressable by name."""

    def __init__(self, *args, **kwargs):
        super().__init__()
        # 'vocabs' is required; pop it so it is not forwarded to nn.Module.
        vocabs = kwargs.pop('vocabs')
        # Map each vocab's name to the vocab object for O(1) lookup by name.
        self.vocab = {vocab.name: vocab for vocab in vocabs}
#!/usr/bin/env python # -*- coding: utf-8 -*- import codecs import os import numpy from collections import Counter import config import math config.add_option('--recount', dest='recount', default=False, help='If there need to be recount the vocab', action='store_true') class IllegalSampleError(RuntimeError): pass class BaseVocab(object): _special_tokens = [] min_occur_count = 0 max_rank = math.inf def __init__(self, *args, **kwargs): self._name = kwargs.pop('name', self.__class__.__name__) self._filename = os.path.join(config.get_option('save'), self._name + '.txt') self._complete = False self._str2idx = zip(self._special_tokens,
def add_option(self, option, value, type=config.OptionTypePlain):
    """Register ``option`` in the global config and display it as an editable row.

    NOTE(review): ``type`` shadows the builtin but is part of the public
    signature, so it is kept for caller compatibility.
    """
    # Record the option under this tab's namespace in the shared config.
    config.add_option(self.tab_name, option, config.Option(value, type))
    # Mirror the option in the UI so the user can inspect/edit its value.
    box = OptionBox(option, value, type, plugin=self.tab_name)
    self.options.append(box)
    self.add(box)
# -*- coding: utf-8 -*-
"""Training entry point for the biaffine SRL model.

NOTE(review): this chunk was recovered from a whitespace-mangled paste;
``train`` is clearly truncated here (``lossfunction`` is created but the
epoch loop that uses it is not visible in this view).
"""
import sys
import torch
from torch import nn, autograd
import config
import time
import copy
import progressbar as pb
from dataset import TrainDataSet
from model import BiAffineSrlModel
from fscore import FScore

# Command-line options for run mode and reproducibility.
config.add_option('-m', '--mode', dest='mode', default='train', type='string',
                  help='[train|eval|pred]', action='store')
config.add_option('--seed', dest='seed', default=1, type='int',
                  help='torch random seed', action='store')


def train(num_epochs=30):
    """Set up the loss, dataset and model for training (body continues elsewhere)."""
    lossfunction = nn.CrossEntropyLoss()
    trainset = TrainDataSet()
    # Model shares the vocabularies built by the training set.
    model = BiAffineSrlModel(vocabs=trainset.vocabs)