# Example 1
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import codecs
import config
import torch
from vocab import WordVocab, LemmaVocab, TagVocab, PredictVocab, SemTagVocab
from vocab import IllegalSampleError
# Command-line options for the data pipeline: paths to the training and
# development (evaluation) data files.
config.add_option('-T',
                  '--train',
                  dest='train_file',
                  type='string',
                  help='train data file',
                  action='store')
config.add_option('-D',
                  '--dev',
                  dest='dev_file',
                  type='string',
                  help='evaluation data file',
                  action='store')


class DataSet(object):
    """A dataset backed by a file, paired with the vocabularies used to
    encode it.

    If no vocabularies are supplied (or an empty/falsy value is given),
    a fresh default set covering words, lemmas, POS tags, predicates and
    semantic tags is created.
    """

    def __init__(self, filename, vocabs=None):
        # Path of the data file this dataset reads from.
        self._filename = filename
        if vocabs:
            self._vocabs = vocabs
        else:
            # Default vocabulary stack for SRL-style data.
            self._vocabs = [
                WordVocab(),
                LemmaVocab(),
                TagVocab(),
                PredictVocab(),
                SemTagVocab(),
            ]
# Example 2
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import config
import torch
from torch import nn
from torch.autograd import Variable
from biaffine import BiAffineModel
# Model hyper-parameter options: dropout rate and the choice of
# activation function used inside the network.
config.add_option('--dropout_rate',
                  dest='dropout_rate',
                  default=0.,
                  type='float',
                  help='model dropout',
                  action='store')
config.add_option(
    '--activate',
    dest='activate',
    default='tanh',
    type='string',
    help=
    'activate functions (relu|glu|logsigmoid|softsign|log_softmax|sigmoid)',
    action='store')

__all__ = ['BiAffineSrlModel']


class BaseModel(nn.Module):
    """Common model base: indexes the supplied vocabularies by name.

    Expects a ``vocabs`` keyword argument (an iterable of vocab objects,
    each exposing a ``name`` attribute) and stores them in ``self.vocab``
    keyed by that name.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        vocab_list = kwargs.pop('vocabs')
        self.vocab = dict((v.name, v) for v in vocab_list)
# Example 3
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import codecs
import os
import numpy
from collections import Counter
import config
import math

# --recount: boolean flag; when set, vocabulary counts are rebuilt rather
# than loaded from a previously saved vocab file.
config.add_option('--recount',
                  dest='recount',
                  default=False,
                  help='If there need to be recount the vocab',
                  action='store_true')


class IllegalSampleError(RuntimeError):
    """Raised when an input sample is malformed and cannot be processed."""


class BaseVocab(object):
    _special_tokens = []
    min_occur_count = 0
    max_rank = math.inf

    def __init__(self, *args, **kwargs):
        self._name = kwargs.pop('name', self.__class__.__name__)
        self._filename = os.path.join(config.get_option('save'),
                                      self._name + '.txt')
        self._complete = False
        self._str2idx = zip(self._special_tokens,
# Example 4
 def add_option(self, option, value, type=config.OptionTypePlain):
     """Register *option* under this tab's namespace and display it.

     The option is stored in the global config, wrapped in an OptionBox
     widget, tracked in ``self.options``, and added to this container.
     NOTE(review): `type` shadows the builtin but is kept for caller
     compatibility.
     """
     config.add_option(self.tab_name, option, config.Option(value, type))
     widget = OptionBox(option, value, type, plugin=self.tab_name)
     self.options.append(widget)
     self.add(widget)
# Example 5
# -*- coding: utf-8 -*-
import sys
import torch
from torch import nn, autograd
import config
import time
import copy
import progressbar as pb
from dataset import TrainDataSet
from model import BiAffineSrlModel
from fscore import FScore

# Runtime options for the trainer: execution mode (train/eval/pred) and
# the torch RNG seed for reproducibility.
config.add_option('-m',
                  '--mode',
                  dest='mode',
                  default='train',
                  type='string',
                  help='[train|eval|pred]',
                  action='store')
config.add_option('--seed',
                  dest='seed',
                  default=1,
                  type='int',
                  help='torch random seed',
                  action='store')


def train(num_epochs=30):
    lossfunction = nn.CrossEntropyLoss()
    trainset = TrainDataSet()
    model = BiAffineSrlModel(vocabs=trainset.vocabs)