import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary  # provides the summary() helper used below

# ModelTrainer (used by gotrain below) comes from the surrounding project and is not shown here.


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10, name="Model"):
        super(ResNet, self).__init__()
        self.in_planes = 64
        self.name = name
        self.trainer = None  # set by gotrain(); stats() checks it

        self.conv1 = nn.Conv2d(3,
                               64,
                               kernel_size=3,
                               stride=1,
                               padding=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return F.log_softmax(out, dim=-1)

    def summary(self, input_size):
        summary(self, input_size=input_size)

    def gotrain(self,
                optimizer,
                train_loader,
                test_loader,
                epochs,
                statspath,
                scheduler=None,
                batch_scheduler=False,
                L1lambda=0):
        self.trainer = ModelTrainer(self, optimizer, train_loader, test_loader,
                                    statspath, scheduler, batch_scheduler,
                                    L1lambda)
        self.trainer.run(epochs)

    def stats(self):
        return self.trainer.stats if self.trainer else None
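The snippet above expects a residual block class but does not include one. For reference, a minimal sketch assuming the standard CIFAR-style BasicBlock (expansion = 1); the block definition and the 32x32 input size are assumptions, not part of the original snippet:

class BasicBlock(nn.Module):
    # Standard two-convolution residual block with identity/projection shortcut.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes))

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        return F.relu(out)


# ResNet-18-style model for 10-class 32x32 inputs (e.g. CIFAR-10).
model = ResNet(BasicBlock, [2, 2, 2, 2], num_classes=10)
logits = model(torch.randn(2, 3, 32, 32))  # -> shape (2, 10)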
Example #2
    def gotrain(self,
                optimizer,
                train_loader,
                test_loader,
                epochs,
                statspath,
                scheduler=None,
                batch_scheduler=False,
                L1lambda=0):
        self.trainer = ModelTrainer(self, optimizer, train_loader, test_loader,
                                    statspath, scheduler, batch_scheduler,
                                    L1lambda)
        self.trainer.run(epochs)
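For context, a sketch of how gotrain is typically invoked; the model, data loaders and stats path below are placeholders, not part of the original snippet:

import torch

# Hypothetical call site: `model` is an instance of a network exposing gotrain(),
# and train_loader / test_loader are torch.utils.data.DataLoader objects.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
model.gotrain(optimizer, train_loader, test_loader,
              epochs=20, statspath="stats.json", scheduler=scheduler)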
Example #3
import torch.nn as nn
from torchsummary import summary


class Net(nn.Module):
    """
    Base network that defines helper functions, summary and mapping to device
    """

    def __init__(self, name="Model"):
        super(Net, self).__init__()
        self.trainer = None
        self.name = name

    def conv2d(self, in_channels, out_channels, kernel_size=(3, 3), dilation=1,
               groups=1, padding=1, bias=False, padding_mode="zeros"):
        # Return the bare convolution as a single-element list so that
        # activate() can append BatchNorm/Dropout/ReLU to it.
        return [nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                          kernel_size=kernel_size, groups=groups, dilation=dilation,
                          padding=padding, bias=bias, padding_mode=padding_mode)]

    def activate(self, l, out_channels, bn=True, dropout=0, relu=True):
        # Optionally append BatchNorm, Dropout and ReLU, then wrap everything
        # in a single nn.Sequential.
        if bn:
            l.append(nn.BatchNorm2d(out_channels))
        if dropout > 0:
            l.append(nn.Dropout(dropout))
        if relu:
            l.append(nn.ReLU())
        return nn.Sequential(*l)

    def create_conv2d(self, in_channels, out_channels, kernel_size=(3, 3), dilation=1,
                      groups=1, padding=1, bias=False, bn=True, dropout=0, relu=True,
                      padding_mode="zeros"):
        return self.activate(
            self.conv2d(in_channels=in_channels, out_channels=out_channels,
                        kernel_size=kernel_size, groups=groups, dilation=dilation,
                        padding=padding, bias=bias, padding_mode=padding_mode),
            out_channels, bn, dropout, relu)

    def create_depthwise_conv2d(self, in_channels, out_channels, kernel_size=(3, 3),
                                dilation=1, padding=1, bias=False, bn=True, dropout=0,
                                relu=True, padding_mode="zeros"):
        # separable_conv2d is expected to be provided elsewhere in the project
        # (or by a subclass) and to return a list of layers, like conv2d() above.
        return self.activate(
            self.separable_conv2d(in_channels=in_channels, out_channels=out_channels,
                                  kernel_size=kernel_size, dilation=dilation,
                                  padding=padding, bias=bias, padding_mode=padding_mode),
            out_channels, bn, dropout, relu)

    def summary(self, input_size):  # input_size=(1, 28, 28)
        summary(self, input_size=input_size)

    def gotrain(self, optimizer, train_loader, test_loader, epochs, statspath,
                scheduler=None, batch_scheduler=False, L1lambda=0):
        self.trainer = ModelTrainer(self, optimizer, train_loader, test_loader,
                                    statspath, scheduler, batch_scheduler, L1lambda)
        self.trainer.run(epochs)

    def stats(self):
        return self.trainer.stats if self.trainer else None
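A minimal usage sketch of the helper methods above; the subclass, layer sizes and forward pass are illustrative assumptions, not part of the original snippet:

import torch
import torch.nn.functional as F


class MnistNet(Net):
    # Hypothetical subclass built from the create_conv2d helper above.
    def __init__(self):
        super(MnistNet, self).__init__(name="MnistNet")
        self.conv_block = self.create_conv2d(1, 16, dropout=0.05)
        self.out_block = self.create_conv2d(16, 10, kernel_size=(1, 1),
                                            padding=0, bn=False, relu=False)

    def forward(self, x):
        x = self.conv_block(x)
        x = self.out_block(x)
        x = F.adaptive_avg_pool2d(x, 1)  # global average pooling
        return F.log_softmax(x.view(x.size(0), -1), dim=-1)


model = MnistNet()
logits = model(torch.randn(2, 1, 28, 28))  # -> shape (2, 10)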
Example #4
import copy
import os
import time

import numpy
import cPickle as pkl

# TextIterator, RNNLM, ModelTrainer, TextSampler, prepare_data and pred_probs
# are assumed to be provided by the surrounding project.


def train(train_path,
          validation_path,
          dictionary_path,
          model_path,
          reload_state=False,
          dim_word=100, # word vector dimensionality
          dim=1000, # the number of LSTM units
          encoder='lstm',
          patience=10,
          max_epochs=5000,
          dispFreq=100,
          decay_c=0., 
          alpha_c=0., 
          diag_c=0.,
          lrate=0.01, 
          n_words=100000,
          maxlen=100, # maximum length of the description
          optimizer='rmsprop', 
          batch_size = 16,
          valid_batch_size = 16,
          validFreq=1000,
          saveFreq=1000, # save the parameters after every saveFreq updates
          sampleFreq=100, # generate some text samples after every sampleFreq updates
          profile=False):

    # Model options
    model_options = locals().copy()

    worddicts = dict()
    worddicts_r = dict()
    with open(dictionary_path, 'rb') as f:
        for (i, line) in enumerate(f):
            word = line.strip()
            code = i + 2
            worddicts_r[code] = word
            worddicts[word] = code

    # reload options
    if reload_state and os.path.exists(model_path):
        with open('%s.pkl' % model_path, 'rb') as f:
            model_options = pkl.load(f)

    print '### Loading data.'

    train = TextIterator(train_path, 
                         worddicts,
                         n_words_source=n_words, 
                         batch_size=batch_size,
                         maxlen=maxlen)
    valid = TextIterator(validation_path, 
                         worddicts,
                         n_words_source=n_words, 
                         batch_size=valid_batch_size,
                         maxlen=maxlen)

    print '### Building neural network.'

    rnnlm = RNNLM(model_options)
    trainer = ModelTrainer(rnnlm, optimizer, model_options)
    sampler = TextSampler(rnnlm, model_options)

    print '### Training neural network.'

    best_params = None
    bad_counter = 0

    if validFreq == -1:
        validFreq = len(train[0])/batch_size
    if saveFreq == -1:
        saveFreq = len(train[0])/batch_size
    if sampleFreq == -1:
        sampleFreq = len(train[0])/batch_size

    uidx = 0
    estop = False
    for eidx in xrange(max_epochs):
        n_samples = 0

        for x in train:
            n_samples += len(x)
            uidx += 1

            x, x_mask = prepare_data(x, maxlen=maxlen, n_words=n_words)

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen
                uidx -= 1
                continue

            ud_start = time.time()
            cost = trainer.f_grad_shared(x, x_mask)
            trainer.f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                # Save the best parameters, or the current state if best_params
                # is None.
                rnnlm.save_params(best_params)
                # Save the training options.
                pkl.dump(model_options, open('%s.pkl' % model_path, 'wb'))

            if numpy.mod(uidx, sampleFreq) == 0:
                # FIXME: random selection?
                for jj in xrange(5):
                    sample, score = sampler.generate()
                    print 'Sample ', jj, ': ',
                    ss = sample
                    for vv in ss:
                        if vv == 0:
                            break
                        if vv in worddicts_r:
                            print worddicts_r[vv], 
                        else:
                            print 'UNK',
                    print

            if numpy.mod(uidx, validFreq) == 0:
                valid_errs = pred_probs(f_log_probs, prepare_data, model_options, valid)
                valid_err = valid_errs.mean()
                rnnlm.error_history.append(valid_err)

                if uidx == 0 or valid_err <= numpy.array(rnnlm.error_history).min():
                    best_params = rnnlm.get_param_values()
                    bad_counter = 0
                if len(rnnlm.error_history) > patience and valid_err >= numpy.array(rnnlm.error_history)[:-patience].min():
                    bad_counter += 1
                    if bad_counter > patience:
                        print 'Early Stop!'
                        estop = True
                        break

                if numpy.isnan(valid_err):
                    import ipdb; ipdb.set_trace()

                print 'Valid ', valid_err

        print 'Seen %d samples'%n_samples

        if estop:
            break

    if best_params is not None:
        rnnlm.set_param_values(best_params)

    valid_err = pred_probs(f_log_probs, prepare_data, model_options, valid).mean()

    print 'Valid ', valid_err

    params = copy.copy(best_params)
    numpy.savez(model_path, zipped_params=best_params, 
                error_history=rnnlm.error_history, 
                **params)

    return valid_err
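A minimal call sketch for the function above; the corpus, dictionary and model paths are placeholders:

if __name__ == '__main__':
    # Placeholder paths; point these at your own corpus, dictionary and model file.
    valid_err = train('data/train.txt',
                      'data/valid.txt',
                      'data/dictionary.txt',
                      'models/rnnlm_model.npz',
                      max_epochs=10,
                      batch_size=32)
    print 'Final validation error:', valid_err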