Exemplo n.º 1
0
class AdversarialDomainAdaptation(nn.Module):
    """Question encoder combined with an adversarial domain classifier.

    The module owns two interchangeable question encoders (CNN and LSTM),
    one domain classifier per encoder, and a gradient reversal layer that is
    applied to the embedding before it reaches the domain classifier.
    """

    def __init__(self, input_dim, cnn_hidden_dim, filter_width,
                 lstm_hidden_dim, Lambda, use_cuda):
        """Create the encoders, the reversal layer and the classifiers.

        Args:
            input_dim: forwarded to both encoders as their first argument.
            cnn_hidden_dim: hidden size of the CNN encoder; also the input
                dimension of the CNN domain classifier.
            filter_width: forwarded to the CNN encoder.
            lstm_hidden_dim: hidden size of the LSTM encoder; also the input
                dimension of the LSTM domain classifier.
            Lambda: forwarded to ``GradientReversalLayer``.
            use_cuda: forwarded to every sub-module; when truthy the whole
                module is additionally moved with ``self.cuda()``.
        """
        super(AdversarialDomainAdaptation, self).__init__()

        self.question_encoder_cnn = CNNEncoder(
            input_dim, cnn_hidden_dim, filter_width, use_cuda=use_cuda)
        self.question_encoder_lstm = LSTMEncoder(
            input_dim, lstm_hidden_dim, use_cuda=use_cuda)

        self.gradient_reversal = GradientReversalLayer(Lambda, use_cuda)

        # Each classifier consumes the output of the matching encoder, hence
        # the differing input dimensions.
        self.domain_classifier_cnn = DomainClassifier(
            input_dim=cnn_hidden_dim, use_cuda=use_cuda)
        self.domain_classifier_lstm = DomainClassifier(
            input_dim=lstm_hidden_dim, use_cuda=use_cuda)

        if use_cuda:
            self.cuda()

    def forward(self, title, body, title_mask, body_mask, use_cnn=True,
                use_domain_classifier=True, return_average=True):
        """Run one forward pass.

        Args:
            title, title_mask: passed to the selected encoder's ``run_all``.
            body, body_mask: passed to the selected encoder's ``run_all``.
            use_cnn: truthy selects the CNN encoder/classifier pair,
                otherwise the LSTM pair is used.
            use_domain_classifier: when falsy the domain classifier is
                skipped and ``None`` is returned in its place.
            return_average: accepted but not read by this method.

        Returns:
            A pair ``(embedding, domain_label)``. ``embedding`` is the mean
            of the title and body embeddings, intended for the label
            prediction loss. ``domain_label`` is the output of the domain
            classifier applied to the gradient-reversed embedding, intended
            for the domain classification loss, or ``None`` when the
            classifier is disabled.
        """
        if use_cnn:
            encoder = self.question_encoder_cnn
        else:
            encoder = self.question_encoder_lstm

        encoded_title = encoder.run_all(title, title_mask)
        encoded_body = encoder.run_all(body, body_mask)
        embedding = (encoded_title + encoded_body) / 2

        if not use_domain_classifier:
            return embedding, None

        reversed_embedding = self.gradient_reversal(embedding)
        classifier = (self.domain_classifier_cnn
                      if use_cnn else self.domain_classifier_lstm)
        return embedding, classifier(reversed_embedding)

    def change_lambda(self, Lambda):
        """Forward a new ``Lambda`` value to the gradient reversal layer."""
        self.gradient_reversal.change_lambda(Lambda)
Exemplo n.º 2
0
    def __init__(self, input_dim, cnn_hidden_dim, filter_width,
                 lstm_hidden_dim, Lambda, use_cuda):
        """Build the two encoders, the reversal layer and two classifiers.

        ``input_dim`` feeds both encoders; each domain classifier takes the
        hidden dimension of its matching encoder as its input dimension.
        ``Lambda`` is handed to the gradient reversal layer and ``use_cuda``
        is forwarded to every sub-module.
        """
        super(AdversarialDomainAdaptation, self).__init__()

        # Question encoders (construction and registration order is kept:
        # CNN first, then LSTM).
        cnn_encoder = CNNEncoder(input_dim, cnn_hidden_dim, filter_width,
                                 use_cuda=use_cuda)
        self.question_encoder_cnn = cnn_encoder
        lstm_encoder = LSTMEncoder(input_dim, lstm_hidden_dim,
                                   use_cuda=use_cuda)
        self.question_encoder_lstm = lstm_encoder

        # Layer applied between the encoder output and the domain classifier.
        reversal = GradientReversalLayer(Lambda, use_cuda)
        self.gradient_reversal = reversal

        # One domain classifier per encoder, sized to that encoder's output.
        cnn_classifier = DomainClassifier(input_dim=cnn_hidden_dim,
                                          use_cuda=use_cuda)
        self.domain_classifier_cnn = cnn_classifier
        lstm_classifier = DomainClassifier(input_dim=lstm_hidden_dim,
                                           use_cuda=use_cuda)
        self.domain_classifier_lstm = lstm_classifier

        if use_cuda:
            self.cuda()
Exemplo n.º 3
0
def part3(askubuntu_data, model_type, android_data):
    """Train the part 3 encoder on AskUbuntu data and evaluate on Android.

    An LSTM or CNN encoder is built according to ``model_type`` and trained
    with ``train_model`` using titles, bodies and TF-IDF weighting.

    Args:
        askubuntu_data: training data handed to ``train_model``.
        model_type: ``ModelType.LSTM`` or ``ModelType.CNN``. Any other value
            prints an error and returns without training.
        android_data: data handed to ``eval_part2``. If it is ``None`` the
            Android evaluation is skipped, as the original docstring
            promised ("If android_data is not None, also evaluates ...").

    Returns:
        None.
    """
    if model_type == ModelType.LSTM:
        model = LSTMEncoder(EMBEDDING_LENGTH,
                            LSTM_HIDDEN_DIM,
                            use_cuda=USE_CUDA,
                            return_average=True)
    elif model_type == ModelType.CNN:
        model = CNNEncoder(EMBEDDING_LENGTH,
                           CNN_HIDDEN_DIM,
                           FILTER_WIDTH,
                           use_cuda=USE_CUDA,
                           return_average=True)
    else:
        # Single pre-formatted argument: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("Error: unknown model type %s" % (model_type,))
        return

    train_model(model_type,
                askubuntu_data,
                model,
                NUM_EPOCHS,
                BATCH_SIZE,
                use_title=True,
                use_body=True,
                tfidf_weighting=True)

    if android_data is None:
        # Nothing to evaluate on; training alone was requested.
        return

    print("----------Evaluating Part 2.3 on android dataset...")
    # eval_part2 is run twice, once with True and once with False as its
    # third positional argument (presumably selecting the dev/test split --
    # confirm against eval_part2).
    for third_arg in (True, False):
        eval_part2(model,
                   android_data,
                   third_arg,
                   model_type,
                   using_part1_model=True,
                   tfidf_weighting=True)
Exemplo n.º 4
0
	def __init__(self, output_dim, hidden_dim,output_length, init='glorot_uniform', inner_init='orthogonal', forget_bias_init='one', activation='tanh', inner_activation='hard_sigmoid',
                 weights=None, truncate_gradient=-1,
                 input_dim=None, input_length=None, hidden_state=None, batch_size=None, depth=2, context_sensitive=False,
                 ):

		if not type(depth) == list:
			depth = [depth, depth]
		n_lstms = sum(depth)
		if  depth[1] < 2 and context_sensitive:
			print "Warning: Your model will not be able to remember its previous output!"
		if weights is None:
			weights = [None] * (n_lstms + 1)

		if hidden_state is None:
			hidden_state = [None] * (n_lstms + 1)

		encoder_index = depth[0] - 1
		decoder_index = depth[0] + 1

		decoder = LSTMDecoder2(dim=output_dim, hidden_dim=hidden_dim, output_length=output_length,
							  init=init,inner_init=inner_init, activation=activation, 
							  inner_activation=inner_activation,weights=weights[decoder_index],
							  truncate_gradient = truncate_gradient, 
							  hidden_state=hidden_state[decoder_index], batch_size=batch_size, remember_state=context_sensitive)

		encoder = LSTMEncoder(input_dim=input_dim, output_dim=hidden_dim,init=init,
							  inner_init=inner_init, activation=activation, 
							  inner_activation=inner_activation,weights=weights[encoder_index],
							  truncate_gradient = truncate_gradient, input_length=input_length,
							  hidden_state=hidden_state[encoder_index], batch_size=batch_size, remember_state=context_sensitive)

		left_deep = [LSTMEncoder(input_dim=input_dim, output_dim=input_dim,init=init,
							  inner_init=inner_init, activation=activation, 
							  inner_activation=inner_activation,weights=weights[i],
							  truncate_gradient = truncate_gradient, input_length=input_length,
							  hidden_state=hidden_state[i], batch_size=batch_size, return_sequences=True, remember_state=context_sensitive)
					for i in range(depth[0]-1)]


		right_deep = [LSTMEncoder(input_dim=output_dim, output_dim=output_dim,init=init,
							  inner_init=inner_init, activation=activation, 
							  inner_activation=inner_activation,weights=weights[decoder_index + 1 + i],
							  truncate_gradient = truncate_gradient, input_length=input_length,
							  hidden_state=hidden_state[decoder_index + 1 + i], batch_size=batch_size, return_sequences=True, remember_state=context_sensitive)
					for i in range(depth[1]-1)]

		dense = Dense(input_dim=hidden_dim, output_dim=output_dim)
		encoder.broadcast_state(decoder)
		if weights[depth[0]] is not None:
			dense.set_weights(weights[depth[0]])
		super(Seq2seq, self).__init__()
		for l in left_deep:
			self.add(l)
		self.add(encoder)
		self.add(dense)
		self.add(decoder)
		for l in right_deep:
			self.add(l)
		self.encoder = encoder
		self.dense = dense
		self.decoder = decoder
		self.left_deep = left_deep
		self.right_deep = right_deep
Exemplo n.º 5
0
 # No embedding module is created here: the construction below is left
 # commented out and None is handed to every encoder/decoder as `embed`.
 # (How each class reacts to embed=None is not visible here -- presumably it
 # builds its own embedding; confirm against the class definitions.)
 #embed_model = nn.Embedding(len(vocab), args.embed_size)
 embed_model = None
 # Dropout value passed to both encoders and the decoder.
 dropout = 0.5
 # Assemble the model from three parts: a history encoder, an input encoder
 # and a decoder. The first constructor argument is passed as None.
 model = MMSeq2SeqModel(
     None,
     HLSTMEncoder(args.hist_enc_layers[0],
                  args.hist_enc_layers[1],
                  len(vocab),
                  args.hist_out_size,
                  args.embed_size,
                  args.hist_enc_hsize,
                  dropout=dropout,
                  embed=embed_model),
     LSTMEncoder(args.in_enc_layers,
                 len(vocab),
                 args.in_enc_hsize,
                 args.embed_size,
                 dropout=dropout,
                 embed=embed_model),
     HLSTMDecoder(args.dec_layers,
                  len(vocab),
                  len(vocab),
                  args.embed_size,
                  # sum of the history-encoder output size and the
                  # input-encoder hidden size
                  args.hist_out_size + args.in_enc_hsize,
                  args.dec_hsize,
                  args.dec_psize,
                  independent=False,
                  dropout=dropout,
                  embed=embed_model))
 # Initialise the model weights with the "he" / "xavier" schemes (which
 # scheme applies to which parameters is decided by the helper).
 initialize_model_weights(model, "he", "xavier")
 # report data summary
 logging.info('#vocab = %d' % len(vocab))
def _build_arg_parser():
    """Create the command-line parser for the training script."""
    parser = argparse.ArgumentParser()
    # logging
    parser.add_argument('--logfile',
                        '-l',
                        default='',
                        type=str,
                        help='write log data into a file')
    parser.add_argument('--debug',
                        '-d',
                        action='store_true',
                        help='run in debug mode')
    parser.add_argument('--silent',
                        '-s',
                        action='store_true',
                        help='run in silent mode')
    parser.add_argument('--no-progress-bar',
                        action='store_true',
                        help='hide progress bar')
    # train and validate data
    parser.add_argument('--train',
                        default='train.txt',
                        type=str,
                        help='set filename of training data')
    parser.add_argument('--validate',
                        default='dev.txt',
                        type=str,
                        help='set filename of validation data')
    parser.add_argument('--vocab-size',
                        '-V',
                        default=0,
                        type=int,
                        help='set vocabulary size (0 means no limitation)')
    parser.add_argument(
        '--target-speaker',
        '-T',
        default='S',
        help='set target speaker name to be learned for system output')
    # file settings
    parser.add_argument('--initial-model',
                        '-i',
                        help='start training from an initial model')
    parser.add_argument('--model',
                        '-m',
                        required=True,
                        help='set prefix of output model files')
    parser.add_argument(
        '--resume',
        action='store_true',
        help='resume training from a previously saved snapshot')
    parser.add_argument('--snapshot',
                        type=str,
                        help='dump a snapshot to a file after each epoch')
    # Model structure
    parser.add_argument('--enc-layer',
                        default=2,
                        type=int,
                        help='number of encoder layers')
    parser.add_argument('--enc-esize',
                        default=100,
                        type=int,
                        help='number of encoder input-embedding units')
    parser.add_argument('--enc-hsize',
                        default=512,
                        type=int,
                        help='number of encoder hidden units')

    parser.add_argument('--dec-layer',
                        default=2,
                        type=int,
                        help='number of decoder layers')
    parser.add_argument('--dec-esize',
                        default=100,
                        type=int,
                        help='number of decoder input-embedding units')
    parser.add_argument('--dec-hsize',
                        default=512,
                        type=int,
                        help='number of decoder hidden units')
    parser.add_argument('--dec-psize',
                        default=100,
                        type=int,
                        help='number of decoder pre-output projection units')
    # training conditions
    parser.add_argument(
        '--optimizer',
        default='Adam',
        type=str,
        help="set optimizer (SGD, Adam, AdaDelta, RMSprop, ...)")
    parser.add_argument('--L2-weight',
                        default=0.0,
                        type=float,
                        help="set weight for L2-regularization term")
    parser.add_argument('--clip-grads',
                        default=5.,
                        type=float,
                        help="set gradient clipping threshold")
    parser.add_argument('--dropout-rate',
                        default=0.5,
                        type=float,
                        help="set dropout rate in training")
    parser.add_argument('--num-epochs',
                        '-e',
                        default=20,
                        type=int,
                        help='number of epochs to be trained')
    parser.add_argument('--learn-rate',
                        '-R',
                        default=1.0,
                        type=float,
                        help='set initial learning rate for SGD')
    parser.add_argument('--learn-decay',
                        default=1.0,
                        type=float,
                        help='set decaying ratio of learning rate or epsilon')
    parser.add_argument(
        '--lower-bound',
        default=1e-16,
        type=float,
        help='set threshold of learning rate or epsilon for early stopping')
    parser.add_argument('--batch-size',
                        '-b',
                        default=50,
                        type=int,
                        help='set batch size for training and validation')
    parser.add_argument(
        '--max-batch-length',
        default=20,
        type=int,
        help='set maximum sequence length to control batch size')
    parser.add_argument('--seed',
                        default=99,
                        type=int,
                        help='set a seed for random numbers')
    # select a GPU device
    parser.add_argument('--gpu',
                        '-g',
                        default=0,
                        type=int,
                        help='GPU ID (negative value indicates CPU)')
    return parser


def main():
    """Train a neural conversation model from the command line.

    Steps, in order:
      1. Parse arguments, configure logging, select GPU/CPU and seed the
         random number generators.
      2. Obtain vocabulary, model and optimizer: restored from a snapshot
         (``--resume``), loaded from ``--initial-model``, or built from the
         training file.
      3. Load training/validation data and split it into mini-batches.
      4. For each epoch: train, write ``<model>.<epoch>``, validate, track
         the best epoch, decay the learning rate (SGD) or epsilon (other
         optimizers) when validation perplexity did not improve, and
         optionally dump a snapshot.
      5. Point ``<model>.best`` (symbolic link) at the best epoch's file.

    Exits with status 1 if ``--resume`` is given without ``--snapshot``.
    """
    args = _build_arg_parser().parse_args()

    # flush stdout
    if six.PY2:
        sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
    # set up the logger
    tqdm_logging.config(logger,
                        args.logfile,
                        mode=('a' if args.resume else 'w'),
                        silent=args.silent,
                        debug=args.debug)
    # gpu setup
    if args.gpu >= 0:
        cuda.check_cuda_available()
        cuda.get_device(args.gpu).use()
        xp = cuda.cupy
        xp.random.seed(args.seed)
    else:
        xp = np

    # randomize
    np.random.seed(args.seed)
    random.seed(args.seed)

    logger.info('----------------------------------')
    logger.info('Train a neural conversation model')
    logger.info('----------------------------------')
    if args.resume:
        if not args.snapshot:
            logger.error('snapshot file is not specified.')
            # Non-zero status so that callers can detect the failure.
            sys.exit(1)

        # NOTE(security): pickle.load can execute arbitrary code; only load
        # snapshots and model files that come from a trusted source.
        with open(args.snapshot, 'rb') as f:
            # The command-line args are replaced by those in the snapshot.
            vocab, optimizer, status, args = pickle.load(f)
        logger.info('Resume training from epoch %d' % status.epoch)
        logger.info('Args ' + str(args))
        model = optimizer.target
    else:
        logger.info('Args ' + str(args))
        # Prepare RNN model and load data
        if args.initial_model:
            logger.info('Loading a model from ' + args.initial_model)
            with open(args.initial_model, 'rb') as f:
                vocab, model, tmp_args = pickle.load(f)
            # The original code assigned status.cur_at here, but status is
            # not bound yet on this path (UnboundLocalError) and a fresh
            # Status object is created further down anyway.
        else:
            logger.info('Making vocabulary from ' + args.train)
            vocab = dialog_corpus.get_vocabulary(args.train,
                                                 vocabsize=args.vocab_size)
            model = Sequence2SequenceModel(
                LSTMEncoder(args.enc_layer,
                            len(vocab),
                            args.enc_hsize,
                            args.enc_esize,
                            dropout=args.dropout_rate),
                LSTMDecoder(args.dec_layer,
                            len(vocab),
                            len(vocab),
                            args.dec_esize,
                            args.dec_hsize,
                            args.dec_psize,
                            dropout=args.dropout_rate))
        # Setup optimizer
        optimizer = vars(optimizers)[args.optimizer]()
        if args.optimizer == 'SGD':
            optimizer.lr = args.learn_rate
        optimizer.use_cleargrads()
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.GradientClipping(args.clip_grads))
        if args.L2_weight > 0.:
            optimizer.add_hook(chainer.optimizer.WeightDecay(args.L2_weight))
        # Marks that a new Status has to be created once the data is loaded.
        status = None

    logger.info('Loading text data from ' + args.train)
    train_set = dialog_corpus.load(args.train, vocab, args.target_speaker)
    logger.info('Loading validation data from ' + args.validate)
    validate_set = dialog_corpus.load(args.validate, vocab,
                                      args.target_speaker)
    logger.info('Making mini batches')
    train_batchset = dialog_corpus.make_minibatches(
        train_set, batchsize=args.batch_size, max_length=args.max_batch_length)
    validate_batchset = dialog_corpus.make_minibatches(
        validate_set,
        batchsize=args.batch_size,
        max_length=args.max_batch_length)
    # report data summary
    logger.info('vocabulary size = %d' % len(vocab))
    logger.info('#train sample = %d  #mini-batch = %d' %
                (len(train_set), len(train_batchset)))
    logger.info('#validate sample = %d  #mini-batch = %d' %
                (len(validate_set), len(validate_batchset)))
    # The explicit `random.random` second argument was removed in Python
    # 3.11; the default uses the same seeded module-level generator.
    random.shuffle(train_batchset)

    # initialize status parameters
    if status is None:
        status = Status(max(round(len(train_batchset), -3) / 50, 500),
                        progress_bar=not args.no_progress_bar)
    else:
        status.progress_bar = not args.no_progress_bar

    # move model to gpu
    if args.gpu >= 0:
        model.to_gpu()

    while status.epoch <= args.num_epochs:
        logger.info('---------------------training--------------------------')
        if args.optimizer == 'SGD':
            logger.info('Epoch %d/%d : SGD learning rate = %g' %
                        (status.epoch, args.num_epochs, optimizer.lr))
        else:
            logger.info(
                'Epoch %d/%d : %s eps = %g' %
                (status.epoch, args.num_epochs, args.optimizer, optimizer.eps))
        train_ppl = train_step(model, optimizer, train_set, train_batchset,
                               status, xp)
        logger.info("epoch %d training perplexity: %f" %
                    (status.epoch, train_ppl))
        # write the model params (moved to CPU first, then back to the GPU)
        modelfile = args.model + '.' + str(status.epoch)
        logger.info('writing model params to ' + modelfile)
        model.to_cpu()
        with open(modelfile, 'wb') as f:
            pickle.dump((vocab, model, args), f, -1)
        if args.gpu >= 0:
            model.to_gpu()

        # start validation step
        logger.info('---------------------validation------------------------')
        start_at = time.time()
        validate_ppl = validate_step(model, validate_set, validate_batchset,
                                     status, xp)
        logger.info('epoch %d validation perplexity: %.4f' %
                    (status.epoch, validate_ppl))
        # update best model with the minimum perplexity
        if status.min_validate_ppl >= validate_ppl:
            status.bestmodel_num = status.epoch
            logger.info('validation perplexity reduced: %.4f -> %.4f' %
                        (status.min_validate_ppl, validate_ppl))
            status.min_validate_ppl = validate_ppl

        elif args.optimizer == 'SGD':
            # No improvement with SGD: roll back to the best model so far
            # and continue with a decayed learning rate.
            modelfile = args.model + '.' + str(status.bestmodel_num)
            logger.info('reloading model params from ' + modelfile)
            with open(modelfile, 'rb') as f:
                vocab, model, tmp_args = pickle.load(f)
            if args.gpu >= 0:
                model.to_gpu()
            optimizer.lr *= args.learn_decay
            if optimizer.lr < args.lower_bound:
                break
            optimizer.setup(model)
        else:
            # No improvement with another optimizer: decay its epsilon.
            optimizer.eps *= args.learn_decay
            if optimizer.eps < args.lower_bound:
                break

        status.new_epoch(validate_time=time.time() - start_at)
        # dump snapshot
        if args.snapshot:
            logger.info('writing snapshot to ' + args.snapshot)
            model.to_cpu()
            with open(args.snapshot, 'wb') as f:
                pickle.dump((vocab, optimizer, status, args), f, -1)
            if args.gpu >= 0:
                model.to_gpu()

    logger.info('----------------')
    # make a symbolic link to the best model
    logger.info('the best model is %s.%d.' %
                (args.model, status.bestmodel_num))
    best_link = args.model + '.best'
    logger.info('a symbolic link is made as ' + best_link)
    # lexists also detects a dangling link left by an earlier run, which
    # os.path.exists reports as missing and os.symlink then refuses to
    # overwrite.
    if os.path.lexists(best_link):
        os.remove(best_link)
    os.symlink(os.path.basename(args.model + '.' + str(status.bestmodel_num)),
               best_link)
    logger.info('done')
Exemplo n.º 7
0
	def __init__(self, output_dim, hidden_dim,output_length, init='glorot_uniform', inner_init='orthogonal', forget_bias_init='one', activation='tanh', inner_activation='hard_sigmoid',
                 weights=None, truncate_gradient=-1,
                 input_dim=None, input_length=None, hidden_state=None, batch_size=None, depth=1, remember_state=False,
                 ):

		if not type(depth) == list:
			depth = [depth, depth]
		n_lstms = sum(depth)

		if weights is None:
			weights = [None] * (n_lstms + 1)

		if hidden_state is None:
			hidden_state = [None] * (n_lstms + 1)

		encoder_index = depth[0] - 1
		decoder_index = depth[0] + 1

		decoder = LSTMDecoder(dim=output_dim, hidden_dim=hidden_dim, output_length=output_length,
							  init=init,inner_init=inner_init, activation=activation, 
							  inner_activation=inner_activation,weights=weights[decoder_index],
							  truncate_gradient = truncate_gradient, 
							  hidden_state=hidden_state[decoder_index], batch_size=batch_size, remember_state=remember_state)

		encoder = LSTMEncoder(input_dim=input_dim, output_dim=hidden_dim,init=init,
							  inner_init=inner_init, activation=activation, 
							  inner_activation=inner_activation,weights=weights[encoder_index],
							  truncate_gradient = truncate_gradient, input_length=input_length,
							  hidden_state=hidden_state[encoder_index], batch_size=batch_size, remember_state=remember_state)

		left_deep = [LSTMEncoder(input_dim=input_dim, output_dim=input_dim,init=init,
							  inner_init=inner_init, activation=activation, 
							  inner_activation=inner_activation,weights=weights[i],
							  truncate_gradient = truncate_gradient, input_length=input_length,
							  hidden_state=hidden_state[i], batch_size=batch_size, return_sequences=True, remember_state=remember_state)
					for i in range(depth[0]-1)]


		right_deep = [LSTMEncoder(input_dim=output_dim, output_dim=output_dim,init=init,
							  inner_init=inner_init, activation=activation, 
							  inner_activation=inner_activation,weights=weights[decoder_index + 1 + i],
							  truncate_gradient = truncate_gradient, input_length=input_length,
							  hidden_state=hidden_state[decoder_index + 1 + i], batch_size=batch_size, return_sequences=True, remember_state=remember_state)
					for i in range(depth[1]-1)]

		dense = Dense(input_dim=hidden_dim, output_dim=output_dim)
		encoder.broadcast_state(decoder)
		if weights[depth[0]] is not None:
			dense.set_weights(weights[depth[0]])
		super(Seq2seq, self).__init__()
		for l in left_deep:
			self.add(l)
		self.add(encoder)
		self.add(dense)
		self.add(decoder)
		for l in right_deep:
			self.add(l)
		self.encoder = encoder
		self.dense = dense
		self.decoder = decoder
		self.left_deep = left_deep
		self.right_deep = right_deep