Exemplo n.º 1
0
def main(params):

    dp = DataProvider(params)

    # Create vocabulary and author index
    if params['resume'] == None:
        if params['atoms'] == 'char':
            char_to_ix, ix_to_char = dp.createCharVocab(
                params['vocab_threshold'])
        else:
            char_to_ix, ix_to_char = dp.createWordVocab(
                params['vocab_threshold'])
        auth_to_ix, ix_to_auth = dp.createAuthorIdx()
    else:
        saved_model = torch.load(params['resume'])
        char_to_ix = saved_model['char_to_ix']
        auth_to_ix = saved_model['auth_to_ix']
        ix_to_auth = saved_model['ix_to_auth']
        ix_to_char = saved_model['ix_to_char']

    params['vocabulary_size'] = len(char_to_ix)
    params['num_output_layers'] = len(auth_to_ix)

    model = CharTranslator(params)
    # set to train mode, this activates dropout
    model.train()
    #Initialize the RMSprop optimizer

    if params['use_sgd']:
        optim = torch.optim.SGD(model.parameters(),
                                lr=params['learning_rate'],
                                momentum=params['decay_rate'])
    else:
        optim = torch.optim.RMSprop(model.parameters(),
                                    lr=params['learning_rate'],
                                    alpha=params['decay_rate'],
                                    eps=params['smooth_eps'])
    # Loss function
    if params['mode'] == 'generative':
        criterion = nn.CrossEntropyLoss()
    else:
        criterion = nn.NLLLoss()

    # Restore saved checkpoint
    if params['resume'] != None:
        model.load_state_dict(saved_model['state_dict'])
        optim.load_state_dict(saved_model['optimizer'])

    total_loss = 0.
    start_time = time.time()
    hidden = model.init_hidden(params['batch_size'])
    hidden_zeros = model.init_hidden(params['batch_size'])
    # Initialize the cache
    if params['randomize_batches']:
        dp.set_hid_cache(range(len(dp.data['docs'])), hidden_zeros)

    # Compute the iteration parameters
    epochs = params['max_epochs']
    total_seqs = dp.get_num_sents(split='train')
    iter_per_epoch = total_seqs // params['batch_size']
    total_iters = iter_per_epoch * epochs
    best_loss = 1000000.
    best_val = 1000.
    eval_every = int(iter_per_epoch * params['eval_interval'])

    #val_score = eval_model(dp, model, params, char_to_ix, auth_to_ix, split='val', max_docs = params['num_eval'])
    val_score = 0.  #eval_model(dp, model, params, char_to_ix, auth_to_ix, split='val', max_docs = params['num_eval'])
    val_rank = 1000

    eval_function = eval_translator if params[
        'mode'] == 'generative' else eval_classify
    leakage = 0.  #params['leakage']

    print total_iters
    for i in xrange(total_iters):
        #TODO
        if params['split_generators']:
            c_aid = ix_to_auth[np.random.choice(auth_to_ix.values())]
        else:
            c_aid = None

        batch = dp.get_sentence_batch(params['batch_size'],
                                      split='train',
                                      atoms=params['atoms'],
                                      aid=c_aid,
                                      sample_by_len=params['sample_by_len'])
        inps, targs, auths, lens = dp.prepare_data(
            batch, char_to_ix, auth_to_ix, maxlen=params['max_seq_len'])
        # Reset the hidden states for which new docs have been sampled

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optim.zero_grad()
        #TODO
        if params['mode'] == 'generative':
            output, _ = model.forward_mltrain(inps,
                                              lens,
                                              inps,
                                              lens,
                                              hidden_zeros,
                                              auths=auths)
            targets = pack_padded_sequence(Variable(targs).cuda(), lens)
            loss = criterion(pack_padded_sequence(output, lens)[0], targets[0])
        else:
            # for classifier auths is the target
            output, hidden = model.forward_classify(inps,
                                                    hidden,
                                                    compute_softmax=True)
            targets = Variable(auths).cuda()
            loss = criterion(output, targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), params['grad_clip'])

        # Take an optimization step
        optim.step()

        total_loss += loss.data.cpu().numpy()[0]

        # Save the hidden states in cache for later use
        if i % eval_every == 0 and i > 0:
            val_rank, val_score = eval_function(dp,
                                                model,
                                                params,
                                                char_to_ix,
                                                auth_to_ix,
                                                split='val')

        #if i % iter_per_epoch == 0 and i > 0 and leakage > params['leakage_min']:
        #    leakage = leakage * params['leakage_decay']

        #if (i % iter_per_epoch == 0) and ((i//iter_per_epoch) >= params['lr_decay_st']):
        if i % params['log_interval'] == 0 and i > 0:
            cur_loss = total_loss / params['log_interval']
            elapsed = time.time() - start_time
            print(
                '| epoch {:2.2f} | {:5d}/{:5d} batches | lr {:02.2e} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f}'.format(
                    float(i) / iter_per_epoch, i, total_iters,
                    params['learning_rate'],
                    elapsed * 1000 / args.log_interval, cur_loss,
                    math.exp(cur_loss)))
            total_loss = 0.

            if val_rank <= best_val:
                save_checkpoint(
                    {
                        'iter': i,
                        'arch': params,
                        'val_loss': val_rank,
                        'val_pplx': val_score,
                        'char_to_ix': char_to_ix,
                        'ix_to_char': ix_to_char,
                        'auth_to_ix': auth_to_ix,
                        'ix_to_auth': ix_to_auth,
                        'state_dict': model.state_dict(),
                        'loss': cur_loss,
                        'optimizer': optim.state_dict(),
                    },
                    fappend=params['fappend'],
                    outdir=params['checkpoint_output_directory'])
                best_val = val_rank
            start_time = time.time()
Exemplo n.º 2
0
def main(params):
    dp = DataProvider(params)

    # Create vocabulary and author index
    if params['resume'] == None:
        if params['atoms'] == 'char':
            char_to_ix, ix_to_char = dp.create_char_vocab(
                params['vocab_threshold'])
        else:
            char_to_ix, ix_to_char = dp.create_word_vocab(
                params['vocab_threshold'])
        auth_to_ix, ix_to_auth = dp.create_author_idx()
    else:
        saved_model = torch.load(params['resume'])
        char_to_ix = saved_model['char_to_ix']
        auth_to_ix = saved_model['auth_to_ix']
        ix_to_char = saved_model['ix_to_char']

    params['vocabulary_size'] = len(char_to_ix)
    params['num_output_layers'] = len(auth_to_ix)
    print
    params['vocabulary_size'], params['num_output_layers']

    model = get_classifier(params)
    # set to train mode, this activates dropout
    model.train()
    # Initialize the RMSprop optimizer

    if params['use_sgd']:
        optim = torch.optim.SGD(model.parameters(),
                                lr=params['learning_rate'],
                                momentum=params['decay_rate'])
    else:
        optim = torch.optim.RMSprop([{
            'params':
            [p[1] for p in model.named_parameters() if p[0] != 'decoder_W']
        }, {
            'params': model.decoder_W,
            'weight_decay': 0.000
        }],
                                    lr=params['learning_rate'],
                                    alpha=params['decay_rate'],
                                    eps=params['smooth_eps'])
    # Loss function
    if len(params['balance_loss']) == 0:
        criterion = nn.CrossEntropyLoss()
    else:
        criterion = nn.CrossEntropyLoss(
            torch.FloatTensor(params['balance_loss']).cuda())

    # Restore saved checkpoint
    if params['resume'] != None:
        model.load_state_dict(saved_model['state_dict'])
        # optim.load_state_dict(saved_model['optimizer'])

    total_loss = 0.
    class_loss = 0.
    start_time = time.time()
    hidden = model.init_hidden(params['batch_size'])
    hidden_zeros = model.init_hidden(params['batch_size'])
    # Initialize the cache
    if params['randomize_batches']:
        dp.set_hid_cache(range(len(dp.data['docs'])), hidden_zeros)

    # Compute the iteration parameters
    epochs = params['max_epochs']
    total_seqs = dp.get_num_sents(split='train')
    iter_per_epoch = total_seqs // params['batch_size']
    total_iters = iter_per_epoch * epochs
    best_loss = 0.
    best_val = 1000.
    eval_every = int(iter_per_epoch * params['eval_interval'])

    # val_score = eval_model(dp, model, params, char_to_ix, auth_to_ix, split='val', max_docs = params['num_eval'])
    val_score = 0.  # eval_model(dp, model, params, char_to_ix, auth_to_ix, split='val', max_docs = params['num_eval'])
    val_rank = 0

    eval_function = eval_model if params[
        'mode'] == 'generative' else eval_classify

    leakage = params['leakage']
    for i in xrange(total_iters):
        # TODO
        if params['randomize_batches']:
            batch, reset_next = dp.get_rand_doc_batch(params['batch_size'],
                                                      split='train')
            b_ids = [b['id'] for b in batch]
            hidden = dp.get_hid_cache(b_ids, hidden)
        elif params['use_sentences']:
            c_aid = None  # ix_to_auth[np.random.choice(auth_to_ix.values())]
            batch = dp.get_sentence_batch(
                params['batch_size'],
                split='train',
                aid=c_aid,
                atoms=params['atoms'],
                sample_by_len=params['sample_by_len'])
            hidden = hidden_zeros
        else:
            batch, reset_h = dp.get_doc_batch(split='train')
            if len(reset_h) > 0:
                hidden[0].data.index_fill_(1,
                                           torch.LongTensor(reset_h).cuda(),
                                           0.)
                hidden[1].data.index_fill_(1,
                                           torch.LongTensor(reset_h).cuda(),
                                           0.)

        inps, targs, auths, lens = dp.prepare_data(batch,
                                                   char_to_ix,
                                                   auth_to_ix,
                                                   leakage=leakage)

        # Reset the hidden states for which new docs have been sampled

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optim.zero_grad()

        # TODO
        if params['mode'] == 'generative':
            output, hidden = model.forward(inps, lens, hidden, auths)
            targets = pack_padded_sequence(Variable(targs).cuda(), lens)
            loss = criterion(pack_padded_sequence(output, lens)[0], targets[0])
        else:
            # for classifier auths is the target
            output, _ = model.forward_classify(targs,
                                               hidden,
                                               compute_softmax=False,
                                               lens=lens)
            targets = Variable(auths).cuda()
            lossClass = criterion(output, targets)
            if params['compression_layer']:
                loss = lossClass + (model.compression_W.weight.norm(
                    p=1, dim=1)).mean()
            else:
                loss = lossClass
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), params['grad_clip'])

        # Take an optimization step
        optim.step()

        total_loss += loss.data.cpu().numpy()[0]
        class_loss += lossClass.data.cpu().numpy()[0]

        # Save the hidden states in cache for later use
        if params['randomize_batches']:
            if len(reset_next) > 0:
                hidden[0].data.index_fill_(1,
                                           torch.LongTensor(reset_next).cuda(),
                                           0.)
                hidden[1].data.index_fill_(1,
                                           torch.LongTensor(reset_next).cuda(),
                                           0.)
            dp.set_hid_cache(b_ids, hidden)

        if i % eval_every == 0 and i > 0:
            val_rank, val_score = eval_function(dp,
                                                model,
                                                params,
                                                char_to_ix,
                                                auth_to_ix,
                                                split='val',
                                                max_docs=params['num_eval'])

        if i % iter_per_epoch == 0 and i > 0 and leakage > params[
                'leakage_min']:
            leakage = leakage * params['leakage_decay']

        # if (i % iter_per_epoch == 0) and ((i//iter_per_epoch) >= params['lr_decay_st']):
        if i % params['log_interval'] == 0 and i > 0:
            cur_loss = total_loss / params['log_interval']
            class_loss = class_loss / params['log_interval']
            elapsed = time.time() - start_time
            print(
                '| epoch {:3.2f} | {:5d}/{:5d} batches | lr {:02.2e} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f}'.format(
                    float(i) / iter_per_epoch, i, total_iters,
                    params['learning_rate'],
                    elapsed * 1000 / args.log_interval, cur_loss,
                    math.exp(class_loss)))

            if val_rank >= best_loss:
                best_loss = val_rank
                save_checkpoint(
                    {
                        'iter': i,
                        'arch': params,
                        'val_mean_rank': val_rank,
                        'val_auc': val_score,
                        'char_to_ix': char_to_ix,
                        'ix_to_char': ix_to_char,
                        'auth_to_ix': auth_to_ix,
                        'state_dict': model.state_dict(),
                        'loss': cur_loss,
                        'optimizer': optim.state_dict(),
                    },
                    fappend=params['fappend'],
                    outdir=params['checkpoint_output_directory'])
                best_val = val_rank
            start_time = time.time()
            total_loss = 0.
            class_loss = 0.