Example No. 1
import argparse

import numpy as np
import torch
import torch.nn as nn

# setseed, preprocess_data, RNNModel, train and evaluate are defined
# elsewhere in the source project.
def main():
    parse = argparse.ArgumentParser()

    parse.add_argument("--batch_size", default=16, type=int)
    parse.add_argument("--do_train",
                       default=True,
                       action="store_true",
                       help="Whether to run training.")
    parse.add_argument("--do_eval",
                       default=True,
                       action="store_true",
                       help="Whether to run training.")
    parse.add_argument("--learnning_rate", default=1e-4, type=float)
    parse.add_argument("--num_epoch", default=5, type=int)
    parse.add_argument("--max_vocab_size", default=50000, type=int)
    parse.add_argument("--embed_size", default=300, type=int)
    parse.add_argument("--warmup_steps",
                       default=0,
                       type=int,
                       help="Linear warmup over warmup_steps.")
    parse.add_argument("--hidden_size", default=1000, type=int)
    parse.add_argument("--num_layers", default=2, type=int)
    parse.add_argument("--GRAD_CLIP", default=1, type=float)
    args = parse.parse_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.device = device

    setseed()

    VOCAB_SIZE, train_iter, dev_iter, test_iter, weight_matrix = preprocess_data(
        args)

    model = RNNModel(weight_matrix, 'GRU', VOCAB_SIZE, args.embed_size,
                     args.hidden_size, args.num_layers)
    model.to(device)

    loss_fn = nn.CrossEntropyLoss()  # cross-entropy loss
    if args.do_train:
        train(args, model, train_iter, dev_iter, loss_fn, VOCAB_SIZE)

    if args.do_eval:
        model.load_state_dict(torch.load('lm-best-GRU.th'))
        model.to(device)

        test_loss = evaluate(args, model, test_iter, loss_fn, VOCAB_SIZE)
        LOG_FILE = "language_model_GRU.log"
        with open(LOG_FILE, 'a') as fout:
            fout.write("test perplexity: {} ".format(np.exp(test_loss)))
        print("perplexity: ", np.exp(test_loss))
Example No. 2
import os
import pickle

import torch

# RNNModel is defined elsewhere in the source project.
def model_fn(model_dir):
    """Load the PyTorch model from the `model_dir` directory."""
    print("Loading model.")

    # First, load the parameters used to create the model.
    model_info_path = os.path.join(model_dir, 'model_info.pth')
    with open(model_info_path, 'rb') as f:
        model_info = torch.load(f)

    print("model_info: {}".format(model_info))

    # Determine the device and construct the model.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = RNNModel(model_info['vocab_size'], model_info['embedding_dim'],
                     model_info['hidden_dim'], model_info['n_layers'],
                     model_info['drop_rate'])

    # Load the stored model parameters.
    model_path = os.path.join(model_dir, 'model.pth')
    with open(model_path, 'rb') as f:
        model.load_state_dict(
            torch.load(f, map_location=lambda storage, loc: storage))

    # Load the saved character lookup tables.
    char_dict_path = os.path.join(model_dir, 'char_dict.pkl')
    with open(char_dict_path, 'rb') as f:
        model.char2int = pickle.load(f)

    int_dict_path = os.path.join(model_dir, 'int_dict.pkl')
    with open(int_dict_path, 'rb') as f:
        model.int2char = pickle.load(f)

    model.to(device).eval()

    print("Done loading model.")
    return model
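
model_fn implies a matching save step that writes model_info.pth, model.pth, char_dict.pkl and int_dict.pkl into model_dir. A minimal sketch of that counterpart, assuming only the file names and attributes the loader above reads (save_model is a hypothetical name):

import os
import pickle

import torch


def save_model(model, model_info, model_dir):
    # Hypothetical counterpart to model_fn: persist the constructor
    # arguments, the weights, and the two character lookup tables
    # that model_fn expects to find.
    with open(os.path.join(model_dir, 'model_info.pth'), 'wb') as f:
        torch.save(model_info, f)
    with open(os.path.join(model_dir, 'model.pth'), 'wb') as f:
        torch.save(model.state_dict(), f)
    with open(os.path.join(model_dir, 'char_dict.pkl'), 'wb') as f:
        pickle.dump(model.char2int, f)
    with open(os.path.join(model_dir, 'int_dict.pkl'), 'wb') as f:
        pickle.dump(model.int2char, f)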
Example No. 3
import torch
import torch.nn as nn
import tqdm

# word2ix, embed_size, hidden_dims, lr, epochs, data, device, RNNModel
# and test() are defined elsewhere in the source project.
def train():

    # Model definition.
    model = RNNModel(len(word2ix), embed_size, hidden_dims)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    model.to(device)
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        count = 0
        for data_ in tqdm.tqdm(data):
            data_ = torch.tensor(data_).long()
            x = data_.unsqueeze(1).to(device)
            optimizer.zero_grad()
            # Targets are the inputs shifted left by one step; the first
            # token wraps around to the end (next-token prediction).
            y = torch.zeros(x.shape).to(device).long()
            y[:-1], y[-1] = x[1:], x[0]
            output, _ = model(x)
            loss = criterion(output, y.view(-1))
            """
            hidden=None
            for k in range(2,max_lenth):
                data1=data_[:k]
                input_, target = data1[:-1, :], data1[1:, :]
                output, hidden = model(input_,hidden)
                loss = criterion(output, target.view(-1))
                optimizer.step()
            """
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            count += 1
        print(epoch, 'loss=', total_loss / count)
        torch.save(model.state_dict(), 'model.bin')
        chars = test(model)
        print(chars)
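
The subtlest line above is the target construction: y is x shifted left by one step, with the first token wrapped around to the end. A standalone demonstration on a 1-D tensor (the training loop uses shape (seq_len, 1), but the shift works the same way; token ids are made up):

import torch

x = torch.tensor([10, 11, 12, 13])  # hypothetical token ids
y = torch.zeros_like(x)
y[:-1], y[-1] = x[1:], x[0]         # next-token targets, wrapped
print(y)                            # tensor([11, 12, 13, 10])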
Example No. 4
    def decompress(self, compressedfile):
        start = time.time()
        filename_split = compressedfile.split('_')
        checkpoint = torch.load(compressedfile, map_location=self.device)
        body = checkpoint['bytes']
        dictionary = Dictionary()
        dictionary.word2idx = checkpoint['word2idx']
        dictionary.idx2word = checkpoint['idx2word']
        context_map = Context(dictionary)
        ntokens = len(dictionary)
        model = RNNModel('LSTM',
                         ntokens,
                         200,
                         200,
                         2,
                         dropout=0.2,
                         tie_weights=False)
        model.load_state_dict(checkpoint['model_state_dict'])
        model.to(self.device)
        model.eval()
        # Expand the stored bytes into one long bit string.
        bit_string = ''.join("{0:08b}".format(b) for b in body)
        encoded_text = self.remove_padding(bit_string)
        # decompress start here
        current_code = ''
        decoded_text = ''
        # we define an initial context
        # then we predict the initial huffman tree
        # read bits until we get to a leaf
        # convert the leaf to a char and add it to decompressed text
        # update the context and repeat the process
        context = ['<s>'] * 10

        def tree_from_context(context):
            huffman = HuffmanCoding()
            prob = huffman.make_context_frequency_dict(
                context,
                model,
                context_map,
                self.device,
                threshold=self.args.threshold)
            huffman.make_heap_node(prob)
            huffman.merge_nodes()
            huffman.encode()
            huffman.reverse_mapping = {v: k for k, v in huffman.codes.items()}
            return huffman

        huffman = tree_from_context(context)
        fixed_huffman = HuffmanCoding()
        counts = checkpoint['fixed_huffman_counts']
        fixed_huffman.make_heap_node(counts)
        fixed_huffman.merge_nodes()
        fixed_huffman.encode()
        fixed_huffman.reverse_mapping = {
            v: k
            for k, v in fixed_huffman.codes.items()
        }
        # Each symbol is framed as one flag bit ('0' = context-adaptive
        # tree, '1' = fixed fallback tree) followed by a codeword.
        flag = None
        for bit in encoded_text:
            if flag == '0':
                current_code += bit
                if current_code in huffman.reverse_mapping:
                    next_char = huffman.reverse_mapping[current_code]
                    decoded_text += next_char
                    current_code = ''
                    context = context[1:] + [next_char]
                    huffman = tree_from_context(context)
                    flag = None
                continue
            elif flag == '1':
                current_code += bit
                if current_code in fixed_huffman.reverse_mapping:
                    next_char = fixed_huffman.reverse_mapping[current_code]
                    decoded_text += next_char
                    current_code = ''
                    context = context[1:] + [next_char]
                    huffman = tree_from_context(context)
                    flag = None
                continue
            else:
                flag = bit
        # write decompressed file
        with open(filename_split[0] + "_decompressed.txt", 'w') as f:
            f.write(decoded_text)
        print('Decompression Done!')
        end = time.time()
        print(round((end - start), 3), "s")
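
The decoder frames every symbol as one flag bit (choosing between the adaptive and the fixed Huffman tree) followed by a prefix-free codeword. A toy decoder over a hypothetical code table isolates the codeword-matching step; the real tables come from the HuffmanCoding trees above:

# Hypothetical prefix code; codes in the real decoder come from the
# Huffman trees built above.
reverse_mapping = {'0': 'a', '10': 'b', '11': 'c'}


def decode(bits, table):
    out, code = [], ''
    for bit in bits:
        code += bit
        if code in table:  # reached a leaf: emit the symbol, reset
            out.append(table[code])
            code = ''
    return ''.join(out)


print(decode('10011', reverse_mapping))  # -> 'bac'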
Example No. 5
        model = RNNModel(encoder.encoding_size,
                         args.hidden_size,
                         len(corpora.vocab),
                         args.layers,
                         encoder,
                         dropout=args.dropout)

    criterion = torch.nn.CrossEntropyLoss()
    trainer = Trainer(model, corpora, criterion, device, logger,
                      args.batch_size, args.seq_len, args.lr,
                      args.log_interval, args.clip_grad)
    best_valid_loss = float("inf")
    for epoch in range(args.epochs):
        print('Time at the start of epoch {} is {}'.format(
            epoch, datetime.now()))
        trainer.train()
        valid_loss = evaluate(model, corpora, criterion, device)
        print('Validation loss: {:.2f}. Perplexity: {:.2f}'.format(
            valid_loss, math.exp(valid_loss)))
        if args.log_dir:
            logger.log_valid(epoch, valid_loss)
        # Move the model to the CPU for checkpointing, then back to the
        # training device.
        save_checkpoint(model.to(torch.device('cpu')), args.checkpoint,
                        valid_loss, args)
        model = model.to(device)

        # Anneal the learning rate if the validation loss hasn't improved.
        if (valid_loss - best_valid_loss) < -0.01:
            best_valid_loss = valid_loss
        else:
            trainer.learning_rate /= 4.0
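
The reported perplexity is just the exponential of the mean cross-entropy (in nats), which is what math.exp(valid_loss) computes. For intuition, with made-up numbers:

import math

valid_loss = 4.6  # hypothetical mean cross-entropy per token, in nats
print(round(math.exp(valid_loss), 2))  # perplexity ~ 99.48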