示例#1
0
class UsableEncoder:
    """Wraps a trained skip-thought encoder so sentences can be turned
    into fixed-size thought vectors."""

    def __init__(self, loc, WORD_DICT):
        """Build the DataLoader from the pickled dictionary at WORD_DICT
        and load trained encoder weights from the checkpoint at loc."""
        print("Preparing the DataLoader. Loading the word dictionary")
        vocabulary = load_dictionary(WORD_DICT)
        self.d = DataLoader(sentences=[''], word_dict=vocabulary)
        self.encoder = None  # set once the checkpoint has been loaded

        print("Loading encoder from the saved model at {}".format(loc))
        model = UniSkip()
        # map_location keeps tensors on the CPU while loading; the encoder
        # is moved to the GPU afterwards when USE_CUDA is set.
        state = torch.load(loc, map_location=lambda storage, _location: storage)
        model.load_state_dict(state)
        self.encoder = model.encoder
        if USE_CUDA:
            self.encoder.cuda(CUDA_DEVICE)
            print('using cuda')

    def encode(self, text):
        """Encode an indexable collection of sentences in batches of 100
        and return the thought vectors as a 2-D numpy array."""

        def batches(seq, size):
            """Yield successive size-sized slices of seq."""
            for start in range(0, len(seq), size):
                yield seq[start:start + size]

        vectors = []
        for batch in batches(text, 100):
            index_tensors = [
                self.d.convert_sentence_to_indices(sentence)
                for sentence in batch
            ]
            stacked = torch.stack(index_tensors)
            encoded, _ = self.encoder(stacked)
            flattened = encoded.view(-1, self.encoder.thought_size)
            vectors.extend(flattened.data.cpu().numpy())
        return np.array(vectors)
示例#2
0
class UsableEncoder:
    """Wraps a trained skip-thought encoder so sentences can be turned
    into fixed-size thought vectors."""

    def __init__(self, loc="./saved_models/skip-best",
                 word_dict="./data/dummy_corpus.txt.pkl"):
        """Load the word dictionary and the best saved model.

        Args:
            loc: path to the saved model checkpoint (best model from training).
            word_dict: path to the pickled word dictionary. New optional
                parameter; the default preserves the previously hard-coded
                path, so existing callers are unaffected.
        """
        print("Preparing the DataLoader. Loading the word dictionary")
        self.d = DataLoader(
            sentences=[''],
            word_dict=load_dictionary(word_dict))
        self.encoder = None  # replaced below once the checkpoint is loaded

        print("Loading encoder from the saved model at {}".format(loc))
        model = UniSkip()
        # map_location keeps tensors on the CPU while loading; the lambda
        # parameter is renamed so it no longer shadows `loc`.
        model.load_state_dict(
            torch.load(loc, map_location=lambda storage, _location: storage))
        self.encoder = model.encoder
        if USE_CUDA:
            self.encoder.cuda(CUDA_DEVICE)  # move the encoder to the GPU

    def encode(self, text):
        """Encode sentences in chunks of 100 and return an
        (N, thought_size) numpy array of thought vectors."""

        def chunks(l, n):
            """Yield successive n-sized chunks from l."""
            for i in range(0, len(l), n):
                yield l[i:i + n]

        ret = []
        for chunk in chunks(text, 100):
            print("encoding chunk of size {}".format(len(chunk)))
            indices = torch.stack([
                self.d.convert_sentence_to_indices(sentence)
                for sentence in chunk
            ])
            encoded, _ = self.encoder(indices)
            encoded = encoded.view(-1, self.encoder.thought_size)
            ret.extend(encoded.data.cpu().numpy())
        return np.array(ret)