Example #1
    def compile(self):
        if self.ptx is None:
            # Compile the CUDA source in `kernel` with NVRTC once and cache the
            # resulting PTX on the class so later instances skip recompilation.
            program = _NVRTCProgram(kernel.encode(),
                                    "recurrent_forget_mult.cu".encode())
            GPUForgetMult.ptx = program.compile()

        if torch.cuda.current_device() not in GPUForgetMult.configured_gpus:
            # First use on this GPU: load the PTX into a module and look up the
            # forward and backward kernels.
            m = function.Module()
            m.load(bytes(self.ptx.encode()))

            self.forget_mult = m.get_function("recurrent_forget_mult")
            self.bwd_forget_mult = m.get_function("bwd_recurrent_forget_mult")

            # Wrap the current CUDA stream pointer so it can be handed to kernel launches.
            Stream = namedtuple("Stream", ["ptr"])
            self.stream = Stream(ptr=torch.cuda.current_stream().cuda_stream)

            GPUForgetMult.configured_gpus[torch.cuda.current_device()] = (
                self.forget_mult,
                self.bwd_forget_mult,
                self.stream,
            )

        # Reuse the kernels and stream cached for the current device.
        (
            self.forget_mult,
            self.bwd_forget_mult,
            self.stream,
        ) = GPUForgetMult.configured_gpus[torch.cuda.current_device()]
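
The design choice worth noting above is that NVRTC compilation runs once (the PTX is stored on the class), while module loading and kernel lookup are cached separately per GPU, keyed on torch.cuda.current_device(). Below is a minimal sketch of that per-device caching idea in isolation; PerDeviceCache, get, and build are illustrative names and are not part of the original source.

import torch

class PerDeviceCache:
    # Maps device index -> cached handles, shared by all instances (illustrative sketch).
    configured_gpus = {}

    def get(self, build):
        dev = torch.cuda.current_device()
        if dev not in PerDeviceCache.configured_gpus:
            # build() stands in for the expensive, device-specific setup in the
            # example above (loading compiled PTX and resolving kernel functions).
            PerDeviceCache.configured_gpus[dev] = build()
        return PerDeviceCache.configured_gpus[dev]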