Example #1
class Tester():
    def __init__(self, render_flag):
        self.model = DDQN(36, 36)
        self.render_flag = render_flag
        self.width = 6
        self.height = 6
        self.env = MineSweeper(self.width, self.height, 6)
        if (self.render_flag):
            self.renderer = Render(self.env.state)
        self.load_models(20000)

    def get_action(self, state):
        state = state.flatten()
        mask = (1 - self.env.fog).flatten()
        action = self.model.act(state, mask)
        return action

    def load_models(self, number):
        path = "pre-trained\ddqn_dnn" + str(number) + ".pth"
        dict = torch.load(path)
        self.model.load_state_dict(dict['current_state_dict'])
        self.model.epsilon = 0

    def do_step(self, action):
        i = int(action / self.width)
        j = action % self.width

        if (self.render_flag):
            self.renderer.state = self.env.state
            self.renderer.draw()
            self.renderer.bugfix()
        next_state, terminal, reward = self.env.choose(i, j)
        return next_state, terminal, reward
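
For context, here is a minimal sketch of how this Tester might be driven for a single evaluation episode. The loop is an assumption built only from the methods shown above (get_action, do_step, env.state); it is not code taken from the source project.

# Hypothetical evaluation loop (assumption): play one game with the pre-trained model.
tester = Tester(render_flag=False)
state = tester.env.state            # initial board state exposed by the environment
terminal = False
reward = 0
while not terminal:
    action = tester.get_action(state)                  # masked action from the DDQN
    state, terminal, reward = tester.do_step(action)   # reveal the chosen cell
print("Episode finished, last reward:", reward)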
Example #2
    def __init__(self,width,height,bomb_no,render_flag):

        self.width = width
        self.height = height
        self.bomb_no = bomb_no
        self.box_count = width*height
        self.env = MineSweeper(self.width,self.height,self.bomb_no)
        self.current_model = DDQN(self.box_count,self.box_count)
        self.target_model = DDQN(self.box_count,self.box_count)
        self.target_model.eval()
        self.optimizer = torch.optim.Adam(self.current_model.parameters(),lr=0.003,weight_decay=1e-5)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,step_size=2000,gamma=0.95)
        self.target_model.load_state_dict(self.current_model.state_dict())
        self.buffer = Buffer(100000)
        self.gamma = 0.99
        self.render_flag = render_flag
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.90
        self.reward_threshold = 0.12
        self.reward_step = 0.01
        self.batch_size = 4096
        self.tau = 5e-5
        self.log = open("./Logs/ddqn_log.txt",'w')

        if(self.render_flag):
            self.Render = Render(self.env.state)
Example #3
 def __init__(self):
     self.width = 20
     self.height = 20
     self.bombs = 20
     self.env = MineSweeper(self.width, self.height, self.bombs)
     self.renderer = Render(self.env.state)
     self.renderer.state = self.env.state
Example #4
 def __init__(self, render_flag):
     self.model = DDQN(36, 36)
     self.render_flag = render_flag
     self.width = 6
     self.height = 6
     self.env = MineSweeper(self.width, self.height, 6)
     if (self.render_flag):
         self.renderer = Render(self.env.state)
     self.load_models(20000)
Example #5
    def render(self):
        if not self.map:
            self.build()
        
        if self.dry_run:
            self.output_error("Dry run completed successfully...")            

        renderer = Render(self.map,self.image,self.format,self.world_file)
        if self.image:
            renderer.render_file()
        else:
            renderer.print_stream()
        self.rendered = True
Example #6
class Play():
    def __init__(self):
        self.width = 20
        self.height = 20
        self.bombs = 20
        self.env = MineSweeper(self.width, self.height, self.bombs)
        self.renderer = Render(self.env.state)
        self.renderer.state = self.env.state

    def do_step(self, i, j):
        i = int(i / 30)
        j = int(j / 30)
        next_state, terminal, reward = self.env.choose(i, j)
        self.renderer.state = self.env.state
        self.renderer.draw()
        return next_state, terminal, reward
Example #7
    def render(self):
        if not self.map:
            self.build()

        if self.dry_run:
            self.output_error("Dry run completed successfully...")

        renderer = Render(self.map, self.image, self.format, self.world_file)
        if self.image:
            renderer.render_file()
        else:
            renderer.print_stream()
        self.rendered = True
Example #8
def downloadDatasets(datasets_selected):
    if not datasets_selected:
        print('No selected datasets!\n')
        return

    DownloadDatasets(datasets_selected)

    print('\n')


def generateData(datasets_selected, export_format):
    if not datasets_selected:
        print('No selected datasets!\n')
        return
    if not export_format:
        print('No export format selected!\n')
        return

    number_of_sets = int(input('Number of sets: '))

    Sampler(datasets_selected, NUMBER_OF_SETS=number_of_sets)
    Generator(export_format)

    print('\n')


render = Render(downloadDatasets, generateData)
render.init()
while (True):
    render.menu()
Example #9
import pygame

from asset_manager import AssetManager
from renderer import Render
from level import Level
from cat import Cat

width = 256 * 6
height = 24 * 6

pygame.init()
pygame.display.init()
pygame.display.set_caption("pet")
pygame.display.set_icon(AssetManager.getCat(seq=0))

done = False
fps = 2
renderer = Render(width, height)

cat = Cat()
level = Level(cat)
renderer.render_level(level)
clock = pygame.time.Clock()

while not done:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            done = True
    renderer.render_level(level)
    cat.location += 1
    print(cat.location)

    pygame.display.flip()
    clock.tick(fps)  # throttle the loop to `fps` frames per second (the clock was otherwise unused)
Example #10
class Driver():

    def __init__(self,width,height,bomb_no,render_flag):

        self.width = width
        self.height = height
        self.bomb_no = bomb_no
        self.box_count = width*height
        self.env = MineSweeper(self.width,self.height,self.bomb_no)
        self.current_model = DDQN(self.box_count,self.box_count)
        self.target_model = DDQN(self.box_count,self.box_count)
        self.target_model.eval()
        self.optimizer = torch.optim.Adam(self.current_model.parameters(),lr=0.003,weight_decay=1e-5)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,step_size=2000,gamma=0.95)
        self.target_model.load_state_dict(self.current_model.state_dict())
        self.buffer = Buffer(100000)
        self.gamma = 0.99
        self.render_flag = render_flag
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.90
        self.reward_threshold = 0.12
        self.reward_step = 0.01
        self.batch_size = 4096
        self.tau = 5e-5
        self.log = open("./Logs/ddqn_log.txt",'w')

        if(self.render_flag):
            self.Render = Render(self.env.state)

    
    def load_models(self,number):
        path = "./pre-trained/ddqn_dnn"+str(number)+".pth"
        weights = torch.load(path)
        self.current_model.load_state_dict(weights['current_state_dict'])
        self.target_model.load_state_dict(weights['target_state_dict'])
        self.optimizer.load_state_dict(weights['optimizer_state_dict'])
        self.current_model.epsilon = weights['epsilon']


    ### Gets an action from the DDQN model given the current state and mask
    def get_action(self,state,mask):
        state = state.flatten()
        mask = mask.flatten()
        action = self.current_model.act(state,mask)
        return action

    ### Performs the action and returns the next state, terminal flag, reward, and next mask
    def do_step(self,action):
        i = int(action/self.width)
        j = action%self.width
        if(self.render_flag):
            self.Render.state = self.env.state
            self.Render.draw()
            self.Render.bugfix()
        next_state,terminal,reward = self.env.choose(i,j)
        next_fog = 1-self.env.fog
        return next_state,terminal,reward,next_fog
    
    ### Reward Based Epsilon Decay 
    def epsilon_update(self,avg_reward):
        if(avg_reward>self.reward_threshold):
            self.current_model.epsilon = max(self.epsilon_min,self.current_model.epsilon*self.epsilon_decay)
            self.reward_threshold+= self.reward_step
    
    def TD_Loss(self):
        ### Samples batch from buffer memory
        state,action,mask,reward,next_state,next_mask,terminal = self.buffer.sample(self.batch_size)

        ### Converts the variables to tensors for processing by the DDQN
        state      = Variable(FloatTensor(float32(state)))
        mask      = Variable(FloatTensor(float32(mask)))
        next_state = FloatTensor(float32(next_state))
        action     = LongTensor(float32(action))
        next_mask      = FloatTensor(float32(next_mask))
        reward     = FloatTensor(reward)
        done       = FloatTensor(terminal)


        ### Predicts Q value for present and next state with current and target model
        q_values      = self.current_model(state,mask)
        next_q_values = self.target_model(next_state,next_mask)

        # Calculates Loss:
        #    If not Terminal:
        #        Loss = (reward + gamma*Q_val(next_state)) - Q_val(current_state)
        #    If Terminal:
        #        Loss = reward - Q_val(current_state)

        q_value          = q_values.gather(1, action.unsqueeze(1)).squeeze(1)
        next_q_value     = next_q_values.max(1)[0]
        expected_q_value = reward + self.gamma * next_q_value * (1 - done)
        loss = (q_value - expected_q_value.detach()).pow(2).mean()
        loss_print = loss.item()    

        # Propagates the Loss
        self.optimizer.zero_grad()
        loss.backward()

        self.optimizer.step()
        self.scheduler.step()

        for target_param, local_param in zip(self.target_model.parameters(), self.current_model.parameters()):
            target_param.data.copy_(self.tau*local_param.data + (1.0-self.tau)*target_param.data)
        return loss_print

    def save_checkpoints(self,batch_no):
        path = "./pre-trained/ddqn_dnn"+str(batch_no)+".pth"
        torch.save({
            'epoch': batch_no,
            'current_state_dict': self.current_model.state_dict(),
            'target_state_dict' : self.target_model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'epsilon':self.current_model.epsilon
        }, path)

    def save_logs(self,batch_no,avg_reward,loss,wins):
        res = [
                    str(batch_no),
                    "\tAvg Reward: ",
                    str(avg_reward),
                    "\t Loss: ",
                    str(loss),
                    "\t Wins: ", 
                    str(wins),
                    "\t Epsilon: ",
                    str(self.current_model.epsilon)
        ]
        log_line = " ".join(res)
        print(log_line)
        self.log.write(log_line+"\n")
        self.log.flush()
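
As a rough guide to how these pieces fit together, below is a minimal sketch of a training loop around this Driver. The episode bookkeeping, the buffer.push signature, and env.reset() are assumptions made for illustration; they are not taken from the source project.

# Hypothetical training loop (assumption): method names marked below are illustrative.
driver = Driver(width=6, height=6, bomb_no=6, render_flag=False)
state = driver.env.state
mask = 1 - driver.env.fog                       # visible-cell mask, as used in do_step
for step in range(1, 100001):
    action = driver.get_action(state, mask)
    next_state, terminal, reward, next_mask = driver.do_step(action)
    # Assumed Buffer API: stores one transition in the order sampled by TD_Loss.
    driver.buffer.push(state, action, mask, reward, next_state, next_mask, terminal)
    state, mask = next_state, next_mask
    if terminal:
        driver.env.reset()                      # assumed environment API
        state = driver.env.state
        mask = 1 - driver.env.fog
    if step % 100 == 0:
        loss = driver.TD_Loss()                 # samples a batch and takes one optimization step
        driver.epsilon_update(reward)           # the real loop would pass an averaged reward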
Example #11
from simulation import System, loops
from renderer import Render
from math import pi

test = System()
test.add_point(350, 600)
test.add_point(350, 600-170/2)
test.add_point(350, 600-585/2)
test.add_base(0, 215.1/2, 0, -pi)
test.add_line(0, 1, 389.1/2)
test.add_line(1, 2, 856.4/2)
test.add_draw(2)
test.main_loop(end=loops(5000), render=Render([700, 800], {"Line":'black', "Point":'CadetBlue1', "Drive":'maroon', "Drawer":"red"}))
Example #12
    def __init__(self, args):
        ## configs
        self.device = 'cuda:0' if args.gpu else 'cpu'
        self.checkpoint_path = args.checkpoint
        self.detect_human_face = args.detect_human_face
        self.render_video = args.render_video
        self.output_size = args.output_size
        self.image_size = 64
        self.min_depth = 0.9
        self.max_depth = 1.1
        self.border_depth = 1.05
        self.xyz_rotation_range = 60
        self.xy_translation_range = 0.1
        self.z_translation_range = 0
        self.fov = 10  # in degrees
        self.renderer = Render({"device": self.device})

        self.depth_rescaler = lambda d: (1 + d) / 2 * self.max_depth + (
            1 - d) / 2 * self.min_depth  # (-1,1) => (min_depth,max_depth)
        self.depth_inv_rescaler = lambda d: (d - self.min_depth) / (
            self.max_depth - self.min_depth)  # (min_depth,max_depth) => (0,1)
        self.rot_center_depth = (self.min_depth + self.max_depth) / 2

        fx = (self.image_size - 1) / 2 / (np.tan(self.fov / 2 * np.pi / 180))
        fy = (self.image_size - 1) / 2 / (np.tan(self.fov / 2 * np.pi / 180))
        cx = (self.image_size - 1) / 2
        cy = (self.image_size - 1) / 2
        K = [[fx, 0., cx], [0., fy, cy], [0., 0., 1.]]
        K = torch.FloatTensor(K).to(self.device)
        self.inv_K = torch.inverse(K).unsqueeze(0)
        self.K = K.unsqueeze(0)

        ## NN models
        self.netD = EDDeconv(cin=3, cout=1, nf=64, zdim=256, activation=None)
        self.netA = EDDeconv(cin=3, cout=3, nf=64, zdim=256)
        self.netL = Encoder(cin=3, cout=4, nf=32)
        self.netV = Encoder(cin=3, cout=6, nf=32)

        self.netD = self.netD.to(self.device)
        self.netA = self.netA.to(self.device)
        self.netL = self.netL.to(self.device)
        self.netV = self.netV.to(self.device)
        self.load_checkpoint()

        self.netD.eval()
        self.netA.eval()
        self.netL.eval()
        self.netV.eval()

        ## face detector
        if self.detect_human_face:
            from facenet_pytorch import MTCNN
            self.face_detector = MTCNN(select_largest=True, device=self.device)

        ## renderer
        if self.render_video:
            from unsup3d_extended.renderer import Renderer
            assert 'cuda' in self.device, 'A GPU device is required for rendering because the neural_renderer only has GPU implementation.'
            cfgs = {
                'device': self.device,
                'image_size': self.output_size,
                'min_depth': self.min_depth,
                'max_depth': self.max_depth,
                'fov': self.fov,
            }
            self.renderer = Renderer(cfgs)