Exemplos de reinforcement_net em Python, exemplos de models.reinforcement_net em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: trainer.py Projeto: sean85914/rl_pnp

    def __init__(self, suck_rewards, grasp_rewards, discount_factor, testing,
                 force_cpu):
        """Build the online and target networks, the loss and the optimizer.

        Args:
            suck_rewards: Stored unchanged as ``self.suck_rewards``.
            grasp_rewards: Stored unchanged as ``self.grasp_rewards``.
            discount_factor: Stored unchanged as ``self.discount_factor``.
            testing: Accepted for interface compatibility; this constructor
                does not read it.
            force_cpu: When truthy, stay on CPU even if CUDA is available.
        """
        # Device selection. print() with a single string argument produces
        # the same output on Python 2 and Python 3, unlike the old
        # ``print "..."`` statement which is a SyntaxError on Python 3.
        if torch.cuda.is_available() and not force_cpu:
            print("CUDA detected, use GPU.")
            self.use_cuda = True
        elif force_cpu:
            print("Force CPU.")
            self.use_cuda = False
        else:
            print("No CUDA detected, use CPU.")
            self.use_cuda = False

        # Model. The target network starts as a deep copy of the online
        # network, so both begin with identical weights.
        # NOTE(review): the meaning of the second argument (4) is defined by
        # reinforcement_net, which is not visible here -- confirm.
        self.model = reinforcement_net(self.use_cuda, 4)
        self.target = copy.deepcopy(self.model)

        if self.use_cuda:
            self.model = self.model.cuda()
            self.target = self.target.cuda()

        self.suck_rewards = suck_rewards
        self.grasp_rewards = grasp_rewards
        self.discount_factor = discount_factor

        # Huber loss, kept element-wise (no reduction). ``reduce=False`` is
        # the legacy spelling of ``reduction='none'``; it is kept so older
        # PyTorch releases keep working.
        self.criterion = torch.nn.SmoothL1Loss(reduce=False)
        if self.use_cuda:
            self.criterion = self.criterion.cuda()

        # Both networks are put in train mode.
        self.model.train()
        self.target.train()

        # Only the online network's parameters are handed to the optimizer.
        self.optimizer = torch.optim.SGD(self.model.parameters(),
                                         lr=1e-4,
                                         momentum=0.9,
                                         weight_decay=2e-5)
        self.iteration = 0

Exemplo n.º 2

0

Exibir arquivo

    def __init__(self, method, push_rewards, future_reward_discount,
                 is_testing, load_snapshot, snapshot_file, force_cpu):
        """Build the network, loss(es), optimizer and logging lists.

        Args:
            method: ``'reactive'`` builds ``reactive_net`` with two
                ``CrossEntropyLoss2d`` criteria; ``'reinforcement'`` builds
                ``reinforcement_net`` with an element-wise Huber loss.
            push_rewards: Stored as ``self.push_rewards`` in
                ``'reinforcement'`` mode only.
            future_reward_discount: Stored as
                ``self.future_reward_discount`` in ``'reinforcement'`` mode
                only.
            is_testing: Accepted for interface compatibility; this
                constructor does not read it.
            load_snapshot: When truthy, load weights from ``snapshot_file``.
            snapshot_file: Path handed to ``torch.load``.
            force_cpu: When truthy, stay on CPU even if CUDA is available.

        Raises:
            ValueError: If ``method`` is neither ``'reactive'`` nor
                ``'reinforcement'``.
        """
        self.method = method

        # Check if CUDA can be used.
        # NOTE(review): the force_cpu branch is also taken when CUDA is not
        # available at all, in which case the message is slightly misleading.
        if torch.cuda.is_available() and not force_cpu:
            print("CUDA detected. Running with GPU acceleration.")
            self.use_cuda = True
        elif force_cpu:
            print("CUDA detected, but overriding with option '--cpu'. Running with only CPU.")
            self.use_cuda = False
        else:
            print("CUDA is *NOT* detected. Running with only CPU.")
            self.use_cuda = False

        # Fully convolutional classification network for supervised learning
        if self.method == 'reactive':
            self.model = reactive_net(self.use_cuda)

            # Classification losses: every class has weight 1 except the
            # last one ("no loss"), whose weight is zeroed.
            push_num_classes = 3  # 0 - push, 1 - no change push, 2 - no loss
            push_class_weights = torch.ones(push_num_classes)
            push_class_weights[push_num_classes - 1] = 0
            if self.use_cuda:
                self.push_criterion = CrossEntropyLoss2d(push_class_weights.cuda()).cuda()
            else:
                self.push_criterion = CrossEntropyLoss2d(push_class_weights)
            grasp_num_classes = 3  # 0 - grasp, 1 - failed grasp, 2 - no loss
            grasp_class_weights = torch.ones(grasp_num_classes)
            grasp_class_weights[grasp_num_classes - 1] = 0
            if self.use_cuda:
                self.grasp_criterion = CrossEntropyLoss2d(grasp_class_weights.cuda()).cuda()
            else:
                self.grasp_criterion = CrossEntropyLoss2d(grasp_class_weights)

        # Fully convolutional Q network for deep reinforcement learning
        elif self.method == 'reinforcement':
            self.model = reinforcement_net(self.use_cuda)
            self.push_rewards = push_rewards
            self.future_reward_discount = future_reward_discount

            # Huber loss, kept element-wise. ``reduce=False`` is the legacy
            # spelling of ``reduction='none'``.
            self.criterion = torch.nn.SmoothL1Loss(reduce=False)
            if self.use_cuda:
                self.criterion = self.criterion.cuda()

        else:
            # Fail here rather than with an AttributeError on self.model
            # further down.
            raise ValueError(
                "Unknown method %r; expected 'reactive' or 'reinforcement'."
                % (method,))

        # Load pre-trained model
        if load_snapshot:
            print('snapshot_file', snapshot_file)
            # Map every storage to CPU so a snapshot saved from a GPU run
            # can be loaded on a CPU-only machine (or with force_cpu). The
            # model itself is moved to the GPU just below when requested.
            snapshot_state = torch.load(
                snapshot_file, map_location=lambda storage, loc: storage)
            self.model.load_state_dict(snapshot_state)
            print('Pre-trained model snapshot loaded from: %s' % (snapshot_file))

        # Convert model from CPU to GPU
        if self.use_cuda:
            self.model = self.model.cuda()

        # Set model to training mode
        self.model.train()

        # Initialize optimizer
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=1e-4, momentum=0.9, weight_decay=2e-5)
        self.iteration = 0

        # Initialize lists to save execution info and RL variables
        self.executed_action_log = []
        self.label_value_log = []
        self.reward_value_log = []
        self.predicted_value_log = []
        self.use_heuristic_log = []
        self.is_exploit_log = []
        self.clearance_log = []

Exemplo n.º 3

0

Exibir arquivo

Arquivo: trainer.py Projeto: zhuhang0796/grasping-invisible

    def __init__(self, future_reward_discount, is_testing, load_snapshot,
                 snapshot_file, force_cpu):
        """Build the Q network, Huber loss, optimizer and logging containers.

        Args:
            future_reward_discount: Stored as ``self.future_reward_discount``.
            is_testing: When truthy the optimizer uses ``lr=1e-5`` instead
                of ``lr=1e-4``.
            load_snapshot: When truthy, load weights from ``snapshot_file``.
            snapshot_file: Path handed to ``torch.load``.
            force_cpu: When truthy, stay on CPU even if CUDA is available.
        """
        # Check if CUDA can be used
        if torch.cuda.is_available() and not force_cpu:
            print("CUDA detected. Running with GPU acceleration.")
            self.use_cuda = True
        elif force_cpu:
            print(
                "CUDA detected, but overriding with option '--cpu'. Running with only CPU."
            )
            self.use_cuda = False
        else:
            print("CUDA is *NOT* detected. Running with only CPU.")
            self.use_cuda = False

        # Fully convolutional Q network for deep reinforcement learning
        self.model = reinforcement_net(self.use_cuda)
        self.future_reward_discount = future_reward_discount

        # Huber loss, kept element-wise (no reduction)
        self.criterion = torch.nn.SmoothL1Loss(reduction='none')
        if self.use_cuda:
            self.criterion = self.criterion.cuda()

        # Load pre-trained model
        if load_snapshot:
            # Map every storage to CPU so a snapshot saved from a GPU run
            # can be loaded on a CPU-only machine (or with force_cpu). The
            # model itself is moved to the GPU just below when requested.
            snapshot_state = torch.load(
                snapshot_file, map_location=lambda storage, loc: storage)
            self.model.load_state_dict(snapshot_state)
            print('Pre-trained RL snapshot loaded from: %s' % snapshot_file)

        # Convert model from CPU to GPU
        if self.use_cuda:
            self.model = self.model.cuda()

        # Set model to training mode
        self.model.train()

        # Initialize optimizer; the two modes differ only in learning rate.
        learning_rate = 1e-5 if is_testing else 1e-4
        self.optimizer = torch.optim.SGD(self.model.parameters(),
                                         lr=learning_rate,
                                         momentum=0.9,
                                         weight_decay=2e-5)
        self.iteration = 0

        # Initialize lists to save execution info and RL variables
        self.executed_action_log = []
        self.label_value_log = []
        self.reward_value_log = []
        self.predicted_value_log = []
        self.reposition_log = []
        self.augment_ids = []
        self.target_grasped_log = []
        # Bounded queue: holds at most the 10 most recently appended items.
        self.loss_queue = collections.deque([], 10)
        self.loss_rec = []
        self.sync_loss = []
        self.sync_acc = []

Exemplo n.º 4

0

Exibir arquivo

    def __init__(self, method, push_rewards, future_reward_discount,
                 is_testing, load_snapshot, snapshot_file):
        """Build the network, loss(es), optimizer and logging lists.

        Args:
            method: ``'reactive'`` builds ``reactive_net`` with two
                ``CrossEntropyLoss2d`` criteria; ``'reinforcement'`` builds
                ``reinforcement_net`` with an element-wise Huber loss.
            push_rewards: Stored as ``self.push_rewards`` in
                ``'reinforcement'`` mode only.
            future_reward_discount: Stored as
                ``self.future_reward_discount`` in ``'reinforcement'`` mode
                only.
            is_testing: Accepted for interface compatibility; this
                constructor does not read it.
            load_snapshot: When truthy, load weights from ``snapshot_file``.
            snapshot_file: Path handed to ``torch.load``.

        Raises:
            ValueError: If ``method`` is neither ``'reactive'`` nor
                ``'reinforcement'``.
        """
        self.method = method

        # Check if CUDA can be used
        if torch.cuda.is_available():
            print("CUDA detected. Running with GPU acceleration.")
            self.use_cuda = True
        else:
            print("CUDA is *NOT* detected. Running with only CPU.")
            self.use_cuda = False

        # Fully convolutional classification network for supervised learning
        if self.method == 'reactive':
            self.model = reactive_net(self.use_cuda)

            # Classification losses: every class has weight 1 except the
            # last one ("no loss"), whose weight is zeroed.
            push_num_classes = 3  # 0 - push, 1 - no change push, 2 - no loss
            push_class_weights = torch.ones(push_num_classes)
            push_class_weights[push_num_classes - 1] = 0
            if self.use_cuda:
                self.push_criterion = CrossEntropyLoss2d(
                    push_class_weights.cuda()).cuda()
            else:
                self.push_criterion = CrossEntropyLoss2d(push_class_weights)
            grasp_num_classes = 3  # 0 - grasp, 1 - failed grasp, 2 - no loss
            grasp_class_weights = torch.ones(grasp_num_classes)
            grasp_class_weights[grasp_num_classes - 1] = 0
            if self.use_cuda:
                self.grasp_criterion = CrossEntropyLoss2d(
                    grasp_class_weights.cuda()).cuda()
            else:
                self.grasp_criterion = CrossEntropyLoss2d(grasp_class_weights)

        # Fully convolutional Q network for deep reinforcement learning
        elif self.method == 'reinforcement':
            self.model = reinforcement_net(self.use_cuda)
            self.push_rewards = push_rewards
            self.future_reward_discount = future_reward_discount

            # Huber loss, kept element-wise. ``reduce=False`` is the legacy
            # spelling of ``reduction='none'``.
            self.criterion = torch.nn.SmoothL1Loss(reduce=False)
            if self.use_cuda:
                self.criterion = self.criterion.cuda()

        else:
            # Fail here rather than with an AttributeError on self.model
            # further down.
            raise ValueError(
                "Unknown method %r; expected 'reactive' or 'reinforcement'."
                % (method,))

        # Load pre-trained model
        if load_snapshot:
            # Map every storage to CPU so a snapshot saved from a GPU run
            # can be loaded on a CPU-only machine. The model itself is moved
            # to the GPU just below when CUDA is in use.
            loaded_snapshot_state_dict = torch.load(
                snapshot_file, map_location=lambda storage, loc: storage)

            # PyTorch v0.4 removes periods in state dict keys, with no
            # backwards compatibility, so legacy names are rewritten here.
            # str.replace is already a no-op when the token is absent; the
            # previous ``if k.find(...)`` guard was wrong because find()
            # returns -1 (truthy) on a miss and 0 (falsy) on a hit at the
            # very start of the key.
            legacy_renames = (
                ('conv.1', 'conv1'),
                ('norm.1', 'norm1'),
                ('conv.2', 'conv2'),
                ('norm.2', 'norm2'),
            )
            renamed_state_dict = OrderedDict()
            for key, value in loaded_snapshot_state_dict.items():
                for legacy_token, new_token in legacy_renames:
                    key = key.replace(legacy_token, new_token)
                renamed_state_dict[key] = value
            self.model.load_state_dict(renamed_state_dict)

            print('Pre-trained model snapshot loaded from: %s' %
                  (snapshot_file))

        # Convert model from CPU to GPU
        if self.use_cuda:
            self.model = self.model.cuda()

        # Set model to training mode
        self.model.train()

        # Initialize optimizer
        self.optimizer = torch.optim.SGD(self.model.parameters(),
                                         lr=1e-4,
                                         momentum=0.9,
                                         weight_decay=2e-5)
        self.iteration = 0

        # Initialize lists to save execution info and RL variables
        self.executed_action_log = []
        self.label_value_log = []
        self.reward_value_log = []
        self.predicted_value_log = []
        self.use_heuristic_log = []
        self.is_exploit_log = []
        self.clearance_log = []