示例#1
0
    def __init__(self,
                 timesteps=32,
                 gamma=0.99,
                 epsilon=1.0,
                 epsilon_min=0.01,
                 epsilon_log_decay=0.99,
                 alpha=0.01):
        self.supervisor = Supervisor()
        self.robot_node = self.supervisor.getFromDef("MY_BOT")
        if self.robot_node is None:
            sys.stderr.write(
                "No DEF MY_ROBOT node found in the current world file\n")
            sys.exit(1)
        self.trans_field = self.robot_node.getField("translation")
        self.rot_field = self.robot_node.getField("rotation")
        self.timestep = timesteps

        self.camera = Camera('camera')
        self.camera.enable(self.timestep)
        self.init_image = self.get_image()

        self.timestep = timesteps
        self.receiver = Receiver('receiver')
        self.receiver.enable(self.timestep)
        self.emitter = Emitter('emitter')

        self.memory = deque(maxlen=50000)
        self.batch_size = 128
        self.alpha = alpha
        self.gamma = gamma
        self.epsion_init = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_log_decay

        self.pre_state = self.init_image
        self.pre_action = -1

        self.pre_go_straight = False
        self.reward = 0
        self.step = 0
        self.max_step = 200
        self.file = None

        # interactive
        self.feedbackProbability = 0
        self.feedbackAccuracy = 1
        self.PPR = False
        self.feedbackTotal = 0
        self.feedbackAmount = 0

        self.init_model()
        self.init_parametter()
    def setupDevice(self):
        self.leftMotor = self.robot.getDevice('left wheel motor')
        self.rightMotor = self.robot.getDevice('right wheel motor')
        self.leftMotor.setPosition(float('inf'))
        self.rightMotor.setPosition(float('inf'))

        self.rightDistanceSensor = self.robot.getDevice('ds1')
        self.leftDistanceSensor = self.robot.getDevice('ds0')
        self.rightDistanceSensor.enable(self.timestep)
        self.leftDistanceSensor.enable(self.timestep)

        self.gps = self.robot.getDevice('gps')
        self.touchSensor1 = self.robot.getDevice('touch_sensor1')
        self.touchSensor2 = self.robot.getDevice('touch_sensor2')
        self.touchSensor3 = self.robot.getDevice('touch_sensor3')
        self.touchSensor4 = self.robot.getDevice('touch_sensor4')
        self.touchSensor5 = self.robot.getDevice('touch_sensor5')
        self.gps.enable(self.timestep)
        self.touchSensor1.enable(self.timestep)
        self.touchSensor2.enable(self.timestep)
        self.touchSensor3.enable(self.timestep)
        self.touchSensor4.enable(self.timestep)
        self.touchSensor5.enable(self.timestep)

        self.camera = Camera('camera')
        self.camera.enable(self.timestep)

        self.leftMotor.setVelocity(0)
        self.rightMotor.setVelocity(0)

        self.init_leftValue = self.leftDistanceSensor.getValue()
        self.init_rightValue = self.rightDistanceSensor.getValue()

        self.receiver = Receiver('receiver')
        self.emitter = Emitter('emitter')
        self.receiver.enable(self.timestep)
class RobotController:
    def __init__(self, max_steps=2, init_position=(0, 0, 0), final_position=(-0.3, 0, 0.3), max_speed=3,
                 ):
        self.robot = Robot()

        self.timestep = int(self.robot.getBasicTimeStep())
        self.max_steps = max_steps
        self.max_speed = max_speed

        self.setupDevice()

        self.init_position = init_position
        self.current_position = init_position
        self.final_position = final_position

        self.done = False

        # Interactive
        self.feedbackAmount = 0

        # self.policy_reuse = PPR()

    def setupDevice(self):
        self.leftMotor = self.robot.getDevice('left wheel motor')
        self.rightMotor = self.robot.getDevice('right wheel motor')
        self.leftMotor.setPosition(float('inf'))
        self.rightMotor.setPosition(float('inf'))

        self.rightDistanceSensor = self.robot.getDevice('ds1')
        self.leftDistanceSensor = self.robot.getDevice('ds0')
        self.rightDistanceSensor.enable(self.timestep)
        self.leftDistanceSensor.enable(self.timestep)

        self.gps = self.robot.getDevice('gps')
        self.touchSensor1 = self.robot.getDevice('touch_sensor1')
        self.touchSensor2 = self.robot.getDevice('touch_sensor2')
        self.touchSensor3 = self.robot.getDevice('touch_sensor3')
        self.touchSensor4 = self.robot.getDevice('touch_sensor4')
        self.touchSensor5 = self.robot.getDevice('touch_sensor5')
        self.gps.enable(self.timestep)
        self.touchSensor1.enable(self.timestep)
        self.touchSensor2.enable(self.timestep)
        self.touchSensor3.enable(self.timestep)
        self.touchSensor4.enable(self.timestep)
        self.touchSensor5.enable(self.timestep)

        self.camera = Camera('camera')
        self.camera.enable(self.timestep)

        self.leftMotor.setVelocity(0)
        self.rightMotor.setVelocity(0)

        self.init_leftValue = self.leftDistanceSensor.getValue()
        self.init_rightValue = self.rightDistanceSensor.getValue()

        self.receiver = Receiver('receiver')
        self.emitter = Emitter('emitter')
        self.receiver.enable(self.timestep)

    def is_collised(self):
        if (self.touchSensor1.getValue() + self.touchSensor2.getValue() +
                self.touchSensor3.getValue() + self.touchSensor4.getValue() +
                self.touchSensor5.getValue() > 0):
            print(1, self.touchSensor1.getValue())
            print(2, self.touchSensor2.getValue())
            print(3, self.touchSensor3.getValue())
            print(4, self.touchSensor4.getValue())
            print(5, self.touchSensor5.getValue())
            return True
        gpsValue = self.gps.getValues()
        self.current_position = gpsValue
        if (self.current_position[0] < - 0.5 or self.current_position[0] > 0.5 or
                self.current_position[2] < - 0.5 or self.current_position[2] > 0.5):
            return True

        return False

    def step(self, a):
        if not self.done:
            self.robot.step(self.timestep)
            if not self.is_collised():
                leftValue = self.leftDistanceSensor.getValue()
                rightValue = self.rightDistanceSensor.getValue()
                reward = -0.1
                leftSpeed, rightSpeed = 0, 0
                if a == 0:
                    leftSpeed, rightSpeed = self.turnLeft(leftValue, rightValue)
                elif a == 1:
                    leftSpeed, rightSpeed = self.turnRight(leftValue, rightValue)
                elif a == 2:
                    leftSpeed, rightSpeed = self.goStraight()
                    reward = 0
                # elif a == 3:
                #     leftSpeed, rightSpeed = self.goSlow()

                self.leftMotor.setVelocity(leftSpeed)
                self.rightMotor.setVelocity(rightSpeed)

                # set observation .............

                observations = leftValue, rightValue

                # set reward .............

                # set done .........
                r = self.set_done()
                return observations, reward + r, self.done, False

            else:
                observations = self.reset()
                reward = -100
                return observations, reward, False, True

        return None, None, self.done, False

    def set_done(self):
        gpsValue = self.gps.getValues()
        self.current_position = gpsValue
        if abs(self.current_position[0] - self.final_position[0]) <= 0.08 and \
                abs(self.current_position[2] - self.final_position[2]) <= 0.08:
            self.done = True
            return 1000
        return 0

    def random_action(self):
        return random.choice(self.action_space())

    def goStraight(self):
        return self.max_speed, self.max_speed

    def goSlow(self):
        return self.max_speed / 4, self.max_speed / 4

    def turnLeft(self, leftDistance, rightDistance):
        return -(leftDistance / 100), (rightDistance / 100) + 0.5

    def turnRight(self, leftDistance, rightDistance):
        return (leftDistance / 100) + 0.5, -(rightDistance / 100)

    def reset(self):
        self.done = False
        return self.init_leftValue, self.init_rightValue

    def send_to_super(self, message, data):
        data = message, data
        dataSerialized = pickle.dumps(data)
        self.emitter.send(dataSerialized)

    def receive_handle(self):
        if self.receiver.getQueueLength() > 0:
            data = self.receiver.getData()
            message, action, step = pickle.loads(data)
            self.receiver.nextPacket()
            if message == 'step':
                obs, r, d, i = self.step(action)
                data = obs, r, d, i, step
                self.send_to_super('step_done', data)
            if message == 'reset':
                obs = self.reset()
                self.send_to_super('reset_done', obs)

        return -1

    def start(self):
        while self.robot.step(self.timestep) != -1:
            self.receive_handle()
示例#4
0
    def random_zombie(self):

        self.robotNode = self.getSelf()
        self.translationField = self.robotNode.getField('translation')
        self.rotationField = self.robotNode.getField("rotation")

        youbotNode = self.getFromDef('Youbot')
        youbotTranslationField = youbotNode.getField('translation')

        for i in range(0, self.BODY_PARTS_NUMBER):
            self.joints_position_field.append(
                self.robotNode.getField(self.joint_names[i]))

        timer = 0
        self.time_step = int(self.getBasicTimeStep())
        goal = [5, 5]
        time_at_berry = 0
        emitter = Emitter("emitter")
        message = struct.pack("chd", b"p", 100, 120.08)
        emitter.setChannel(-1)
        emitter.setRange(4)
        while not self.step(self.time_step) == -1:
            self.translation = self.translationField.getSFVec3f()
            self.move_zombie(self.translation[0], self.translation[2], goal[0],
                             goal[1])

            if (timer == 32):  #only change movement once every second
                timer = 0
                emitter.send(message)
                youbotTranslation = youbotTranslationField.getSFVec3f()

                berryDistance = self.check_berry_close(self.translation)
                if (berryDistance[0] != 30) and (time_at_berry < 10):
                    #print (berryDistance)
                    goal = [berryDistance[0], berryDistance[2]]
                    time_at_berry = time_at_berry + 1

                elif (self.youbotDistance(youbotTranslation, self.translation)
                      < 3):  #if robot close, chase it
                    x_goal = youbotTranslation[0]
                    z_goal = youbotTranslation[2]
                    #goal = [x_goal, z_goal]
                    goal = [youbotTranslation[0], youbotTranslation[2]]
                    #print ("close to robot")

                else:  #choose a random spot
                    if (random.randint(0, 5) == 2):
                        goal = [
                            random.randint(
                                int(self.translation[0]) - 5,
                                int(self.translation[0]) + 5),
                            random.randint(
                                int(self.translation[2]) - 5,
                                int(self.translation[2]) + 5)
                        ]
                        if (goal[0] < -12):
                            goal[0] = -12
                        if (goal[0] > 12):
                            goal[0] = 12
                        if (goal[1] < -12):
                            goal[1] = -12
                        if (goal[1] > 12):
                            goal[1] = 12
                if ((goal[0] <= -1) and (self.translation[0] > -1)
                        and (self.translation[2] > 0)):
                    goal[0] = -0.5
                if ((goal[0] >= -1) and (self.translation[0] < -1)
                        and (self.translation[2] > 0)):
                    goal[0] = -1.5
                if ((goal[1] <= -5) and (self.translation[2] > -5)
                        and (self.translation[0] > -4)):
                    goal[1] = -4.5
                if ((goal[1] >= -5) and (self.translation[2] < -5)
                        and (self.translation[0] > -4)):
                    goal[1] = -5.5
                if (time_at_berry < 30 and time_at_berry > 10):
                    #print ("waiting till next berry")
                    time_at_berry = time_at_berry + 1
                if (time_at_berry > 29):  #8 seconds of berry cooldown
                    time_at_berry = 0
                #print (time_at_berry)
            timer = timer + 1
示例#5
0
    def random_zombie(self):

        self.robotNode = self.getSelf()
        self.translationField = self.robotNode.getField('translation')
        self.rotationField = self.robotNode.getField("rotation")

        youbotNode = self.getFromDef('Youbot')
        youbotTranslationField = youbotNode.getField('translation')

        self.translation = self.translationField.getSFVec3f()
        self.start_x = self.translation[0]
        self.start_z = self.translation[2]

        for i in range(0, self.BODY_PARTS_NUMBER):
            self.joints_position_field.append(
                self.robotNode.getField(self.joint_names[i]))

        timer = 0
        self.time_step = int(self.getBasicTimeStep())
        goal = [5, 5]
        emitter = Emitter("emitter")
        message = struct.pack("chd", b"a", 100, 120.08)
        emitter.setChannel(-1)
        emitter.setRange(4)
        while not self.step(self.time_step) == -1:
            self.translation = self.translationField.getSFVec3f()
            self.move_zombie(self.translation[0], self.translation[2], goal[0],
                             goal[1])

            if (timer == 32):  #only change movement once every second
                timer = 0
                emitter.send(message)
                youbotTranslation = youbotTranslationField.getSFVec3f()
                if (self.youbotDistance(youbotTranslation, self.translation) <
                        3):  #if robot close, chase it
                    x_goal = youbotTranslation[0]
                    z_goal = youbotTranslation[2]
                    if ((x_goal > self.start_x - 3)
                            and (x_goal < self.start_x + 3)
                            and (z_goal > self.start_z - 3)
                            and (z_goal < self.start_z + 3)):
                        goal = [youbotTranslation[0], youbotTranslation[2]]
                    else:
                        goal = [
                            random.randint(
                                int(self.start_x) - 3,
                                int(self.start_x) + 3),
                            random.randint(
                                int(self.start_z) - 3,
                                int(self.start_z) + 3)
                        ]
                    #print ("close to robot")

                else:  #choose a random spot
                    if (random.randint(0, 5) == 2):
                        goal = [
                            random.randint(
                                int(self.start_x) - 3,
                                int(self.start_x) + 3),
                            random.randint(
                                int(self.start_z) - 3,
                                int(self.start_z) + 3)
                        ]
                if ((goal[0] <= -1) and (self.translation[0] > -1)
                        and (self.translation[2] > 0)):
                    goal[0] = -0.5
                if ((goal[0] >= -1) and (self.translation[0] < -1)
                        and (self.translation[2] > 0)):
                    goal[0] = -1.5
                if ((goal[1] <= -5) and (self.translation[2] > -5)
                        and (self.translation[0] > -4)):
                    goal[1] = -4.5
                if ((goal[1] >= -5) and (self.translation[2] < -5)
                        and (self.translation[0] > -4)):
                    goal[1] = -5.5

            timer = timer + 1
示例#6
0
from controller import Supervisor, Node
from controller import Robot, Motor, DistanceSensor, Emitter
from ikpy.chain import Chain
import ikpy
import numpy as np
import time

TIME_STEP = 32

sup = Supervisor()
emitter = Emitter("emitter")
emitter.setChannel(1)


ball_node = sup.getFromDef("BALL")
trans_field = ball_node.getField("translation")
ball_node.setVelocity([1,0,0])



while sup.step(TIME_STEP) != -1:
    values = trans_field.getSFVec3f()
    supl = str(values[0]) + " " + str(values[1]) + " " + str(values[2]) + " "
    bytes = supl.encode()
    emitter.send(bytes)
    
   # print("Ball is at position: %g %g %g" % (values[0], values[1], values[2]))
    
示例#7
0
class SupervisorController:
    def __init__(self,
                 timesteps=32,
                 gamma=0.99,
                 epsilon=1.0,
                 epsilon_min=0.01,
                 epsilon_log_decay=0.99,
                 alpha=0.01):
        self.supervisor = Supervisor()
        self.robot_node = self.supervisor.getFromDef("MY_BOT")
        if self.robot_node is None:
            sys.stderr.write(
                "No DEF MY_ROBOT node found in the current world file\n")
            sys.exit(1)
        self.trans_field = self.robot_node.getField("translation")
        self.rot_field = self.robot_node.getField("rotation")
        self.timestep = timesteps

        self.camera = Camera('camera')
        self.camera.enable(self.timestep)
        self.init_image = self.get_image()

        self.timestep = timesteps
        self.receiver = Receiver('receiver')
        self.receiver.enable(self.timestep)
        self.emitter = Emitter('emitter')

        self.memory = deque(maxlen=50000)
        self.batch_size = 128
        self.alpha = alpha
        self.gamma = gamma
        self.epsion_init = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_log_decay

        self.pre_state = self.init_image
        self.pre_action = -1

        self.pre_go_straight = False
        self.reward = 0
        self.step = 0
        self.max_step = 200
        self.file = None

        # interactive
        self.feedbackProbability = 0
        self.feedbackAccuracy = 1
        self.PPR = False
        self.feedbackTotal = 0
        self.feedbackAmount = 0

        self.init_model()
        self.init_parametter()

    def init_model(self):
        self.main_network = self.build_network()
        self.target_network = self.build_network()
        self.agent_network = self.build_network()
        self.generalise_model = self.init_gereral_model()
        self.pca_model = self.init_pca_model()

    def init_parametter(self):
        self.epsilon = self.epsion_init
        self.episode = 0
        self.policy_reuse = PPR()

    def init_gereral_model(self):
        n_clusters = 2
        return KMeans(n_clusters=n_clusters, n_init=10)

    def init_pca_model(self):
        n_component = 100
        return PCA(n_components=100, random_state=22)

    def get_image(self):
        image = self.camera.getImage()
        if image is None:
            empty_image = np.zeros((64, 64, 3))
            return Image.fromarray(empty_image.astype(np.uint8))
        else:
            return self.toPIL(image)

    def toPIL(self, bytes_data):
        imgPIL = Image.frombytes('RGBA', (64, 64), bytes_data)
        imgPIL = imgPIL.convert('RGB')
        return imgPIL

    def image_process(self, PIL):
        array = np.array(PIL)
        array = array / 255
        return np.reshape(array, list((1, ) + array.shape))

    def save_image(self, PIL, ep, step):
        PIL.save(resultsFolder + 'images/' + str(ep) + '_' + str(step) +
                 '.png')

    def update_target(self):
        self.target_network.set_weights(self.main_network.get_weights())

    def observation_space(self):
        return self.observation_space

    def get_epsilon(self, t):
        return max(
            self.epsilon_min,
            min(self.epsilon, 1.0 - math.log10((t + 1) * self.epsilon_decay)))

    def build_network(self):
        model = Sequential()
        model.add(Input(shape=(64, 64, 3)))
        model.add(Conv2D(4, kernel_size=8, activation='linear',
                         padding='same'))
        model.add(MaxPooling2D((2, 2), padding='same'))
        model.add(Conv2D(8, kernel_size=4, activation='linear',
                         padding='same'))
        model.add(MaxPooling2D((2, 2), padding='same'))
        model.add(
            Conv2D(16, kernel_size=2, activation='linear', padding='same'))
        model.add(MaxPooling2D((2, 2), padding='same'))
        model.add(Flatten())
        model.add(Dense(256, activation='linear'))
        model.add(Dense(len(self.action_space()), activation='softmax'))
        opt = Nadam(learning_rate=self.alpha)
        model.compile(loss='mse', optimizer=opt)
        return model

    def save_reward(self, file, rewards, totals, feedbacks):
        pairs = {'Reward': rewards, 'Total': totals, 'Feedback': feedbacks}
        data_df = pd.DataFrame.from_dict(pairs)
        data_df.to_csv(file)

    def save_model(self, file):
        self.main_network.save_weights(file)
        self.save_generalise_model(file)

    def save_generalise_model(self, filename):
        obs = [s[5] for s in self.memory]
        with open(filename + 'gel', "wb") as f:
            pickle.dump(self.generalise_model, f)
        with open(filename + 'pca', "wb") as f:
            pickle.dump(self.pca_model, f)
        with open(filename + 'state', "wb") as f_:
            pickle.dump(obs, f_)

    def load_generalise_model(self, filename):
        with open(filename + 'gel', "rb") as f:
            print(filename + 'gel')
            self.generalise_model = pickle.load(f)
        with open(filename + 'pca', "rb") as f:
            self.pca_model = pickle.load(f)

    def load_model(self, file):
        self.agent_network.load_weights(file + '.model')
        self.load_generalise_model(file + '.model')
        self.update_target()

    def finalise(self, rewards, totals, feedbacks, ppr):
        file = self.file + '_' + str(self.feedbackProbability) + '_' + str(
            self.feedbackAccuracy) + str(ppr)
        self.save_reward(file + '.csv', rewards, totals, feedbacks)
        self.save_model(file + '.model')

    def get_group(self, state):
        # nx, ny, nz = state[0].shape
        # state = state.reshape(nx * ny * nz)
        # state = [state]
        # new_state = self.pca_model.transform(state)

        nx, ny, nz = state[0].shape
        image_grayscale = state[0].mean(axis=2).astype(np.float32)
        image_grayscale = image_grayscale.reshape(nx * ny)
        image_grayscale = [image_grayscale]
        return self.generalise_model.predict(image_grayscale)[0]

    def memorize(self, state, action, reward, next_state, done, obs):
        self.memory.append((state, action, reward, next_state, done, obs))

    def updatePolicy(self, batchSize=0):
        if batchSize == 0:
            batchSize = self.batch_size
        if len(self.memory) < batchSize:
            self.trainNetwork(len(self.memory))
            return  # do nothing
        self.trainNetwork(batchSize)
        return

    def trainNetwork(self, batch_size):

        # sample a mini batch of transition from the replay buffer
        minibatch = random.sample(self.memory, batch_size)
        states = []
        targets = []

        for state, action, reward, next_state, done, obs in minibatch:
            state_processed = self.image_process(state)
            next_state_processed = self.image_process(next_state)

            if not done:
                target = self.target_network.predict(next_state_processed)
                target_Q = (reward + self.gamma * np.max(target[0]))
            else:
                target_Q = reward
            # compute the Q value using the main network
            Q_values = self.main_network.predict(state_processed)
            Q_values[0][action] = target_Q
            states.append(state_processed[0])
            targets.append(Q_values[0])
        # train the main network
        states = np.array(states)
        targets = np.array(targets)
        self.main_network.fit(states, targets, epochs=1, verbose=0)

    def normal_action(self, state, epsilon=0.1):
        # exploration
        if np.random.random() <= epsilon:
            action = self.random_action()
            # PPR:
            if self.PPR:
                group = self.get_group(state)
                redoAction, rate = self.policy_reuse.get(group)
                # print(group, rate)
                if (np.random.rand() < rate):
                    action = redoAction
            # end PPR:

        # exploitation
        else:
            action = np.argmax(self.main_network.predict(state))
        return action

    def action_space(self):
        """
        0: left
        1: right
        2: straight
        """
        return [0, 1, 2]

    def random_action(self):
        # if np.random.rand() < 0.5:
        #     return 2
        # else:
        #     return random.choice([0, 1])
        return random.choice(self.action_space())

    def propose_action(self, obs):
        return

    def has_obstacle(self, leftValue, rightValue):
        return leftValue > 500 or rightValue > 500

    def back_to_begin(self):
        INITIAL = [0, 0, 0]
        self.trans_field.setSFVec3f(INITIAL)
        ROT_INITIAL = [0, 1, 0, 3.2]
        self.rot_field.setSFRotation(ROT_INITIAL)

    def reset(self):
        self.pre_state = self.init_image
        self.pre_action = -1

        self.pre_go_straight = False
        self.reward = 0
        self.step = 0
        self.finish = False

        self.feedbackTotal = 0
        self.feedbackAmount = 0

        self.back_to_begin()
        self.send_to_robot('reset', None)

    def propose_new_action(self, obs):
        left, right = obs
        obstacle_flag = self.has_obstacle(left, right)

        pre_state_processed = self.image_process(self.pre_state)
        if not obstacle_flag:
            action = 2
            self.pre_action = action
            self.pre_go_straight = True
        else:

            # propose new action ------------------
            if self.PPR:
                self.policy_reuse.step()
            if np.random.rand() < self.feedbackProbability:
                # get advice
                trueAction = np.argmax(
                    self.agent_network.predict(pre_state_processed))

                # PPR:
                if self.PPR:
                    group = self.get_group(pre_state_processed)
                    self.policy_reuse.add(group, trueAction)
                # end PPR:

                if np.random.rand() < self.feedbackAccuracy:
                    action = trueAction
                else:
                    while True:
                        action = self.random_action()
                        if action != trueAction:
                            break
                self.feedbackAmount += 1
            else:
                action = self.normal_action(pre_state_processed, self.epsilon)
            self.pre_go_straight = False
            self.feedbackTotal += 1

            self.pre_action = action
        return action

    def execute(self, obs, reward, done, info):

        state = self.get_image()
        if self.pre_action != -1:
            self.reward += reward
            if self.step == self.max_step or done:
                if done:
                    self.save_image(state, self.episode, self.step)
                    self.save_image(self.pre_state, self.episode,
                                    self.step - 1)

                self.memorize(self.pre_state, self.pre_action, reward, state,
                              done, obs)
                self.updatePolicy(self.step)
                self.update_target()

                if self.epsilon > self.epsilon_min:
                    self.epsilon *= self.epsilon_decay
                self.episode += 1
                self.finish = True
                return

            if info:
                self.back_to_begin()

            if not self.pre_go_straight:
                self.memorize(self.pre_state, self.pre_action, reward, state,
                              done, obs)
                self.pre_state = state

        return

    def receive_handle(self):
        send_message, send_data = None, None
        if self.receiver.getQueueLength() > 0:
            data = self.receiver.getData()
            message, d = pickle.loads(data)
            if message == 'step_done':
                obs, r, d, i, s = d
                # print(s, self.step - 1, self.pre_action, r)  # check synchronize

                self.execute(obs, r, d, i)
                if not self.finish:
                    action = self.propose_new_action(obs)
                    self.send_to_robot('step', action)
                    self.step += 1
            if message == 'reset_done':
                obs = d
                self.execute(obs, 0, False, False)
                action = self.propose_new_action(obs)
                self.send_to_robot('step', action)
            if message == 'obstacle':
                self.back_to_begin()

            self.receiver.nextPacket()
        return

    def send_to_robot(self, message, data):
        data = message, data, self.step
        dataSerialized = pickle.dumps(data)
        self.emitter.send(dataSerialized)

    def start(self,
              max_step,
              episodes,
              file,
              feedbackP=0,
              feedbackA=1,
              PPR=False):
        self.file = file
        self.max_step = max_step
        self.feedbackProbability = feedbackP
        self.feedbackAccuracy = feedbackA
        self.PPR = PPR
        rewards = []
        feedbackTotal = []
        feedbackAmount = []
        self.init_parametter()
        for i in range(episodes):
            self.reset()
            self.episode = i
            while self.supervisor.step(
                    self.timestep) != -1 and not self.finish:
                self.receive_handle()
            print(i, self.reward, self.feedbackTotal, self.feedbackAmount)
            rewards.append(self.reward)
            feedbackTotal.append(self.feedbackTotal)
            feedbackAmount.append(self.feedbackAmount)
        self.finalise(rewards, feedbackTotal, feedbackAmount, PPR)