Example #1
def import_new_features(node_eui, exclude_list):
    coverage = CoverageMap(node_eui)

    for feature in get_new_features(node_eui, exclude_list):
        if coverage.exists(feature):
            logger.info('Found last imported data point, stopping.')
            break

        gateway_eui = feature['properties']['gateway_eui']
        if not coverage.gateway_seen(gateway_eui):
            gateway_details = rest_api.fetch_gateway(gateway_eui)
            gateway = features.build_gateway(gateway_details)
            coverage.add_gateway(gateway)

        coverage.add(feature)

    coverage.save_all()
Example #3
client = airsim.CarClient()
client.confirmConnection()
client.enableApiControl(True)
car_controls = airsim.CarControls()

# let the car start driving
car_controls.throttle = 0.6
car_controls.steering = 0
client.setCarControls(car_controls)

# create coverage map and connect to client
start_point = [-290.0, 10050.0, 10.0]
covMap = CoverageMap(start_point=[0, 0, 0],
                     map_size=64000,
                     scale_ratio=20,
                     state_size=6000,
                     input_size=20,
                     height_threshold=0.9,
                     reward_norm=30,
                     paint_radius=15)
covMap.set_client(client=client)

# create experiment directories
#experiment_dir = os.path.join(os.path.expanduser('~'), 'Documents\\AirSim', datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S"))
experiment_dir = os.path.join(
    'C:\\Users\\t-dezado\\OneDrive - Microsoft\\Documents\\AirSim',
    datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S"))
images_dir = os.path.join(experiment_dir, 'images')
os.makedirs(images_dir)

# create txt file
airsim_rec = open(os.path.join(experiment_dir, "airsim_rec.txt"), "w")
Example #4
    rewards_log.close()

    # connect to airsim
    client = connect_to_airsim()
    car_controls = airsim.CarControls()

    # initiate coverage map

    #start_point = [840.0, 1200.0, 32.0]
    start_point = [-1200.0, -500.0, 62.000687]

    #coverage_map = CoverageMap(start_point=start_point, map_size=12000, scale_ratio=1, state_size=4000, input_size=84, height_threshold=0.9, reward_norm=3000.0)
    coverage_map = CoverageMap(start_point=start_point,
                               map_size=12000,
                               scale_ratio=20,
                               state_size=6000,
                               input_size=20,
                               height_threshold=0.9,
                               reward_norm=30,
                               paint_radius=15)
    coverage_map.set_client(client)

    # let the car drive a bit
    car_controls.throttle = 0.3
    car_controls.steering = 0
    client.setCarControls(car_controls)
    time.sleep(0.5)

    # Make RL agent
    NumBufferFrames = 4
    SizeRows = 84
    SizeCols = 84
class DistributedAgent():
    def __init__(self, parameters):
        required_parameters = [
            'data_dir', 'max_epoch_runtime_sec', 'replay_memory_size',
            'batch_size', 'min_epsilon', 'per_iter_epsilon_reduction',
            'experiment_name', 'train_conv_layers', 'start_x', 'start_y',
            'start_z', 'log_path'
        ]
        for required_parameter in required_parameters:
            if required_parameter not in parameters:
                raise ValueError('Missing required parameter {0}'.format(
                    required_parameter))

        parameters['role_type'] = 'agent'

        print('Starting time: {0}'.format(datetime.datetime.utcnow()),
              file=sys.stderr)
        self.__model_buffer = None
        self.__model = None
        self.__airsim_started = False
        self.__data_dir = parameters['data_dir']
        self.__per_iter_epsilon_reduction = float(
            parameters['per_iter_epsilon_reduction'])
        self.__min_epsilon = float(parameters['min_epsilon'])
        self.__max_epoch_runtime_sec = float(
            parameters['max_epoch_runtime_sec'])
        self.__replay_memory_size = int(parameters['replay_memory_size'])
        self.__batch_size = int(parameters['batch_size'])
        self.__experiment_name = parameters['experiment_name']
        self.__train_conv_layers = bool(
            (parameters['train_conv_layers'].lower().strip() == 'true'))
        self.__epsilon = 1
        self.__num_batches_run = 0
        self.__last_checkpoint_batch_count = 0

        if 'batch_update_frequency' in parameters:
            self.__batch_update_frequency = int(
                parameters['batch_update_frequency'])

        if 'weights_path' in parameters:
            self.__weights_path = parameters['weights_path']
        else:
            self.__weights_path = None

        if 'airsim_path' in parameters:
            self.__airsim_path = parameters['airsim_path']
        else:
            self.__airsim_path = None

        self.__local_run = 'local_run' in parameters

        self.__car_client = None
        self.__car_controls = None

        self.__minibatch_dir = os.path.join(self.__data_dir, 'minibatches')
        self.__output_model_dir = os.path.join(self.__data_dir, 'models')

        self.__make_dir_if_not_exist(self.__minibatch_dir)
        self.__make_dir_if_not_exist(self.__output_model_dir)
        self.__last_model_file = ''

        self.__possible_ip_addresses = []
        self.__trainer_ip_address = None

        self.__experiences = {}

        self.__start_point = [
            float(parameters['start_x']),
            float(parameters['start_y']),
            float(parameters['start_z'])
        ]
        self.__log_file = parameters['log_path']

        # initiate coverage map
        self.__coverage_map = CoverageMap(start_point=self.__start_point,
                                          map_size=12000,
                                          scale_ratio=20,
                                          state_size=6000,
                                          input_size=20,
                                          height_threshold=0.9,
                                          reward_norm=30,
                                          paint_radius=15)

        # create the rewards log file under the experiment's checkpoint directory
        if not os.path.isdir(
                os.path.join(self.__data_dir, 'checkpoint',
                             self.__experiment_name)):
            os.makedirs(
                os.path.join(self.__data_dir, 'checkpoint',
                             self.__experiment_name))
        self.__rewards_log = open(
            os.path.join(self.__data_dir, 'checkpoint',
                         self.__experiment_name, "rewards.txt"), "w")
        self.__rewards_log.write("Timestamp\tSum\tMean\n")
        self.__rewards_log.close()

        # create starting points list
        #self.__starting_points = self.__get_starting_points()
        self.__starting_points = [self.__start_point]

    # Starts the agent
    def start(self):
        self.__run_function()

    # The function that will be run during training.
    # It will initialize the connection to the trainer, start AirSim, and continuously run training iterations.
    def __run_function(self):
        print('Starting run function')

        # Once the trainer is online, it will write its IP to a file in (data_dir)\trainer_ip\trainer_ip.txt
        # Wait for that file to exist
        if not self.__local_run:
            print('Waiting for trainer to come online')
            while True:
                trainer_ip_dir = os.path.join(
                    os.path.join(self.__data_dir, 'trainer_ip'),
                    self.__experiment_name)
                print('Checking {0}...'.format(trainer_ip_dir))
                if os.path.isdir(trainer_ip_dir):
                    with open(os.path.join(trainer_ip_dir, 'trainer_ip.txt'),
                              'r') as f:
                        self.__possible_ip_addresses.append(f.read().replace(
                            '\n', ''))
                        break
                print('Not online yet. Sleeping...')
                time.sleep(5)

            # We now have the IP address for the trainer. Attempt to ping the trainer.
            ping_idx = -1
            while True:
                ping_idx += 1
                print('Attempting to ping trainer...')
                try:
                    print('\tPinging {0}...'.format(
                        self.__possible_ip_addresses[ping_idx % len(
                            self.__possible_ip_addresses)]))
                    response = requests.get('http://{0}:80/ping'.format(
                        self.__possible_ip_addresses[ping_idx % len(
                            self.__possible_ip_addresses)])).json()
                    if response['message'] != 'pong':
                        raise ValueError(
                            'Received unexpected message: {0}'.format(
                                response))
                    print('Success!')
                    self.__trainer_ip_address = self.__possible_ip_addresses[
                        ping_idx % len(self.__possible_ip_addresses)]
                    break
                except Exception as e:
                    print('Could not get response. Message is {0}'.format(e))
                    if (ping_idx % len(self.__possible_ip_addresses) == 0):
                        print('Waiting 5 seconds and trying again...')
                        time.sleep(5)

            # Get the latest model from the trainer
            print('Getting model from the trainer')
            sys.stdout.flush()
            buffer_len = 4
            self.__model = RlModel(weights_path=self.__weights_path,
                                   train_conv_layers=self.__train_conv_layers,
                                   buffer_len=buffer_len)
            self.__get_latest_model()

        else:
            print('Run is local. Skipping connection to trainer.')
            buffer_len = 4
            self.__model = RlModel(weights_path=self.__weights_path,
                                   train_conv_layers=self.__train_conv_layers,
                                   buffer_len=buffer_len)

        # Connect to the AirSim exe
        self.__connect_to_airsim()

        # Fill the replay memory by driving randomly.
        print('Filling replay memory...')
        while True:
            print('Running Airsim Epoch.')
            try:
                _, num_of_actions = self.__run_airsim_epoch(True)
                if num_of_actions > 0:
                    percent_full = 100.0 * len(self.__experiences['actions']
                                               ) / self.__replay_memory_size
                    print(
                        'Replay memory now contains {0} members. ({1}% full)'.
                        format(len(self.__experiences['actions']),
                               percent_full))

                    if (percent_full >= 100.0):
                        break
            except msgpackrpc.error.TimeoutError:
                print(
                    'Lost connection to AirSim while filling replay memory. Attempting to reconnect.'
                )
                self.__connect_to_airsim()

        # Get the latest model. Other agents may have finished before us.
        print('Replay memory filled. Starting main loop...')

        if not self.__local_run:
            self.__get_latest_model()
        while True:
            try:
                if (self.__model is not None):

                    #Generate a series of training examples by driving the vehicle in AirSim
                    print('Running Airsim Epoch.')
                    experiences, frame_count = self.__run_airsim_epoch(False)

                    # If we didn't immediately crash, train on the gathered experiences
                    if (frame_count > 0):
                        print('Generating {0} minibatches...'.format(
                            frame_count))

                        print('Sampling Experiences.')
                        # Sample experiences from the replay memory
                        sampled_experiences = self.__sample_experiences(
                            experiences, frame_count, True)

                        self.__num_batches_run += frame_count

                        # If we successfully sampled, train on the collected minibatches and send the gradients to the trainer node
                        if (len(sampled_experiences) > 0):
                            print('Publishing AirSim Epoch.')

                            # write all rewards to log file
                            self.__rewards_log = open(
                                os.path.join(self.__data_dir, 'checkpoint',
                                             self.__experiment_name,
                                             "rewards.txt"), "a+")
                            rewards_sum = 0
                            for reward in sampled_experiences['rewards']:
                                rewards_sum += reward
                            self.__rewards_log.write("{}\t{}\t{}\n".format(
                                time.time(), rewards_sum, rewards_sum /
                                len(sampled_experiences['rewards'])))
                            self.__rewards_log.close()

                            self.__publish_batch_and_update_model(
                                sampled_experiences, frame_count)

            # Occasionally, the AirSim exe will stop working.
            # For example, if a user connects to the node to visualize progress.
            # In that case, attempt to reconnect.
            except msgpackrpc.error.TimeoutError:
                print('Lost connection to AirSim. Attempting to reconnect.')
                self.__connect_to_airsim()

    # Connects to the AirSim Exe.
    # Assume that it is already running. After 10 successive attempts, attempt to restart the executable.
    def __connect_to_airsim(self):
        attempt_count = 0
        while True:
            try:
                print('Attempting to connect to AirSim (attempt {0})'.format(
                    attempt_count))
                self.__car_client = airsim.CarClient()
                self.__car_client.confirmConnection()
                self.__car_client.enableApiControl(True)
                self.__car_controls = airsim.CarControls()
                self.__coverage_map.set_client(
                    client=self.__car_client)  # update client on coverage map
                print('Connected!')
                return
            except:
                print('Failed to connect.')
                attempt_count += 1
                if (attempt_count % 10 == 0):
                    print(
                        '10 consecutive failures to connect. Attempting to start AirSim on my own.'
                    )

                    if self.__local_run:
                        os.system('START "" powershell.exe {0}'.format(
                            os.path.join(
                                self.__airsim_path,
                                'AD_Cookbook_Start_AirSim.ps1 neighborhood -windowed'
                            )))
                    else:
                        os.system(
                            'START "" powershell.exe D:\\AD_Cookbook_AirSim\\Scripts\\DistributedRL\\restart_airsim_if_agent.ps1'
                        )
                print('Waiting a few seconds.')
                time.sleep(10)

    # Appends a sample to a ring buffer.
    # If the appended example takes the size of the buffer over buffer_size, the example at the front will be removed.
    def __append_to_ring_buffer(self, item, buffer, buffer_size):
        if (len(buffer) >= buffer_size):
            buffer = buffer[1:]
        buffer.append(item)
        return buffer

    # Runs an iteration of data generation from AirSim.
    # Data will be saved in the replay memory.
    def __run_airsim_epoch(self, always_random):
        print('Running AirSim epoch.')

        # reset coverage map
        self.__coverage_map.reset()

        # Pick a random starting point on the roads
        starting_points, starting_direction = self.__get_next_starting_point()

        # Initialize the state buffer.
        # For now, save 4 images at 0.01 second intervals.
        state_buffer_len = 4
        state_buffer = []
        wait_delta_sec = 0.01

        print('Getting Pose')
        self.__car_client.simSetVehiclePose(
            airsim.Pose(
                airsim.Vector3r(starting_points[0], starting_points[1],
                                starting_points[2]),
                toQuaternion(starting_direction[0], starting_direction[1],
                             starting_direction[2])), True)

        # Currently, simSetVehiclePose does not allow us to set the velocity.
        # So, if we crash and call simSetVehiclePose, the car will still be moving at its previous velocity.
        # We need the car to stop moving, so push the brake and wait for a few seconds.
        print('Waiting for momentum to die')
        self.__car_controls.steering = 0
        self.__car_controls.throttle = 0
        self.__car_controls.brake = 1
        self.__car_client.setCarControls(self.__car_controls)
        time.sleep(4)

        print('Resetting')
        self.__car_client.simSetVehiclePose(
            airsim.Pose(
                airsim.Vector3r(starting_points[0], starting_points[1],
                                starting_points[2]),
                toQuaternion(starting_direction[0], starting_direction[1],
                             starting_direction[2])), True)

        #Start the car rolling so it doesn't get stuck
        print('Running car for a few seconds...')
        self.__car_controls.steering = 0
        self.__car_controls.throttle = 0.4
        self.__car_controls.brake = 0
        self.__car_client.setCarControls(self.__car_controls)

        # While the car is rolling, start initializing the state buffer
        stop_run_time = datetime.datetime.now() + datetime.timedelta(seconds=1)
        while (datetime.datetime.now() < stop_run_time):
            time.sleep(wait_delta_sec)
            image, _ = self.__get_image()
            state_buffer = self.__append_to_ring_buffer(
                image, state_buffer, state_buffer_len)
        done = False
        actions = []  #records the state we go to
        pre_states = []
        post_states = []
        rewards = []
        predicted_rewards = []
        car_state = self.__car_client.getCarState()

        # slow down a bit
        self.__car_controls.throttle = 0.3
        self.__car_client.setCarControls(self.__car_controls)

        start_time = datetime.datetime.utcnow()
        end_time = start_time + datetime.timedelta(
            seconds=self.__max_epoch_runtime_sec)

        num_random = 0

        # Main data collection loop
        while not done:
            collision_info = self.__car_client.simGetCollisionInfo()
            utc_now = datetime.datetime.utcnow()

            # Check for terminal conditions:
            # 1) Car has collided
            # 2) Car is stopped
            # 3) The run has been running for longer than max_epoch_runtime_sec.
            #       This constraint is so the model doesn't end up having to churn through huge chunks of data, slowing down training
            if (collision_info.has_collided or abs(car_state.speed) < 0.02
                    or utc_now > end_time):
                print('Start time: {0}, end time: {1}'.format(
                    start_time, utc_now),
                      file=sys.stderr)
                if (utc_now > end_time):
                    print('timed out.')
                    print(
                        'Full autonomous run finished at {0}'.format(utc_now),
                        file=sys.stderr)
                done = True
                sys.stderr.flush()
            else:

                # The agent should occasionally pick a random action instead of the best action
                do_greedy = np.random.random_sample()
                pre_state = copy.deepcopy(state_buffer)
                if (do_greedy < self.__epsilon or always_random):
                    num_random += 1
                    next_state = self.__model.get_random_state()
                    predicted_reward = 0
                else:
                    next_state, predicted_reward, _ = self.__model.predict_state(
                        pre_state)

                    print('Model predicts {0}'.format(next_state))

                # Convert the selected state to a control signal
                next_steering, next_brake = self.__model.state_to_control_signals(
                    next_state, self.__car_client.getCarState())

                # Take the action
                self.__car_controls.steering = next_steering
                self.__car_controls.brake = next_brake
                self.__car_client.setCarControls(self.__car_controls)

                # Wait for a short period of time to see outcome
                time.sleep(wait_delta_sec)

                # Observe outcome and compute reward from action
                post_image, cov_reward = self.__get_image()

                state_buffer = self.__append_to_ring_buffer(
                    post_image, state_buffer, state_buffer_len)
                car_state = self.__car_client.getCarState()
                collision_info = self.__car_client.simGetCollisionInfo()
                reward = self.__compute_reward(collision_info, car_state,
                                               cov_reward, next_state)

                # Add the experience to the set of examples from this iteration
                pre_states.append(pre_state)
                post_states.append(state_buffer)
                rewards.append(reward)
                predicted_rewards.append(predicted_reward)
                actions.append(next_state)

        # Only the last state is a terminal state.
        is_not_terminal = [1 for i in range(0, len(actions) - 1, 1)]
        is_not_terminal.append(0)

        # only add to the replay memory if we have enough data
        if len(actions) > 30:
            # Add all of the states from this iteration to the replay memory
            self.__add_to_replay_memory('pre_states', pre_states)
            self.__add_to_replay_memory('post_states', post_states)
            self.__add_to_replay_memory('actions', actions)
            self.__add_to_replay_memory('rewards', rewards)
            self.__add_to_replay_memory('predicted_rewards', predicted_rewards)
            self.__add_to_replay_memory('is_not_terminal', is_not_terminal)

            print('Percent random actions: {0}'.format(num_random /
                                                       max(1, len(actions))))
            print('Num total actions: {0}'.format(len(actions)))

            # If we are in the main loop, reduce the epsilon parameter so that the model will be called more often
            # Note: this will be overwritten by the trainer's epsilon if running in distributed mode
            if not always_random:
                self.__epsilon -= self.__per_iter_epsilon_reduction
                self.__epsilon = max(self.__epsilon, self.__min_epsilon)

            return self.__experiences, len(actions)
        else:
            return self.__experiences, 0

    # Adds a set of examples to the replay memory
    def __add_to_replay_memory(self, field_name, data):
        if field_name not in self.__experiences:
            self.__experiences[field_name] = data
        else:
            self.__experiences[field_name] += data
            start_index = max(
                0,
                len(self.__experiences[field_name]) -
                self.__replay_memory_size)
            self.__experiences[field_name] = self.__experiences[field_name][
                start_index:]

    # Sample experiences from the replay memory
    def __sample_experiences(self, experiences, frame_count, sample_randomly):
        sampled_experiences = {}
        sampled_experiences['pre_states'] = []
        sampled_experiences['post_states'] = []
        sampled_experiences['actions'] = []
        sampled_experiences['rewards'] = []
        sampled_experiences['predicted_rewards'] = []
        sampled_experiences['is_not_terminal'] = []

        # Compute the surprise factor, which is the difference between the predicted and the actual Q value for each state.
        # We can use that to weight examples so that we are more likely to train on examples that the model got wrong.
        suprise_factor = np.abs(
            np.array(experiences['rewards'], dtype=np.dtype(float)) -
            np.array(experiences['predicted_rewards'], dtype=np.dtype(float)))
        suprise_factor_normalizer = np.sum(suprise_factor)
        suprise_factor /= float(suprise_factor_normalizer)

        # Generate one minibatch for each frame of the run
        for _ in range(0, frame_count, 1):
            if sample_randomly:
                idx_set = set(
                    np.random.choice(list(range(0, suprise_factor.shape[0],
                                                1)),
                                     size=(self.__batch_size),
                                     replace=False))
            else:
                idx_set = set(
                    np.random.choice(list(range(0, suprise_factor.shape[0],
                                                1)),
                                     size=(self.__batch_size),
                                     replace=False,
                                     p=suprise_factor))

            sampled_experiences['pre_states'] += [
                experiences['pre_states'][i] for i in idx_set
            ]
            sampled_experiences['post_states'] += [
                experiences['post_states'][i] for i in idx_set
            ]
            sampled_experiences['actions'] += [
                experiences['actions'][i] for i in idx_set
            ]
            sampled_experiences['rewards'] += [
                experiences['rewards'][i] for i in idx_set
            ]
            sampled_experiences['predicted_rewards'] += [
                experiences['predicted_rewards'][i] for i in idx_set
            ]
            sampled_experiences['is_not_terminal'] += [
                experiences['is_not_terminal'][i] for i in idx_set
            ]

        return sampled_experiences

    # Train the model on minibatches and post to the trainer node.
    # The trainer node will respond with the latest version of the model that will be used in further data generation iterations.
    def __publish_batch_and_update_model(self, batches, batches_count):
        # Train and get the gradients
        print(
            'Publishing epoch data and getting latest model from parameter server...'
        )
        gradients = self.__model.get_gradient_update_from_batches(batches)

        # Post the data to the trainer node
        if not self.__local_run:
            post_data = {}
            post_data['gradients'] = gradients
            post_data['batch_count'] = batches_count

            response = requests.post('http://{0}:80/gradient_update'.format(
                self.__trainer_ip_address),
                                     json=post_data)
            print('Response:')
            print(response)

            new_model_parameters = response.json()

            # Update the existing model with the new parameters
            self.__model.from_packet(new_model_parameters)

            # If the trainer sends us an epsilon, allow it to override our local value
            if ('epsilon' in new_model_parameters):
                new_epsilon = float(new_model_parameters['epsilon'])
                print(
                    'Overriding local epsilon with {0}, which was sent from trainer'
                    .format(new_epsilon))
                self.__epsilon = new_epsilon

        else:
            if (self.__num_batches_run > self.__batch_update_frequency +
                    self.__last_checkpoint_batch_count):
                self.__model.update_critic()

                checkpoint = {}
                checkpoint['model'] = self.__model.to_packet(get_target=True)
                checkpoint['batch_count'] = batches_count
                checkpoint_str = json.dumps(checkpoint)

                checkpoint_dir = os.path.join(
                    os.path.join(self.__data_dir, 'checkpoint'),
                    self.__experiment_name)

                if not os.path.isdir(checkpoint_dir):
                    try:
                        os.makedirs(checkpoint_dir)
                    except OSError as e:
                        if e.errno != errno.EEXIST:
                            raise

                file_name = os.path.join(
                    checkpoint_dir, '{0}.json'.format(self.__num_batches_run))
                with open(file_name, 'w') as f:
                    print('Checkpointing to {0}'.format(file_name))
                    f.write(checkpoint_str)

                self.__last_checkpoint_batch_count = self.__num_batches_run

    # Gets the latest model from the trainer node
    def __get_latest_model(self):
        print('Getting latest model from parameter server...')
        response = requests.get('http://{0}:80/latest'.format(
            self.__trainer_ip_address)).json()
        self.__model.from_packet(response)

    # Gets a coverage image from AirSim
    def __get_cov_image(self):

        state, cov_reward = self.__coverage_map.get_state_from_pose()

        # debug only
        #im = Image.fromarray(np.uint8(state))
        #im.save("DistributedRL\\debug\\{}.png".format(time.time()))

        # normalize state
        state = state / 255.0

        return state, cov_reward

    # Gets an image from AirSim
    def __get_image(self):

        responses = self.__car_client.simGetImages([
            airsim.ImageRequest("RCCamera", airsim.ImageType.DepthPerspective,
                                True, False)
        ])
        img1d = np.array(responses[0].image_data_float, dtype=np.float64)

        if img1d.size > 1:

            img1d = 255 / np.maximum(np.ones(img1d.size), img1d)
            img2d = np.reshape(img1d,
                               (responses[0].height, responses[0].width))

            image = Image.fromarray(img2d)

            # debug only
            #image_png = image.convert('RGB')
            #image_png.save("DistributedRL\\debug\\{}.png".format(time.time()))

            depth_im = np.array(image.resize((84, 84)).convert('L'))
            depth_im = depth_im / 255.0
        else:
            depth_im = np.zeros((84, 84)).astype(float)

        cov_im, cov_reward = self.__get_cov_image()
        depth_im[:cov_im.shape[0], :cov_im.shape[1]] = cov_im

        #image = Image.fromarray(depth_im)
        #image.save("DistributedRL\\debug\\{}.png".format(time.time()))

        return depth_im, cov_reward
        """
        image_response = self.__car_client.simGetImages([airsim.ImageRequest("RCCamera", airsim.ImageType.Scene, False, False)])[0]
        image1d = np.fromstring(image_response.image_data_uint8, dtype=np.uint8)
        if image1d.size > 1:
            image_rgba = image1d.reshape(image_response.height, image_response.width, 4)

            #im = Image.fromarray(np.uint8(image_rgba))
            #im.save("DistributedRL\\debug\\{}.png".format(time.time()))

            image_rgba = image_rgba / 255.0

            return image_rgba[60:144,86:170,0:3].astype(float)
        
        return np.zeros((84,84,3)).astype(float)
        """

    # Computes the reward function based on collision, speed, and new coverage.
    def __compute_reward(self, collision_info, car_state, cov_reward, action):

        alpha = 1.0

        # If the car has collided, the reward is always zero
        if (collision_info.has_collided):
            return 0.0

        # If the car has stopped for some reason, the reward is always zero
        if abs(car_state.speed) < 0.02:
            return 0.0

        # If there is no new coverage, there is no reward
        if cov_reward < 0.1:
            return 0.0

        # straight will be rewarded as 1.0, semi straight as 0.5
        direction_reward = float(2 - abs(action - 2)) / 2.0

        # final reward
        reward = alpha * cov_reward + (1 - alpha) * direction_reward

        #print("cov reward: {}, reward: {}".format(cov_reward, reward))
        return reward

    # prepare starting points list
    def __get_starting_points(self):

        starting_points_file = open(
            os.path.join(self.__data_dir, 'data\\starting_points.txt'))
        starting_points_list = []

        for line in starting_points_file:
            starting_points_list.append(
                [float(x) for x in line.split(' ')[:3]])

        return starting_points_list

    # get the most recently generated random point
    def __get_next_generated_random_point(self):
        """
        # grab the newest line with generated random point
        newest_rp = "None"

        # keep searching until the simulation is giving something
        while newest_rp == "None":
            
            # notify user
            print("Searching for a random point...")

            # open log file
            log_file = open(self.__log_file, "r")

            # search for the newest generated random point line
            for line in log_file:
                if "RandomPoint" in line:
                    newest_rp = line
            
        # notify user
        print("Found random point.")
        
        # filter random point from line
        random_point = [float(newest_rp.split(" ")[-3].split("=")[1]), float(newest_rp.split(" ")[-2].split("=")[1]), float(newest_rp.split(" ")[-1].split("=")[1])]
        return random_point
        """
        idx = randint(0, len(self.__starting_points) - 1)
        return self.__starting_points[idx]

    # Randomly selects a starting point on the road
    # Used for initializing an iteration of data generation from AirSim
    def __get_next_starting_point(self):

        # get random start point from log file, and make it relative to agent's starting point
        random_start_point = self.__get_next_generated_random_point()
        random_start_point = [
            random_start_point[0] - self.__start_point[0],
            random_start_point[1] - self.__start_point[1],
            random_start_point[2] - self.__start_point[2]
        ]
        random_start_point = [x / 100.0 for x in random_start_point]

        # draw random orientation
        #random_direction = (0, 0, np.random.uniform(-math.pi,math.pi))
        random_direction = (0, 0, 0)

        # Get the current state of the vehicle
        car_state = self.__car_client.getCarState()

        # The z coordinate is always zero
        random_start_point[2] = -0
        return (random_start_point, random_direction)

    # A helper function to make a directory if it does not exist
    def __make_dir_if_not_exist(self, directory):
        if not (os.path.exists(directory)):
            try:
                os.makedirs(directory)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise
buffer_len = 4

model = RlModel(weights_path=None,
                train_conv_layers=False,
                exp_type=args.type,
                buffer_len=buffer_len)
with open(args.path, 'r') as f:
    checkpoint_data = json.loads(f.read())
    model.from_packet(checkpoint_data['model'])

# initiate coverage map
start_point = [1150.0, -110.0, 32.0]
coverage_map = CoverageMap(start_point=start_point,
                           map_size=12000,
                           scale_ratio=20,
                           state_size=args.state_size,
                           input_size=40,
                           height_threshold=0.9,
                           reward_norm=args.reward_norm)

print('Connecting to AirSim...')
car_client = airsim.CarClient()
car_client.confirmConnection()
car_client.enableApiControl(True)
car_controls = airsim.CarControls()
coverage_map.set_client(client=car_client)
print('Connected!')


def append_to_ring_buffer(item, rgb_item, buffer, rgb_buffer, buffer_size):
    if (len(buffer) >= buffer_size):