示例#1
0
def generate_grid_data(path,
                       N=30,
                       M=30,
                       num_env=10000,
                       traj_per_env=5,
                       Pmove_succ=1.0,
                       Pobs_succ=1.0):
    """
    Generate a dataset of random grid environments and write it to ``path``.

    Two files are produced inside ``path``: ``params.pickle`` (the domain
    parameters as a plain dict) and ``data.hdf5`` (the trajectory database
    created by ``GridBase.create_db``).

    :param path: path for data file. use separate folders for training and test data
    :param N: grid rows
    :param M: grid columns
    :param num_env: number of environments in the dataset (grids)
    :param traj_per_env: number of trajectories per environment (different initial state, goal, initial belief)
    :param Pmove_succ: probability of transition succeeding, otherwise stays in place
    :param Pobs_succ: probability of correct observation, independent in each direction
    """

    params = dotdict({
        'grid_n': N,
        'grid_m': M,
        'Pobst': 0.25,  # probability of obstacles in random grid
        'R_obst': -10,
        'R_goal': 20,
        'R_step': -0.1,
        'discount': 0.99,
        'Pmove_succ': Pmove_succ,
        'Pobs_succ': Pobs_succ,
        'num_action': 5,
        'moves': [[0, 1], [1, 0], [0, -1], [-1, 0],
                  [0, 0]],  # right, down, left, up, stay
        'stayaction': 4,
        'num_obs': 16,
        'observe_directions': [[0, 1], [1, 0], [0, -1], [-1, 0]],
    })

    # derived parameters
    params['obs_len'] = len(params['observe_directions'])
    params['num_state'] = params['grid_n'] * params['grid_m']
    params['traj_limit'] = 4 * (params['grid_n'] + params['grid_m'])
    # expand the scalar step reward to one entry per action
    params['R_step'] = [params['R_step']] * params['num_action']

    # save params
    # makedirs also creates missing parents and tolerates an existing folder
    os.makedirs(path, exist_ok=True)
    # protocol -1 is a binary pickle protocol, so the file must be opened in
    # binary mode; the context manager guarantees the handle is closed
    with open(os.path.join(path, "params.pickle"), 'wb') as param_file:
        pickle.dump(dict(params), param_file, -1)

    # randomize seeds, set to previous value to determinize random numbers
    np.random.seed()
    random.seed()

    # grid domain object
    domain = GridBase(params)

    # make database file
    # os.path.join keeps the database inside `path` whether or not the caller
    # passed a trailing separator
    db = GridBase.create_db(os.path.join(path, "data.hdf5"), params, num_env,
                            traj_per_env)

    for env_i in range(num_env):
        print("Generating env %d with %d trajectories " %
              (env_i, traj_per_env))
        domain.generate_trajectories(db, num_traj=traj_per_env)

    print("Done.")
示例#2
0
def parse_args(arglist):
    """
    Parse command-line arguments for gridworld training and merge them with
    the domain parameters stored next to the training data.

    :param arglist: list of argument strings handed to argparse
    :return: dotdict holding the pickled domain parameters from
             ``<path>/train/params.pickle``, overlaid with every parsed
             command-line argument (arguments win on key clashes)
    """

    parser = argparse.ArgumentParser(description='Run training on gridworld')

    parser.add_argument(
        'path',
        help='Path to data folder containing train and test subfolders')
    parser.add_argument('--logpath',
                        default='./log/',
                        help='Path to save log and trained model')

    parser.add_argument('--loadmodel',
                        nargs='*',
                        help='Load model weights from checkpoint')

    parser.add_argument(
        '--eval_samples',
        type=int,
        default=100,
        help='Number of samples to evaluate the learned policy on')
    parser.add_argument(
        '--eval_repeats',
        type=int,
        default=1,
        help=
        'Repeat simulating policy for a given number of times. Use 5 for stochastic domains'
    )

    parser.add_argument('--batch_size',
                        type=int,
                        default=100,
                        help='Size of minibatches for training')
    parser.add_argument(
        '--training_envs',
        type=float,
        default=0.9,
        help=
        'Proportion of training data used for trianing. Remainder will be used for validation'
    )
    parser.add_argument(
        '--step_size',
        type=int,
        default=4,
        help='Number of maximum steps for backpropagation through time')
    parser.add_argument('--lim_traj_len',
                        type=int,
                        default=100,
                        help='Clip trajectories to a maximum length')
    parser.add_argument(
        '--includefailed',
        action='store_true',
        help=
        'Include unsuccessful demonstrations in the training and validation set.'
    )

    parser.add_argument('--learning_rate',
                        type=float,
                        default=0.001,
                        help='Initial learning rate')
    parser.add_argument(
        '--patience_first',
        type=int,
        default=30,
        help=
        'Start decaying learning rate if no improvement for a given number of steps'
    )
    parser.add_argument('--patience_rest',
                        type=int,
                        default=5,
                        help='Patience after decay started')
    parser.add_argument('--decaystep',
                        type=int,
                        default=15,
                        help='Total number of learning rate decay steps')
    parser.add_argument('--epochs',
                        type=int,
                        default=1000,
                        help='Maximum number of epochs')

    parser.add_argument(
        '--cache',
        nargs='*',
        default=['steps', 'envs', 'bs'],
        help='Cache nodes from pytable dataset. Default: steps, envs, bs')

    parser.add_argument(
        '-K',
        '--K',
        type=int,
        default=-1,
        help=
        'Number of iterations of value iteration in QMDPNet. Compute from grid size if negative.'
    )

    args = parser.parse_args(args=arglist)

    # load domain parameters
    # NOTE(review): unpickling executes arbitrary code; only point `path` at
    # data folders you generated or otherwise trust.
    params_file = os.path.join(args.path, 'train/params.pickle')
    with open(params_file, 'rb') as pickled:  # closed even if load() raises
        params = dotdict(pickle.load(pickled))

    # set default K
    if args.K < 0:
        args.K = 3 * params.grid_n

    # combine all parameters to a single dotdict
    for key in vars(args):
        params[key] = getattr(args, key)

    return params
示例#3
0
文件: NNet.py 项目: Weoshin/Amazon
import sys
import numpy as np
import torch
from utils.dotdict import dotdict
from AmazonNet import AmazonNet as annet
from utils.bar import Bar
import torch.optim as optim
from utils.AverageMeter import AverageMeter
import torch.nn.functional as f
sys.path.append('../../')


# Hyperparameter settings for the network, exposed with attribute access.
args = dotdict(dict(
    lr=0.001,  # learning rate (step size)
    dropout=0.3,  # dropout rate
    epochs=10,  # training epochs run each time new data is passed in
    batch_size=64,
    cuda=torch.cuda.is_available(),
    num_channels=512,  # number of channels
))


class NNet:
    """
    Neural network training class.
    """
    def __init__(self, game):
        # Keep the board description and action count reported by the game.
        self.board_size = game.board_size
        # Build the underlying network from the game and the module-level
        # hyperparameters in `args`.
        self.nnet = annet(game, args)
        self.board_x, self.board_y = game.get_board_size()
        self.action_size = game.get_action_size()
def parse_logfile(filenames, source='real', model='lin1'):
    """
    Parse agent log files into trajectories, fit a velocity model for several
    action delays via ``sysid``, and plot one-step predictions.

    Parsing: a "Resetting agent" line closes the trajectory collected so far
    and starts a new one, so steps logged after the last reset of the input
    are never added to the result. Steps are stored by the step index found
    in each log line, in buffers of 500 rows pre-filled with NaN.

    :param filenames: a single log file path or a list of paths
    :param source: 'sim' parses only "delay... act" lines (intended action);
                   any other value parses the spin / POS / "[real][info]" lines
    :param model: forwarded unchanged to ``sysid``
    :return: None. The function prints the fit results, shows the plots and
             finally drops into ``pdb``.
    """

    trajectories = []
    states = None
    actions = None

    # not used below; only referenced by the commented-out rescaling
    linear_velocity_scaler = 0.5
    angular_velocity_scaler = 1.5707963267948966

    if isinstance(filenames, str):
        filenames = [filenames]

    # at most this many lines are read from each file
    max_lines_per_file = 100000

    t = 0
    re_reset = re.compile(r"^Resetting agent")
    re_spin = re.compile(r"^([0-9]+): spin.*: ([0-9\-\.]+)")
    re_action = re.compile(
        r"^([0-9]+): .* POS ([0-9\-\.]+) ([0-9\-\.]+) --.*\] ([0-9\-\.]+) besti"
    )
    re_sys = re.compile(
        r"\[real\]\[info\] action step: ([0-9]+) \| output: \[ *([0-9\-\.]+) +([0-9\-\.]+) *\]"
    )
    re_sym_action1 = re.compile(
        r"^([0-9]+): .* POS ([0-9\-\.]+) ([0-9\-\.]+) --.*\] ([0-9\-\.]+) besti.* \| \[([0-9\-\.]+) ([0-9\-\.]+)\]"
    )
    re_sym_action2 = re.compile(
        r"^([0-9]+): delay[0-9]+ ([0-9\-\.]+) ([0-9\-\.]+) ([0-9\-\.]+) act.* act ([0-9\-\.]+) ([0-9\-\.]+)"
    )  # extract the real action (not used below, kept for reference)
    re_sym_action3 = re.compile(
        r"^([0-9]+): delay[0-9]+ ([0-9\-\.]+) ([0-9\-\.]+) ([0-9\-\.]+) act ([0-9\-\.]+) ([0-9\-\.]+)"
    )  # extract the intended action

    for filename in filenames:
        with open(filename, 'r') as file:

            # Iterate the file directly and stop at EOF instead of calling
            # readline() a fixed number of times on an exhausted file.
            for line_i, line in enumerate(file):
                if line_i >= max_lines_per_file:
                    break

                # Reset
                m_reset = re_reset.match(line)
                if m_reset:
                    if states is not None:
                        trajectories.append((states[:t + 1], actions[:t + 1]))
                    # builtin float replaces the np.float alias, which was
                    # removed in NumPy 1.24
                    states = np.ones((500, 3), float) * np.nan
                    actions = np.ones((500, 2), float) * np.nan
                    t = 0

                # Steps
                if source == 'sim':
                    m_sym_action = re_sym_action3.match(
                        line)  # intended action

                    if m_sym_action is not None:
                        t, x, y, yaw, act_fwd, act_rot = m_sym_action.groups()
                        t = int(t)
                        # each step index may be filled only once
                        assert np.all(np.isnan(states[t]))
                        states[t] = [
                            float(x),
                            float(y),
                            np.deg2rad(float(yaw))
                        ]
                        assert np.all(np.isnan(actions[t]))
                        actions[t] = (float(act_fwd), float(act_rot))
                        # print(line, states[t])

                else:
                    m_spin = re_spin.match(line)
                    m_action = re_action.match(line)
                    m_sys = re_sys.match(line)
                    m_sym_action = re_sym_action1.match(line)

                    if m_spin is not None:
                        t, yaw = m_spin.groups()
                        t = int(t)
                        assert np.all(np.isnan(states[t]))
                        # spin lines carry only a yaw value, stored without
                        # the deg2rad conversion applied to POS lines
                        states[t, 2] = float(yaw)
                        print(line, states[t])

                    if m_sym_action is not None:
                        t, x, y, yaw, act_fwd, act_rot = m_sym_action.groups()
                        t = int(t)
                        assert np.all(np.isnan(states[t]))
                        states[t] = [
                            float(x),
                            float(y),
                            np.deg2rad(float(yaw))
                        ]
                        assert np.all(np.isnan(actions[t]))
                        actions[t] = (float(act_fwd), float(act_rot))
                        # print(line, states[t])
                    else:
                        if m_action is not None:
                            t, x, y, yaw = m_action.groups()
                            t = int(t)
                            assert np.all(np.isnan(states[t]))
                            states[t] = [
                                float(x),
                                float(y),
                                np.deg2rad(float(yaw))
                            ]
                            print(line, states[t])

                        if m_sys is not None:
                            assert m_spin is None and m_action is None
                            t, act_fwd, act_rot = m_sys.groups()
                            # the logged step number is one ahead of the
                            # buffer row the action is stored in
                            t = int(t) - 1
                            assert np.all(np.isnan(actions[t]))
                            actions[t] = (float(act_fwd), float(act_rot))
                            print(line, actions[t])

    print("done")

    # Drop empty / too-short trajectories and derive per-step velocities.
    clean_trajectories = []
    for states, actions in trajectories:
        if np.all(np.isnan(states)):
            continue

        if len(states) < 2:
            continue

        # traj = np.concatenate([states, actions], axis=-1)
        lin_vel = np.linalg.norm(states[1:, :2] - states[:-1, :2], axis=-1)
        ang_vel = states[1:, 2] - states[:-1, 2]
        # wrap yaw differences into [-pi, pi)
        ang_vel = (ang_vel + np.pi) % (2 * np.pi) - np.pi
        act_fwd = actions[:, 0]  # * linear_velocity_scaler * 0.1
        act_rot = actions[:, 1]  # * angular_velocity_scaler * 0.1

        traj = dotdict(
            dict(
                x=states[:, 0],
                y=states[:, 1],
                yaw=states[:, 2],
                lin_vel=lin_vel,
                ang_vel=ang_vel,
                act_fwd=act_fwd,
                act_rot=act_rot,
                trajlen=len(states),
            ))
        clean_trajectories.append(traj)
        # print (traj)

    # time_delay = 2
    # action_fwd_rescaler = 0.3
    # action_rot_rescaler = 0.5

    errors = []
    scalers = []

    # Fit the model once for every candidate delay (0..4 steps).
    for time_delay in range(5):
        lin_vel_list = []
        act_fwd_list = []
        ang_vel_list = []
        act_rot_list = []
        for traj in clean_trajectories:
            valid_part_start = np.min(np.flatnonzero(np.isfinite(
                traj.lin_vel)))
            # act_t is the reference velocity at t+delay. For one step prediction, we want pairs act[i], vel[i+delay]
            lin_vel = traj.lin_vel[valid_part_start + time_delay:]
            act_fwd = traj.act_fwd[valid_part_start:traj.act_fwd.shape[0] -
                                   time_delay - 1]
            ang_vel = traj.ang_vel[valid_part_start + time_delay:]
            act_rot = traj.act_rot[valid_part_start:traj.act_rot.shape[0] -
                                   time_delay - 1]

            assert len(lin_vel) == len(act_fwd)
            assert len(ang_vel) == len(act_rot)
            assert len(ang_vel) == len(lin_vel)

            lin_vel_list.append(lin_vel)
            act_fwd_list.append(act_fwd)
            ang_vel_list.append(ang_vel)
            act_rot_list.append(act_rot)

        lin_vel = np.concatenate(lin_vel_list)
        act_fwd = np.concatenate(act_fwd_list)
        ang_vel = np.concatenate(ang_vel_list)
        act_rot = np.concatenate(act_rot_list)

        pred_func, sc, err = sysid(lin_vel,
                                   ang_vel,
                                   act_fwd,
                                   act_rot,
                                   model=model)
        scalers.append(sc)
        errors.append(err)

    # NOTE: lin_vel / ang_vel here are the arrays left over from the last
    # delay iteration above; they set the relative weight of the two errors.
    ang_error_scaler = np.mean(np.abs(lin_vel)) / np.mean(np.abs(ang_vel))
    errors = errors * np.array([1., np.square(ang_error_scaler)])[None]
    time_delay = np.argmin(np.sum(errors, axis=-1))
    print(errors)
    print(time_delay, scalers[time_delay], errors[time_delay])
    # (3, (0.6540917264778654, 0.07866921965909571), array([42.36897857, 36.7125886]))

    best_scalers = scalers[
        time_delay]  # 0.6540917264778654, 0.07866921965909571
    # Fixed values (matching the result recorded in the comment above) used
    # only to scale the raw actions in the plots; they are not taken from
    # best_scalers.
    fwd_scaler, rot_scaler = 0.6540917264778654, 0.07866921965909571

    # plots one-step predictions with time-delay
    plt.close('all')
    for traj in clean_trajectories[:10]:
        # act_t is the reference velocity at t+delay. we want pairs act[i], vel[i+delay]
        act_fwd = np.pad(traj.act_fwd, [[time_delay, 0]],
                         'constant')  # add zeros to beginning
        act_rot = np.pad(traj.act_rot, [[time_delay, 0]], 'constant')
        act_fwd = act_fwd[:-time_delay -
                          1]  # drop last actions, we have not seen their effect since the episode terminated
        act_rot = act_rot[:-time_delay - 1]

        # lin_vel_tmo[t] = vel[t-1]
        lin_vel_tmo = np.pad(traj.lin_vel[:-1], [
            [1, 0],
        ], 'constant')
        ang_vel_tmo = np.pad(traj.ang_vel[:-1], [
            [1, 0],
        ], 'constant')

        pred_xy, pred_yaw = pred_func(best_scalers, lin_vel_tmo, act_fwd,
                                      ang_vel_tmo, act_rot)

        # linear velocity: zero line, measured, predicted, scaled action
        plt.figure()
        plt.plot(np.arange(len(traj.lin_vel)),
                 np.zeros_like(traj.lin_vel),
                 color='black',
                 marker='',
                 linestyle='-',
                 linewidth=1)
        plt.plot(np.arange(len(traj.lin_vel)),
                 traj.lin_vel,
                 color='blue',
                 marker='.',
                 linestyle='-',
                 linewidth=1.2)
        plt.plot(np.arange(len(pred_xy)),
                 pred_xy,
                 color='green',
                 marker='.',
                 linestyle='-',
                 linewidth=1.2)
        plt.plot(np.arange(len(act_fwd)),
                 act_fwd * fwd_scaler,
                 color='red',
                 marker='.',
                 linestyle='-',
                 linewidth=1.2)
        plt.ylim([-0.1, 1.5])

        # angular velocity (in degrees): same four curves
        plt.figure()
        plt.plot(np.arange(len(traj.ang_vel)),
                 np.zeros_like(traj.ang_vel),
                 color='black',
                 marker='',
                 linestyle='-',
                 linewidth=1)
        plt.plot(np.arange(len(traj.ang_vel)),
                 np.rad2deg(traj.ang_vel),
                 color='blue',
                 marker='.',
                 linestyle='-',
                 linewidth=1.2)
        plt.plot(np.arange(len(pred_yaw)),
                 np.rad2deg(pred_yaw),
                 color='green',
                 marker='.',
                 linestyle='-',
                 linewidth=1.2)
        plt.plot(np.arange(len(act_rot)),
                 np.rad2deg(act_rot * rot_scaler),
                 color='red',
                 marker='.',
                 linestyle='-',
                 linewidth=1.2)
        plt.ylim([-10, 10])

    plt.show()
    # NOTE(review): interactive breakpoint left in on purpose so results can
    # be inspected; this blocks non-interactive runs.
    pdb.set_trace()
示例#5
0
    def __init__(
        self,
        model_path,
        model_param_path,
        update_freq,
        filter_tuning,
        imu_calib: Optional[ImuCalib] = None,
        force_cpu=False,
    ):
        """
        Set up the network measurement source, the filter and the timing
        constants that connect them.

        :param model_path: forwarded to MeasSourceNetwork
        :param model_param_path: JSON file providing the keys "imu_freq",
            "past_time", "window_time" and "arch"
        :param update_freq: measurement update frequency (Hz)
        :param filter_tuning: forwarded to ImuMSCKF
        :param imu_calib: optional initial IMU calibration, stored as
            ``self.icalib``
        :param force_cpu: forwarded to MeasSourceNetwork
        :raises ValueError: if past_time or window_time is not a whole number
            of IMU samples, if imu_freq_net is not a whole multiple of
            update_freq, or if window_time is not a whole number of updates
        """

        config_from_network = dotdict({})
        with open(model_param_path) as json_file:
            data_json = json.load(json_file)
            config_from_network["imu_freq_net"] = data_json["imu_freq"]
            config_from_network["past_time"] = data_json["past_time"]
            config_from_network["window_time"] = data_json["window_time"]
            config_from_network["arch"] = data_json["arch"]

        # frequencies and sizes conversion
        # float(...) keeps the checks working when the JSON stores whole
        # numbers as ints: int.is_integer() only exists from Python 3.12 on
        if not float(config_from_network.past_time *
                     config_from_network.imu_freq_net).is_integer():
            raise ValueError(
                "past_time cannot be represented by integer number of IMU data."
            )
        if not float(config_from_network.window_time *
                     config_from_network.imu_freq_net).is_integer():
            raise ValueError(
                "window_time cannot be represented by integer number of IMU data."
            )
        self.imu_freq_net = (config_from_network.imu_freq_net
                             )  # imu frequency as input to the network
        self.past_data_size = int(config_from_network.past_time *
                                  config_from_network.imu_freq_net)
        self.disp_window_size = int(config_from_network.window_time *
                                    config_from_network.imu_freq_net)
        self.net_input_size = self.disp_window_size + self.past_data_size

        # EXAMPLE :
        # if using 200 samples with step size 10, inference at 20 hz
        # we do update between clone separated by 19=update_distance_num_clone-1 other clone
        # if using 400 samples with 200 past data and clone_every_n_netimu_sample 10, inference at 20 hz
        # we do update between clone separated by 19=update_distance_num_clone-1 other clone
        if not float(config_from_network.imu_freq_net /
                     update_freq).is_integer():
            # the check requires imu_freq_net to be a whole multiple of
            # update_freq; the message now states it that way round
            raise ValueError("imu_freq_net must be divisible by update_freq.")
        if not float(config_from_network.window_time *
                     update_freq).is_integer():
            raise ValueError(
                "window_time cannot be represented by integer number of updates."
            )
        self.update_freq = update_freq
        self.clone_every_n_netimu_sample = int(
            config_from_network.imu_freq_net /
            update_freq)  # network inference/filter update interval
        assert (config_from_network.imu_freq_net % update_freq == 0
                )  # imu frequency must be a multiple of update frequency
        self.update_distance_num_clone = int(config_from_network.window_time *
                                             update_freq)

        # time
        self.dt_interp_us = int(1.0 / self.imu_freq_net * 1e6)
        self.dt_update_us = int(1.0 / self.update_freq *
                                1e6)  # multiple of interpolation interval

        # logging
        logging.info(
            f"Network Input Time: {config_from_network.past_time + config_from_network.window_time} = {config_from_network.past_time} + {config_from_network.window_time} (s)"
        )
        logging.info(
            f"Network Input size: {self.net_input_size} = {self.past_data_size} + {self.disp_window_size} (samples)"
        )
        logging.info("IMU interpolation frequency: %s (Hz)" %
                     self.imu_freq_net)
        logging.info("Measurement update frequency: %s (Hz)" %
                     self.update_freq)
        logging.info("Filter update stride state number: %i" %
                     self.update_distance_num_clone)
        logging.info(
            f"Interpolating IMU measurement every {self.dt_interp_us}us for the network input"
        )

        # IMU initial calibration
        self.icalib = imu_calib
        # MSCKF
        self.filter = ImuMSCKF(filter_tuning)

        net_config = {
            "in_dim": (self.past_data_size + self.disp_window_size) // 32 + 1
        }
        self.meas_source = MeasSourceNetwork(model_path,
                                             config_from_network["arch"],
                                             net_config, force_cpu)
        # self.meas_source = MeasSourceTorchScript(model_path, force_cpu)

        self.imu_buffer = ImuBuffer()

        #  This callback is called at first update if set
        self.callback_first_update = None
        # This callback can be use to bypass network use for measurement
        self.debug_callback_get_meas = None

        # keep track of past timestamp and measurement
        self.last_t_us, self.last_acc, self.last_gyr = -1, None, None
        self.next_interp_t_us = None
        self.next_aug_t_us = None
        self.has_done_first_update = False
示例#6
0
    """
    path, filename = os.path.split(fullpath)
    filename, ext = os.path.splitext(filename)
    sys.path.insert(0, path)
    module = importlib.import_module(filename, path)
    #importlib.reload(module)  # Might be out of date
    del sys.path[0]
    return module


# User settings are always loaded from the `settings` module in '.'.
userSettings = import_path(os.path.join('.', 'settings'))

# Optional per-symbol overrides, selected by the first command-line argument.
symbolSettings = None
symbol = None
if len(sys.argv) > 1:
    symbol = sys.argv[1]
if symbol:
    print("Importing symbol settings for %s..." % symbol)
    try:
        symbolSettings = import_path(
            os.path.join('..', 'settings-%s' % symbol))
    except Exception:
        # Best effort: any import failure is reported and the run continues
        # without symbol-specific overrides.
        print("Unable to find settings-%s.py." % symbol)

# Assemble settings: start from the base values, then let user settings and
# finally symbol settings override them.
settings = dict(vars(baseSettings))
settings.update(vars(userSettings))
if symbolSettings:
    settings.update(vars(symbolSettings))

# Main export: the merged settings with attribute-style access.
settings = dotdict.dotdict(settings)
示例#7
0
    def __init__(self, args, dataset):
        """
        Load the input data for ``dataset``, open the output/debug log files
        and build the ImuTracker with filter tuning taken from ``args``.

        Raises FileExistsError when the output file already exists and
        ``args.erase_old_log`` is not set.
        """
        # initialize data IO
        self.input = DataIO()
        self.input.load_all(dataset, args)
        self.input.load_vio(dataset, args)

        # log file initialization
        outdir = os.path.join(args.out_dir, dataset)
        if not os.path.exists(outdir):
            os.mkdir(outdir)
        outfile = os.path.join(outdir, args.out_filename)
        if os.path.exists(outfile):
            if not args.erase_old_log:
                logging.warning(f"{outfile} already exists, skipping")
                raise FileExistsError
            os.remove(outfile)
            logging.warning("previous log file erased")

        self.outfile = outfile
        self.f_state = open(outfile, "w")
        self.f_debug = open(os.path.join(outdir, "debug.txt"), "w")
        logging.info(f"writing to {outfile}")

        imu_calib = ImuCalib.from_attitude_file(dataset, args)

        # Every tuning entry is copied from the attribute of the same name on
        # args. Units noted in the original: init_attitude_sigma and
        # init_yaw_sigma in rad, init_vel_sigma in m/s, init_pos_sigma in m,
        # init_bg_sigma in rad/s, init_ba_sigma in m/s^2, const_cov_val_* as
        # sigma^2.
        tuning_keys = (
            "g_norm",
            "sigma_na",
            "sigma_ng",
            "ita_ba",
            "ita_bg",
            "init_attitude_sigma",
            "init_yaw_sigma",
            "init_vel_sigma",
            "init_pos_sigma",
            "init_bg_sigma",
            "init_ba_sigma",
            "meascov_scale",
            "use_const_cov",
            "const_cov_val_x",
            "const_cov_val_y",
            "const_cov_val_z",
            "add_sim_meas_noise",
            "sim_meas_cov_val",
            "sim_meas_cov_val_z",
            "mahalanobis_fail_scale",
        )
        filter_tuning = dotdict(
            {key: getattr(args, key) for key in tuning_keys})

        # ImuTracker object
        self.tracker = ImuTracker(model_path=args.model_path,
                                  model_param_path=args.model_param_path,
                                  update_freq=args.update_freq,
                                  filter_tuning=filter_tuning,
                                  imu_calib=imu_calib)

        # output
        self.log_output_buffer = None
示例#8
0
from utils.PrintBoard import PrintBoard

# Integer codes for board contents — presumably the two sides' pieces, an
# empty square and an arrow; TODO confirm against the game logic.
BLACK = -2
WHITE = 2
EMPTY = 0
ARROW = 1

# Parameters for training mode
args = dotdict(dict(
    num_iter=10,  # number of neural-network training iterations
    num_play_game=20,  # play "num_play_game" games, then train NNet once
    max_len_queue=200000,  # maximum length of the deque
    num_mcts_search=5,  # simulated searches from a state down to a leaf node
    max_batch_size=20,  # maximum amount of data per NNet training
    Cpuct=1,  # "temperature" hyperparameter of the upper-confidence-bound function
    arenaCompare=40,
    tempThreshold=35,  # exploration efficiency
    updateThreshold=0.55,  # threshold for replacing the old network with the new one
    checkpoint='./temp/',
    load_model=False,
    load_folder_file=('/models/', 'best.pth.tar'),
))


class TrainMode:
    """
    自博弈类
    """

    def __init__(self, game, nnet):