def load_model(prefix, model_id, net_params, *args, **kwargs):
    """Build a ModelFocusCNN and initialise it from stored parameters.

    The parameters are fetched with ``load_param``, which receives the pair
    ``(util.get_dir(prefix), model_id)``. Any extra positional and keyword
    arguments are forwarded to the ``ModelFocusCNN`` constructor.

    Returns:
        The model, after ``set_parameters`` has been called on it.
    """
    param_location = (util.get_dir(prefix), model_id)
    stored_params = load_param(param_location)

    # The image shape is fixed to 84x84 for every model built here.
    focus_cnn = ModelFocusCNN(
        *args,
        image_shape=(84, 84),
        net_params=net_params,
        **kwargs,
    )
    focus_cnn.set_parameters(stored_params)
    return focus_cnn
def load_model(model_path, net_params_path, pmodel=None, *args, **kwargs):
    """Load a trained focus model and wrap it in a ModelCollectionDAG.

    Args:
        model_path: location handed to ``load_param`` to fetch the weights.
        net_params_path: path of a JSON file holding the network parameters.
        pmodel: optional premise model. When truthy it is registered under
            the name ``'premise'`` and ``'test_model'`` lists it as its
            dependency; otherwise ``'test_model'`` has no dependencies.
        *args: extra positional arguments forwarded to ``ModelFocusCNN``.
        **kwargs: extra keyword arguments forwarded to ``ModelFocusCNN``.

    Returns:
        A ``ModelCollectionDAG`` in which ``'test_model'`` has been marked
        via ``set_trainable``.
    """
    # Read the JSON inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(net_params_path) as net_params_file:
        net_params = json.load(net_params_file)

    params = load_param(model_path)
    test_model = ModelFocusCNN(
        image_shape=(84, 84),
        net_params=net_params,
        *args,
        **kwargs,
    )
    test_model.set_parameters(params)

    # construct model
    model = ModelCollectionDAG()
    if pmodel:
        model.add_model('premise', pmodel, [])
        model.add_model('test_model', test_model, ['premise'])
    else:
        model.add_model('test_model', test_model, [])
    model.set_trainable('test_model')
    return model
# get dataset
n_state = 500
offset_fix = 0
binarize = 0.01
GAME_NAME = 'atari'
dataset = parse_dataset(
    dataset_name=GAME_NAME,
    n_state=n_state,
    binarize=binarize,
    offset_fix=offset_fix,
)

# get ball model
# First previously trained model: network description + weights.
prev_net_params_path_1 = 'ObjectRecognition/net_params/attn_softmax.json'
prev_weight_path_1 = 'results/cmaes_soln/focus_atari_breakout/paddle_bin_smooth.pth'
# Read the JSON under a context manager so the file handle is closed.
with open(prev_net_params_path_1) as net_params_file:
    prev_net_params_1 = json.load(net_params_file)
prev_model_1 = ModelFocusCNN(
    image_shape=(84, 84),
    net_params=prev_net_params_1,
)
prev_model_1.set_parameters(load_param(prev_weight_path_1))

# Second previously trained model: same network description, other weights.
prev_net_params_path_2 = 'ObjectRecognition/net_params/attn_softmax.json'
prev_weight_path_2 = 'results/cmaes_soln/focus_atari_breakout/42531_2_smooth_2.pth'
with open(prev_net_params_path_2) as net_params_file:
    prev_net_params_2 = json.load(net_params_file)
prev_model_2 = ModelFocusCNN(
    image_shape=(84, 84),
    net_params=prev_net_params_2,
)
prev_model_2.set_parameters(load_param(prev_weight_path_2))

# Collect the previous models; model_1 has no dependencies and its output is
# post-processed by util.remove_mean_batch with nb_size=(3, 8).
prev_model = ModelCollectionDAG()
prev_model.add_model(
    'model_1',
    prev_model_1,
    [],
    augment_fn=partial(util.remove_mean_batch, nb_size=(3, 8)),
)
# train baseline: python add_edge.py --model-form raw --optimizer-form A2C --record-rollouts "data/random/" --train-edge "Action->Reward" --num-stack 4 --train --num-iters 1000000 --state-forms raw --state-names Paddle --changepoint-dir ./data/rawgraph/ --reward-form raw --lr 7e-4 --greedy-epsilon 0 --value-loss-coef 0.5 --optim RMSprop --behavior-policy esp --gamma 0.99 --init-form orth --factor 16 --num-layers 1 --warm-up 0 --log-interval 100 --entropy-coef .01 --normalize --reward-check 5 --changepoint-queue 5 --env AtariBreakoutNoFrameskip-v0 --gpu 3 --true-environment --lag-num 0 --post-transform-form linear --return-form value > a2c.txt
# python add_edge.py --model-form raw --optimizer-form PPO --record-rollouts "data/random/" --train-edge "Action->Reward" --num-stack 4 --train --num-iters 1000000 --state-forms raw --state-names Paddle --changepoint-dir ./data/rawgraph/ --reward-form raw --lr 2.5e-4 --greedy-epsilon 0 --gamma 0.99 --value-loss-coef 0.5 --optim RMSprop --init-form orth --factor 16 --num-layers 1 --warm-up 0 --log-interval 10 --entropy-coef .01 --normalize --reward-check 128 --changepoint-queue 128 --buffer-clip 128 --num-grad-states 32 --grad-epoch 4 --clip-param 0.1 --env AtariBreakoutNoFrameskip-v0 --gpu 2 --true-environment --lag-num 0 --post-transform-form linear --return-form normal > ataribaseline.txt
# Action->Gripper: python add_edge.py --env SelfPusher --true-environment --model-form basic --optimizer-form DQN --record-rollouts "data/pusherrandom/" --changepoint-dir data/fullpusher/ --train-edge "Action->Gripper" --num-stack 2 --train --num-iters 500 --save-dir data/pusheraction --state-forms bounds --state-names Gripper --frameskip 3 --init-form smalluni --save-models --save-graph data/fullpusher/Action-\>Gripper/ > ./pusher/action_gripper.txt
# Gripper->Block (touch): python add_edge.py --model-form vector --optimizer-form PPO --record-rollouts "data/extragripper/" --train-edge "Gripper->Block" --num-stack 1 --train --num-iters 1000 --state-forms prox bounds bounds --state-names Gripper Block Block --env SelfPusher --true-environment --base-node Action --changepoint-dir ./data/pushergraph/ --lr 7e-5 --behavior-policy esp --gamma .99 --init-form xnorm --num-layers 1 --reward-check 128 --changepoint-queue-len 128 --greedy-epsilon .001 --log-interval 10 --num-steps 1 --frameskip 3 --factor 16 --key-dim 2048 --num-grad-states 32 --return-form value --grad-epoch 8 --acti sin --save-dir ../datasets/caleb_data/blockvec --save-graph data/blockvec --save-models > blockvec.txt
# Pusher Baseline: python add_edge.py --model-form vector --optimizer-form PPO --record-rollouts "data/gripperdir/" --train-edge "Action->Reward" --num-stack 1 --train --num-iters 100000 --state-forms bounds bounds bounds prox prox --state-names Gripper Block Target Gripper__Block Block__Target --changepoint-dir ./data/rawgraph/ --true-environment --reward-form rawdist --lr 7e-4 --greedy-epsilon 0 --value-loss-coef 0.5 --init-form orth --behavior-policy esp --gamma .99 --num-layers 1 --reward-check 128 --changepoint-queue-len 128 --greedy-epsilon .001 --log-interval 10 --num-steps 1 --frameskip 3 --factor 16 --key-dim 2048 --num-grad-states 32 --return-form value --env SelfPusher --grad-epoch 8 --acti tanh --gpu 2 --frameskip 2 --normalize > pusherppo.txt
args = get_args()
# Select the CUDA device given on the command line.
torch.cuda.set_device(args.gpu)

# loading vision model
paddle_model_net_params_path = 'ObjectRecognition/net_params/attn_base.json'
# paddle_model_net_params_path = 'ObjectRecognition/net_params/attn_softmax.json'
# Read the JSON under a context manager so the file handle is closed.
with open(paddle_model_net_params_path) as net_params_file:
    net_params = json.load(net_params_file)
params = load_param('results/cmaes_soln/focus_self/paddle.pth')
# params = load_param('ObjectRecognition/models/atari/paddle_bin_smooth.pth')
paddle_model = ModelFocusCNN(
    image_shape=(84, 84),
    net_params=net_params,
    binarize=0.000,
)
paddle_model.set_parameters(params)

# ball_model_net_params_path = 'ObjectRecognition/net_params/attn_base.json'
ball_model_net_params_path = 'ObjectRecognition/net_params/attn_softmax.json'
# net_params / params are deliberately rebound for the ball model.
with open(ball_model_net_params_path) as net_params_file:
    net_params = json.load(net_params_file)
params = load_param('results/cmaes_soln/focus_self/ball.pth')
# params = load_param('ObjectRecognition/models/atari/42531_2_smooth_3_2.pth')
ball_model = ModelFocusCNN(
    image_shape=(84, 84),
    net_params=net_params,
    binarize=0.0,
)
ball_model.set_parameters(params)

# Register the paddle model with no dependencies; its output is
# post-processed by util.RemoveMeanMemory with nb_size=(8, 8).
model = ModelCollectionDAG()
model.add_model(
    'Paddle',
    paddle_model,
    [],
    augment_fn=util.RemoveMeanMemory(nb_size=(8, 8)),
)
- specify changepoint detector which fits the dynamic of the object """ if args.champ: cpd = CHAMPDetector('premise->object', CHAMP_params) else: logger.info('using simple linear changepoint detector') cpd = LinearCPD(np.pi / 4.0) """ Model Template & Constructor """ model = ModelCollectionDAG() net_params = json.loads(open(args.net).read()) if args.model_type == 'focus': train_model = ModelFocusCNN( image_shape=dataset.frame_shape, net_params=net_params, use_prior=args.prior, argmax_mode=args.argmax_mode, ) elif args.model_type == 'attn': train_model = ModelAttentionCNN( image_shape=dataset.frame_shape, net_params=net_params, ) logger.info('loaded net_params %s' % (str(net_params))) # boosting with trained models if args.boost: # a model to be boosted b_net_params_path, b_weight_path = args.boost b_net_params = json.loads(open(b_net_params_path).read()) b_params = load_param(b_weight_path)
# state-forms
# state-names
# Usage Example:
# add Action->Paddle: python add_edge.py --model-form basic --optimizer-form DQN --record-rollouts "data/random/" --train-edge "Action->Paddle" --num-stack 2 --train --num-iters 10000 --save-dir data/action --state-forms bounds --state-names Paddle
# Using tabular Action->Paddle: python add_edge.py --model-form tab --optimizer-form TabQ --record-rollouts "data/random/" --train-edge "Action->Paddle" --num-stack 1 --train --num-iters 10000 --save-dir data/action --state-forms bounds --state-names Paddle --num-update-model 1
# Action->Paddle: python add_edge.py --model-form basic --optimizer-form DQN --record-rollouts "data/random/" --train-edge "Action->Paddle" --changepoint-dir data/integrationgraph --num-stack 2 --factor 6 --train --num-iters 1000 --save-dir data/action --state-forms bounds --state-names Paddle --num-steps 1 --reward-check 5 --num-update-model 1 --greedy-epsilon .1 --lr 1e-2 --init-form smalluni --behavior-policy egr --grad-epoch 5 --entropy-coef .01 --value-loss-coef 0.5 --gamma .9 --save-models --save-dir data/integrationpaddle --save-graph data/intnetpaddle > integration/paddle.txt
# python add_edge.py --model-form population --optimizer-form CMAES --record-rollouts "data/integrationpaddle/" --train-edge "Paddle->Ball" --num-stack 1 --train --num-iters 30 --state-forms prox vel --state-names Paddle Ball --changepoint-dir ./data/integrationgraph/ --lr 5e-3 --behavior-policy esp --reward-form bounce --gamma .87 --init-form xuni --factor 8 --num-layers 1 --base-form basic --select-ratio .2 --num-population 10 --sample-duration 100 --sample-schedule 12 --warm-up 0 --log-interval 1 --scale 2 --reward-check 10 --save-models --save-dir data/integrationbounce > integration/ball.txt
args = get_args()
# Select the CUDA device given on the command line.
torch.cuda.set_device(args.gpu)

# loading vision model
paddle_model_net_params_path = 'ObjectRecognition/net_params/attn_base.json'
# Read the JSON under a context manager so the file handle is closed.
with open(paddle_model_net_params_path) as net_params_file:
    net_params = json.load(net_params_file)
params = load_param('ObjectRecognition/models/paddle_bin_long_smooth_2.pth')
paddle_model = ModelFocusCNN(
    image_shape=(84, 84),
    net_params=net_params,
)
paddle_model.set_parameters(params)

ball_model_net_params_path = 'ObjectRecognition/net_params/two_layer.json'
# net_params / params are deliberately rebound for the ball model.
with open(ball_model_net_params_path) as net_params_file:
    net_params = json.load(net_params_file)
params = load_param('ObjectRecognition/models/ball.npy')
ball_model = ModelFocusCNN(
    image_shape=(84, 84),
    net_params=net_params,
)
ball_model.set_parameters(params)

# 'Paddle' has no dependencies and is post-processed by
# util.RemoveMeanMemory with nb_size=(3, 9); 'Ball' depends on 'Paddle'.
model = ModelCollectionDAG()
model.add_model(
    'Paddle',
    paddle_model,
    [],
    augment_fn=util.RemoveMeanMemory(nb_size=(3, 9)),
)
model.add_model('Ball', ball_model, ['Paddle'])
####
'filter': 2, 'channel': [3, 1], 'kernel_size': [3, 5], 'stride': [1, 1], 'padding': [2, 4], 'activation_fn': ['ReLU', 'Tanh'] } net_params_2 = { 'filter': 2, 'channel': [10, 1], 'kernel_size': [3, 5], 'stride': [1, 1], 'padding': [2, 4], 'activation_fn': ['ReLU6', 'Tanh'] } model_1 = ModelFocusCNN((84, 84), net_params=net_params_1) model_2 = ModelFocusCNN((84, 84), net_params=net_params_2) model_boost = ModelFocusBoost( LinearCPD(np.pi / 4), model_1, model_2, train_flags=[True, False], ) # parameters print(model_boost.count_parameters()) ones = np.arange(model_boost.count_parameters()) model_boost.set_parameters(ones) print(list(model_boost.parameters())) # forward
use_prior=args.prior, argmax_mode=args.argmax_mode, ) save_path = util.get_dir(os.path.join(prefix, 'focus_img_%s'%model_id)) if plot_flags['plot_filter']: plot_model_filter(r_model, save_path) # boosting with trained models if args.boost: # partial ball model to be boosted ball_model_id = 'results/cmaes_soln/focus_atari_breakout/42080_16.npy' ball_net_params_text = open('objRecog/net_params/two_layer.json').read() ball_net_params = json.loads(ball_net_params_text) ball_params = load_param(ball_model_id) ball_model = ModelFocusCNN( image_shape=(84, 84), net_params=ball_net_params, ) ball_model.set_parameters(ball_params) # boosting ensemble r_model = ModelFocusBoost( ball_model, r_model, train_flags=[False, True], cp_detector=cpd, ) model = ModelCollectionDAG() if args.premise_path: pmodel_weight_path = args.premise_path pmodel_net_params_text = open(args.premise_net).read() pmodel_net_params = json.loads(pmodel_net_params_text)
# Manual check of ModelFocusCNN: run one forward pass on an all-zero batch,
# then overwrite the parameters from a flat array and print them.
import numpy as np
import torch

from ObjectRecognition.model import ModelFocusCNN

# Network description passed to the model constructor.
net_params = dict(
    filter=2,
    channel=[3, 1],
    kernel_size=[3, 5],
    stride=[1, 1],
    padding=[2, 4],
    activation_fn=['ReLU', 'Tanh'],
)

model = ModelFocusCNN((84, 84), net_params=net_params)

# forward
zero_batch = torch.zeros([100, 1, 84, 84])
out = model.forward(zero_batch)
print(out, out.shape)

# parameters
print(model.count_parameters())
# Despite the name, this holds 0..N-1 (one distinct value per parameter).
ones = np.arange(model.count_parameters())
model.set_parameters(ones)
print(list(model.parameters()))
# action micp micplosses = [] action_micploss = ActionMICPLoss( game, mi_match_coeff=1.0, mi_diffs_coeff=0.2, verbose=True, ) # micplosses.append(action_micploss) # premise loss pmodel_net_params_path = 'ObjectRecognition/net_params/two_layer_5_5.json' net_params = json.loads(open(pmodel_net_params_path).read()) params = load_param('results/cmaes_soln/focus_self/paddle_bin_long.npy') pmodel = ModelFocusCNN( image_shape=(84, 84), net_params=net_params, ) pmodel.set_parameters(params) paddle_model = load_model('results/cmaes_soln/focus_self/paddle_bin.npy', 'ObjectRecognition/net_params/two_layer.json', pmodel=pmodel) ball_model = load_model('results/cmaes_soln/focus_self/ball_bin.npy', 'ObjectRecognition/net_params/two_layer.json', pmodel=pmodel) comp_model = load_model('results/cmaes_soln/focus_self/42068_40.npy', 'ObjectRecognition/net_params/two_layer.json', pmodel=pmodel) premise_micploss = PremiseMICPLoss( game, 'premise',