def reset(self):
    """Re-create the CMA-ES optimizer stored on ``self.cmaes``.

    A standard-normal vector of length ``self.dim`` is drawn first, then
    replaced by a saved solution loaded from disk (temporary override, see
    the TODO below).  When ``self.cheating`` is truthy the start point is
    taken from ``util.cheat_init_center`` instead and the population size
    in ``self.cmaes_params`` is forced to 2.
    """
    # TODO: isolate this for concurrent usage?
    # TODO: more config to CMA-ES
    # NOTE(review): the original indentation was lost; the popsize override
    # is assumed to belong to the ``cheating`` branch -- confirm.

    # The draw is kept even though it is overwritten right away, so the
    # NumPy global RNG advances exactly as before.
    start_point = np.random.normal(loc=0.0, scale=1.0, size=self.dim)

    # TODO: remove this hard-coded warm start
    from ObjectRecognition.model import load_param
    start_point = load_param('results/cmaes_soln/focus_self/ball_bin.npy')

    if self.cheating:  # for testing filter generality
        start_point = util.cheat_init_center((10, 10), 3, self.cheating)
        self.cmaes_params['popsize'] = 2

    # initialize pycma class
    initial_sigma = 0.1
    self.cmaes = cma.CMAEvolutionStrategy(
        start_point,
        initial_sigma,
        self.cmaes_params,
    )
def load_model(prefix, model_id, net_params, *args, **kwargs):
    """Build a ``ModelFocusCNN`` and load saved parameters into it.

    Args:
        prefix: passed to ``util.get_dir``; the result is the first element
            of the tuple handed to ``load_param``.
        model_id: second element of the tuple handed to ``load_param``.
        net_params: network configuration forwarded to ``ModelFocusCNN``.
        *args, **kwargs: forwarded unchanged to ``ModelFocusCNN``.

    Returns:
        The constructed model after ``set_parameters`` has been called with
        the loaded parameters.
    """
    # NOTE(review): load_param receives a (directory, model_id) tuple here,
    # not a joined path -- presumably it accepts both; confirm.
    weights_location = (util.get_dir(prefix), model_id)
    weights = load_param(weights_location)

    focus_model = ModelFocusCNN(
        *args,
        image_shape=(84, 84),
        net_params=net_params,
        **kwargs,
    )
    focus_model.set_parameters(weights)
    return focus_model
def load_model(model_path, net_params_path, pmodel=None, *args, **kwargs):
    """Load a focus model and wrap it in a ``ModelCollectionDAG``.

    Args:
        model_path: location of the saved parameters, passed to
            ``load_param``.
        net_params_path: path of the JSON file holding the network
            configuration.
        pmodel: optional premise model.  When truthy it is added to the DAG
            as ``'premise'`` and becomes the parent of ``'test_model'``.
        *args, **kwargs: forwarded unchanged to ``ModelFocusCNN``.

    Returns:
        A ``ModelCollectionDAG`` containing ``'test_model'`` (marked
        trainable) and, if given, ``'premise'``.
    """
    # Context manager so the config file handle is closed deterministically
    # (the previous open(...).read() left it to the garbage collector).
    with open(net_params_path) as net_params_file:
        net_params = json.load(net_params_file)
    params = load_param(model_path)

    test_model = ModelFocusCNN(
        *args,
        image_shape=(84, 84),
        net_params=net_params,
        **kwargs,
    )
    test_model.set_parameters(params)

    # construct model
    model = ModelCollectionDAG()
    parents = []
    if pmodel:
        model.add_model('premise', pmodel, [])
        parents = ['premise']
    model.add_model('test_model', test_model, parents)
    model.set_trainable('test_model')
    return model
# Dataset plus a DAG of previously trained focus models.
binarize = 0.01
GAME_NAME = 'atari'
dataset = parse_dataset(
    dataset_name=GAME_NAME,
    n_state=n_state,
    binarize=binarize,
    offset_fix=offset_fix,
)

# get ball model
prev_net_params_path_1 = 'ObjectRecognition/net_params/attn_softmax.json'
prev_weight_path_1 = 'results/cmaes_soln/focus_atari_breakout/paddle_bin_smooth.pth'
# Context manager so the config file handle is closed right after parsing.
with open(prev_net_params_path_1) as net_params_file:
    prev_net_params_1 = json.load(net_params_file)
prev_model_1 = ModelFocusCNN(
    image_shape=(84, 84),
    net_params=prev_net_params_1,
)
prev_model_1.set_parameters(load_param(prev_weight_path_1))

prev_net_params_path_2 = 'ObjectRecognition/net_params/attn_softmax.json'
prev_weight_path_2 = 'results/cmaes_soln/focus_atari_breakout/42531_2_smooth_2.pth'
with open(prev_net_params_path_2) as net_params_file:
    prev_net_params_2 = json.load(net_params_file)
prev_model_2 = ModelFocusCNN(
    image_shape=(84, 84),
    net_params=prev_net_params_2,
)
prev_model_2.set_parameters(load_param(prev_weight_path_2))

# Only model_1 is registered; model_2 is built above but its registration
# is currently disabled.
prev_model = ModelCollectionDAG()
prev_model.add_model(
    'model_1',
    prev_model_1,
    [],
    augment_fn=partial(util.remove_mean_batch, nb_size=(3, 8)),
)
# prev_model.add_model('model_2', prev_model_2, ['model_1'])
f1 = util.LowIntensityFiltering(5.0)
# atari python add_edge.py --model-form basic --optimizer-form DQN --record-rollouts "data/atarirandom/" --train-edge "Action->Paddle" --changepoint-dir data/atarigraph/ --num-stack 2 --factor 6 --train --num-iters 1000 --save-dir data/action --state-forms bounds --state-names Paddle --num-steps 1 --reward-check 3 --changepoint-queue-len 10 --num-update-model 1 --greedy-epsilon .1 --lr 1e-2 --init-form smalluni --behavior-policy egq --grad-epoch 5 --entropy-coef .01 --value-loss-coef 0.5 --gamma 0.1 --focus-dumps-name focus_dumps.txt --env AtariBreakoutNoFrameskip-v0 --save-models --save-dir data/ataripaddle --save-graph data/atarinetpaddle > atari/paddle.txt
# python add_edge.py --model-form population --optimizer-form CMAES --record-rollouts "data/integrationpaddle/" --train-edge "Paddle->Ball" --num-stack 1 --train --num-iters 30 --state-forms prox vel --state-names Paddle Ball --changepoint-dir ./data/integrationgraph/ --lr 5e-3 --behavior-policy esp --reward-form bounce --gamma .87 --init-form xuni --factor 8 --num-layers 1 --base-form basic --select-ratio .2 --num-population 10 --sample-duration 100 --sample-schedule 15 --warm-up 0 --log-interval 1 --scale 2 --reward-check 10 --focus-dumps-name focus_dumps.txt --env AtariBreakoutNoFrameskip-v0 --save-models --save-dir data/ataribounce > atari/ball.txt
# first train: python add_edge.py --model-form population --optimizer-form CMAES --record-rollouts "data/integrationpaddle/" --train-edge "Paddle->Ball" --num-stack 1 --train --num-iters 100 --state-forms prox vel vel --state-names Paddle Ball Paddle --changepoint-dir ./data/atarigraph/ --lr 5e-3 --greedy-epsilon .01 --behavior-policy esp --gamma 0 --init-form smalluni --factor 12 --num-layers 1 --base-form basic --num-population 10 --retest 2 --OoO-eval --sample-duration 100 --sample-schedule 15 --done-swapping 0 --warm-up 0 --log-interval 1 --init-var 5e-2 --scale 1 --reward-check 20 --focus-dumps-name focus_dumps.txt --env AtariBreakoutNoFrameskip-v0 --save-dir data/atariball --save-models --save-graph data/atariballgraph --save-interval 1 > atariball.txt
# train baseline: python add_edge.py --model-form raw --optimizer-form A2C --record-rollouts "data/random/" --train-edge "Action->Reward" --num-stack 4 --train --num-iters 1000000 --state-forms raw --state-names Paddle --changepoint-dir ./data/rawgraph/ --reward-form raw --lr 7e-4 --greedy-epsilon 0 --value-loss-coef 0.5 --optim RMSprop --behavior-policy esp --gamma 0.99 --init-form orth --factor 16 --num-layers 1 --warm-up 0 --log-interval 100 --entropy-coef .01 --normalize --reward-check 5 --changepoint-queue 5 --env AtariBreakoutNoFrameskip-v0 --gpu 3 --true-environment --lag-num 0 --post-transform-form linear --return-form value > a2c.txt
# python add_edge.py --model-form raw --optimizer-form PPO --record-rollouts "data/random/" --train-edge "Action->Reward" --num-stack 4 --train --num-iters 1000000 --state-forms raw --state-names Paddle --changepoint-dir ./data/rawgraph/ --reward-form raw --lr 2.5e-4 --greedy-epsilon 0 --gamma 0.99 --value-loss-coef 0.5 --optim RMSprop --init-form orth --factor 16 --num-layers 1 --warm-up 0 --log-interval 10 --entropy-coef .01 --normalize --reward-check 128 --changepoint-queue 128 --buffer-clip 128 --num-grad-states 32 --grad-epoch 4 --clip-param 0.1 --env AtariBreakoutNoFrameskip-v0 --gpu 2 --true-environment --lag-num 0 --post-transform-form linear --return-form normal > ataribaseline.txt
# Action->Gripper: python add_edge.py --env SelfPusher --true-environment --model-form basic --optimizer-form DQN --record-rollouts "data/pusherrandom/" --changepoint-dir data/fullpusher/ --train-edge "Action->Gripper" --num-stack 2 --train --num-iters 500 --save-dir data/pusheraction --state-forms bounds --state-names Gripper --frameskip 3 --init-form smalluni --save-models --save-graph data/fullpusher/Action-\>Gripper/ > ./pusher/action_gripper.txt
# Gripper->Block (touch): python add_edge.py --model-form vector --optimizer-form PPO --record-rollouts "data/extragripper/" --train-edge "Gripper->Block" --num-stack 1 --train --num-iters 1000 --state-forms prox bounds bounds --state-names Gripper Block Block --env SelfPusher --true-environment --base-node Action --changepoint-dir ./data/pushergraph/ --lr 7e-5 --behavior-policy esp --gamma .99 --init-form xnorm --num-layers 1 --reward-check 128 --changepoint-queue-len 128 --greedy-epsilon .001 --log-interval 10 --num-steps 1 --frameskip 3 --factor 16 --key-dim 2048 --num-grad-states 32 --return-form value --grad-epoch 8 --acti sin --save-dir ../datasets/caleb_data/blockvec --save-graph data/blockvec --save-models > blockvec.txt
# Pusher Baseline: python add_edge.py --model-form vector --optimizer-form PPO --record-rollouts "data/gripperdir/" --train-edge "Action->Reward" --num-stack 1 --train --num-iters 100000 --state-forms bounds bounds bounds prox prox --state-names Gripper Block Target Gripper__Block Block__Target --changepoint-dir ./data/rawgraph/ --true-environment --reward-form rawdist --lr 7e-4 --greedy-epsilon 0 --value-loss-coef 0.5 --init-form orth --behavior-policy esp --gamma .99 --num-layers 1 --reward-check 128 --changepoint-queue-len 128 --greedy-epsilon .001 --log-interval 10 --num-steps 1 --frameskip 3 --factor 16 --key-dim 2048 --num-grad-states 32 --return-form value --env SelfPusher --grad-epoch 8 --acti tanh --gpu 2 --frameskip 2 --normalize > pusherppo.txt
args = get_args()
torch.cuda.set_device(args.gpu)

# loading vision model
paddle_model_net_params_path = 'ObjectRecognition/net_params/attn_base.json'
# paddle_model_net_params_path = 'ObjectRecognition/net_params/attn_softmax.json'
# Context manager so the config file handle is closed right after parsing.
with open(paddle_model_net_params_path) as net_params_file:
    net_params = json.load(net_params_file)
params = load_param('results/cmaes_soln/focus_self/paddle.pth')
# params = load_param('ObjectRecognition/models/atari/paddle_bin_smooth.pth')
paddle_model = ModelFocusCNN(
    image_shape=(84, 84),
    net_params=net_params,
    binarize=0.0,
)
paddle_model.set_parameters(params)

# ball_model_net_params_path = 'ObjectRecognition/net_params/attn_base.json'
ball_model_net_params_path = 'ObjectRecognition/net_params/attn_softmax.json'
with open(ball_model_net_params_path) as net_params_file:
    net_params = json.load(net_params_file)
params = load_param('results/cmaes_soln/focus_self/ball.pth')
# params = load_param('ObjectRecognition/models/atari/42531_2_smooth_3_2.pth')
ball_model = ModelFocusCNN(
    image_shape=(84, 84),
    net_params=net_params,
    binarize=0.0,
)
ball_model.set_parameters(params)

model = ModelCollectionDAG()
use_prior=args.prior, argmax_mode=args.argmax_mode, ) elif args.model_type == 'attn': train_model = ModelAttentionCNN( image_shape=dataset.frame_shape, net_params=net_params, ) logger.info('loaded net_params %s' % (str(net_params))) # boosting with trained models if args.boost: # a model to be boosted b_net_params_path, b_weight_path = args.boost b_net_params = json.loads(open(b_net_params_path).read()) b_params = load_param(b_weight_path) b_model = ModelFocusCNN( image_shape=(84, 84), net_params=b_net_params, ) b_model.set_parameters(b_params) # boosting ensemble train_model = ModelFocusBoost( b_model, train_model, train_flags=[False, True], cp_detector=cpd, ) # paddle model for premise MICP loss
# optimizer-form
# train-edge
# state-forms
# state-names
# Usage Example:
# add Action->Paddle: python add_edge.py --model-form basic --optimizer-form DQN --record-rollouts "data/random/" --train-edge "Action->Paddle" --num-stack 2 --train --num-iters 10000 --save-dir data/action --state-forms bounds --state-names Paddle
# Using tabular Action->Paddle: python add_edge.py --model-form tab --optimizer-form TabQ --record-rollouts "data/random/" --train-edge "Action->Paddle" --num-stack 1 --train --num-iters 10000 --save-dir data/action --state-forms bounds --state-names Paddle --num-update-model 1
# Action->Paddle: python add_edge.py --model-form basic --optimizer-form DQN --record-rollouts "data/random/" --train-edge "Action->Paddle" --changepoint-dir data/integrationgraph --num-stack 2 --factor 6 --train --num-iters 1000 --save-dir data/action --state-forms bounds --state-names Paddle --num-steps 1 --reward-check 5 --num-update-model 1 --greedy-epsilon .1 --lr 1e-2 --init-form smalluni --behavior-policy egr --grad-epoch 5 --entropy-coef .01 --value-loss-coef 0.5 --gamma .9 --save-models --save-dir data/integrationpaddle --save-graph data/intnetpaddle > integration/paddle.txt
# python add_edge.py --model-form population --optimizer-form CMAES --record-rollouts "data/integrationpaddle/" --train-edge "Paddle->Ball" --num-stack 1 --train --num-iters 30 --state-forms prox vel --state-names Paddle Ball --changepoint-dir ./data/integrationgraph/ --lr 5e-3 --behavior-policy esp --reward-form bounce --gamma .87 --init-form xuni --factor 8 --num-layers 1 --base-form basic --select-ratio .2 --num-population 10 --sample-duration 100 --sample-schedule 12 --warm-up 0 --log-interval 1 --scale 2 --reward-check 10 --save-models --save-dir data/integrationbounce > integration/ball.txt
args = get_args()
torch.cuda.set_device(args.gpu)

# loading vision model
paddle_model_net_params_path = 'ObjectRecognition/net_params/attn_base.json'
# Context manager so the config file handle is closed right after parsing.
with open(paddle_model_net_params_path) as net_params_file:
    net_params = json.load(net_params_file)
params = load_param('ObjectRecognition/models/paddle_bin_long_smooth_2.pth')
paddle_model = ModelFocusCNN(
    image_shape=(84, 84),
    net_params=net_params,
)
paddle_model.set_parameters(params)

ball_model_net_params_path = 'ObjectRecognition/net_params/two_layer.json'
with open(ball_model_net_params_path) as net_params_file:
    net_params = json.load(net_params_file)
params = load_param('ObjectRecognition/models/ball.npy')
ball_model = ModelFocusCNN(
    image_shape=(84, 84),
    net_params=net_params,
)
ball_model.set_parameters(params)

model = ModelCollectionDAG()
model.add_model(
    'Paddle',
    paddle_model,
    [],
    augment_fn=util.RemoveMeanMemory(nb_size=(3, 9)),
)
model_id, net_params=net_params, use_prior=args.prior, argmax_mode=args.argmax_mode, ) save_path = util.get_dir(os.path.join(prefix, 'focus_img_%s'%model_id)) if plot_flags['plot_filter']: plot_model_filter(r_model, save_path) # boosting with trained models if args.boost: # partial ball model to be boosted ball_model_id = 'results/cmaes_soln/focus_atari_breakout/42080_16.npy' ball_net_params_text = open('objRecog/net_params/two_layer.json').read() ball_net_params = json.loads(ball_net_params_text) ball_params = load_param(ball_model_id) ball_model = ModelFocusCNN( image_shape=(84, 84), net_params=ball_net_params, ) ball_model.set_parameters(ball_params) # boosting ensemble r_model = ModelFocusBoost( ball_model, r_model, train_flags=[False, True], cp_detector=cpd, ) model = ModelCollectionDAG() if args.premise_path:
) # action micp micplosses = [] action_micploss = ActionMICPLoss( game, mi_match_coeff=1.0, mi_diffs_coeff=0.2, verbose=True, ) # micplosses.append(action_micploss) # premise loss pmodel_net_params_path = 'ObjectRecognition/net_params/two_layer_5_5.json' net_params = json.loads(open(pmodel_net_params_path).read()) params = load_param('results/cmaes_soln/focus_self/paddle_bin_long.npy') pmodel = ModelFocusCNN( image_shape=(84, 84), net_params=net_params, ) pmodel.set_parameters(params) paddle_model = load_model('results/cmaes_soln/focus_self/paddle_bin.npy', 'ObjectRecognition/net_params/two_layer.json', pmodel=pmodel) ball_model = load_model('results/cmaes_soln/focus_self/ball_bin.npy', 'ObjectRecognition/net_params/two_layer.json', pmodel=pmodel) comp_model = load_model('results/cmaes_soln/focus_self/42068_40.npy', 'ObjectRecognition/net_params/two_layer.json', pmodel=pmodel)
from AtariEnvironments.focus_atari import FocusAtariEnvironment
from SelfBreakout.breakout_screen import RandomPolicy, RotatePolicy
from ObjectRecognition.model import (ModelFocusCNN, ModelCollectionDAG, load_param, util)
import json, sys, cv2, torch
from Models.models import pytorch_model

if __name__ == '__main__':
    # Builds a Paddle -> Ball focus-model DAG and hands it to the Atari
    # Breakout environment; the first command-line argument is forwarded
    # as the last constructor argument of FocusAtariEnvironment.
    torch.cuda.set_device(1)

    paddle_model_net_params_path = 'ObjectRecognition/net_params/attn_softmax.json'
    # Context manager so the config file handle is closed right after parsing.
    with open(paddle_model_net_params_path) as net_params_file:
        net_params = json.load(net_params_file)
    params = load_param('ObjectRecognition/models/atari/paddle_bin_smooth.pth')
    paddle_model = ModelFocusCNN(
        image_shape=(84, 84),
        net_params=net_params,
        binarize=0.01,
    )
    paddle_model.set_parameters(params)

    ball_model_net_params_path = 'ObjectRecognition/net_params/attn_softmax.json'
    with open(ball_model_net_params_path) as net_params_file:
        net_params = json.load(net_params_file)
    params = load_param('ObjectRecognition/models/atari/42531_2_smooth.pth')
    ball_model = ModelFocusCNN(
        image_shape=(84, 84),
        net_params=net_params,
        binarize=0.01,
    )
    ball_model.set_parameters(params)

    # The ball model is registered with the paddle model as its parent.
    model = ModelCollectionDAG()
    model.add_model(
        'Paddle',
        paddle_model,
        [],
        augment_fn=util.RemoveMeanMemory(nb_size=(3, 9)),
    )
    model.add_model('Ball', ball_model, ['Paddle'])

    screen = FocusAtariEnvironment(model, "BreakoutNoFrameskip-v0", 1, 0, sys.argv[1])
parser.add_argument('--gpu', type=int, default=0,
                    help='the gpu to run on')
parser.add_argument('--ball', action='store_true', default=False,
                    help='use if we need the ball model')  # move into net_params?
parser.add_argument('--cuda', action='store_true', default=False,
                    help='use if we have cuda')  # move into net_params?
args = parser.parse_args()

# The paddle model is always loaded; its config name and weight directory
# come from the command line (args.params_name / args.model_dir).
# paddle_model_net_params_path = 'ObjectRecognition/net_params/attn_base.json'
paddle_model_net_params_path = 'ObjectRecognition/net_params/%s.json' % args.params_name
# Context manager so the config file handle is closed right after parsing.
with open(paddle_model_net_params_path) as net_params_file:
    net_params = json.load(net_params_file)
# params = load_param('ObjectRecognition/models/self/paddle_bin_long_smooth.pth')
params = load_param(os.path.join(args.model_dir, 'paddle.pth'))
paddle_model = ModelFocusCNN(
    image_shape=(84, 84),
    net_params=net_params,
    binarize=0.0,
)
paddle_model.set_parameters(params)

# The ball model is only loaded when --ball is given.
# NOTE(review): original indentation was lost; the whole ball-loading
# sequence is assumed to be guarded by args.ball -- confirm.
# ball_model_net_params_path = 'ObjectRecognition/net_params/attn_base.json'
if args.ball:
    ball_model_net_params_path = 'ObjectRecognition/net_params/%s.json' % args.params_name
    with open(ball_model_net_params_path) as net_params_file:
        net_params = json.load(net_params_file)
    # params = load_param('ObjectRecognition/models/self/ball_bin_long_smooth.pth')
    params = load_param(os.path.join(args.model_dir, 'ball.pth'))
    ball_model = ModelFocusCNN(
        image_shape=(84, 84),
        net_params=net_params,
        binarize=0.0,
    )
    ball_model.set_parameters(params)

model = ModelCollectionDAG()