Пример #1
0
 def reset(self):
     """Build a new CMA-ES optimizer and store it on ``self.cmaes``.

     The start point is loaded from a hard-coded parameter file. When
     ``self.cheating`` is truthy the start point instead comes from
     ``util.cheat_init_center`` and ``self.cmaes_params['popsize']`` is
     overwritten with 2 (this mutates the stored options dict).
     """
     # TODO: isolate this for concurrent usage?
     # TODO: more config to CMA-ES

     # This random draw is overwritten right below; it is still performed
     # so the global NumPy RNG state advances as it always has.
     start_point = np.random.normal(loc=0.0, scale=1.0, size=self.dim)

     # TODO: remove this hard-coded warm start
     from ObjectRecognition.model import load_param
     start_point = load_param('results/cmaes_soln/focus_self/ball_bin.npy')

     if self.cheating:
         # for testing filter generality
         start_point = util.cheat_init_center((10, 10), 3, self.cheating)
         self.cmaes_params['popsize'] = 2

     # initialize pycma class
     initial_sigma = 0.1
     self.cmaes = cma.CMAEvolutionStrategy(
         start_point,
         initial_sigma,
         self.cmaes_params,
     )
Пример #2
0
def load_model(prefix, model_id, net_params, *args, **kwargs):
    """Create a ``ModelFocusCNN`` and fill it with stored parameters.

    Args:
        prefix: handed to ``util.get_dir``; the result is the first element
            of the tuple given to ``load_param``.
        model_id: second element of the tuple given to ``load_param``.
        net_params: forwarded to ``ModelFocusCNN`` as ``net_params``.
        *args: extra positional arguments forwarded to ``ModelFocusCNN``.
        **kwargs: extra keyword arguments forwarded to ``ModelFocusCNN``.

    Returns:
        The constructed model after ``set_parameters`` has been called.
    """
    # NOTE(review): load_param receives a (directory, id) tuple rather than
    # a joined path -- presumably it resolves the file itself; confirm.
    weights_location = (util.get_dir(prefix), model_id)
    weights = load_param(weights_location)

    focus_model = ModelFocusCNN(
        *args,
        image_shape=(84, 84),
        net_params=net_params,
        **kwargs
    )
    focus_model.set_parameters(weights)
    return focus_model
Пример #3
0
def load_model(model_path, net_params_path, pmodel=None, *args, **kwargs):
    """Load a focus model and wrap it in a ``ModelCollectionDAG``.

    Args:
        model_path: passed to ``load_param`` to obtain the model parameters.
        net_params_path: path of a JSON file holding the network
            configuration given to ``ModelFocusCNN``.
        pmodel: optional premise model. When truthy it is registered as
            ``'premise'`` and the loaded model is added with ``'premise'``
            as its dependency; otherwise the loaded model is added alone.
        *args: extra positional arguments forwarded to ``ModelFocusCNN``.
        **kwargs: extra keyword arguments forwarded to ``ModelFocusCNN``.

    Returns:
        A ``ModelCollectionDAG`` containing the loaded model under the name
        ``'test_model'``, marked trainable via ``set_trainable``.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(net_params_path) as net_params_file:
        net_params = json.load(net_params_file)

    params = load_param(model_path)
    test_mode = ModelFocusCNN(
        *args,
        image_shape=(84, 84),
        net_params=net_params,
        **kwargs
    )
    test_mode.set_parameters(params)

    # construct model
    model = ModelCollectionDAG()
    if pmodel:
        model.add_model('premise', pmodel, [])
        model.add_model('test_model', test_mode, ['premise'])
    else:
        model.add_model('test_model', test_mode, [])
    model.set_trainable('test_model')
    return model
    binarize = 0.01
    GAME_NAME = 'atari'
    dataset = parse_dataset(dataset_name=GAME_NAME,
                            n_state=n_state,
                            binarize=binarize,
                            offset_fix=offset_fix)

    # get ball model
    prev_net_params_path_1 = 'ObjectRecognition/net_params/attn_softmax.json'
    prev_weight_path_1 = 'results/cmaes_soln/focus_atari_breakout/paddle_bin_smooth.pth'
    prev_net_params_1 = json.loads(open(prev_net_params_path_1).read())
    prev_model_1 = ModelFocusCNN(
        image_shape=(84, 84),
        net_params=prev_net_params_1,
    )
    prev_model_1.set_parameters(load_param(prev_weight_path_1))
    prev_net_params_path_2 = 'ObjectRecognition/net_params/attn_softmax.json'
    prev_weight_path_2 = 'results/cmaes_soln/focus_atari_breakout/42531_2_smooth_2.pth'
    prev_net_params_2 = json.loads(open(prev_net_params_path_2).read())
    prev_model_2 = ModelFocusCNN(
        image_shape=(84, 84),
        net_params=prev_net_params_2,
    )
    prev_model_2.set_parameters(load_param(prev_weight_path_2))
    prev_model = ModelCollectionDAG()
    prev_model.add_model('model_1',
                         prev_model_1, [],
                         augment_fn=partial(util.remove_mean_batch,
                                            nb_size=(3, 8)))
    # prev_model.add_model('model_2', prev_model_2, ['model_1'])
    f1 = util.LowIntensityFiltering(5.0)
Пример #5
0
    # atari python add_edge.py --model-form basic --optimizer-form DQN --record-rollouts "data/atarirandom/" --train-edge "Action->Paddle" --changepoint-dir data/atarigraph/ --num-stack 2 --factor 6 --train --num-iters 1000 --save-dir data/action --state-forms bounds --state-names Paddle --num-steps 1 --reward-check 3 --changepoint-queue-len 10 --num-update-model 1 --greedy-epsilon .1 --lr 1e-2 --init-form smalluni --behavior-policy egq --grad-epoch 5 --entropy-coef .01 --value-loss-coef 0.5 --gamma 0.1 --focus-dumps-name focus_dumps.txt --env AtariBreakoutNoFrameskip-v0 --save-models --save-dir data/ataripaddle --save-graph data/atarinetpaddle > atari/paddle.txt
    # python add_edge.py --model-form population --optimizer-form CMAES --record-rollouts "data/integrationpaddle/" --train-edge "Paddle->Ball" --num-stack 1 --train --num-iters 30 --state-forms prox vel --state-names Paddle Ball --changepoint-dir ./data/integrationgraph/ --lr 5e-3 --behavior-policy esp --reward-form bounce --gamma .87 --init-form xuni --factor 8 --num-layers 1 --base-form basic --select-ratio .2 --num-population 10 --sample-duration 100 --sample-schedule 15 --warm-up 0 --log-interval 1 --scale 2 --reward-check 10 --focus-dumps-name focus_dumps.txt --env AtariBreakoutNoFrameskip-v0 --save-models --save-dir data/ataribounce  > atari/ball.txt
    # first train: python add_edge.py --model-form population --optimizer-form CMAES --record-rollouts "data/integrationpaddle/" --train-edge "Paddle->Ball" --num-stack 1 --train --num-iters 100 --state-forms prox vel vel --state-names Paddle Ball Paddle --changepoint-dir ./data/atarigraph/ --lr 5e-3 --greedy-epsilon .01 --behavior-policy esp --gamma 0 --init-form smalluni --factor 12 --num-layers 1 --base-form basic --num-population 10 --retest 2 --OoO-eval --sample-duration 100 --sample-schedule 15 --done-swapping 0 --warm-up 0 --log-interval 1 --init-var 5e-2 --scale 1 --reward-check 20 --focus-dumps-name focus_dumps.txt --env AtariBreakoutNoFrameskip-v0 --save-dir data/atariball --save-models --save-graph data/atariballgraph --save-interval 1  > atariball.txt
    # train baseline: python add_edge.py --model-form raw --optimizer-form A2C --record-rollouts "data/random/" --train-edge "Action->Reward" --num-stack 4 --train --num-iters 1000000 --state-forms raw --state-names Paddle --changepoint-dir ./data/rawgraph/ --reward-form raw --lr 7e-4 --greedy-epsilon 0 --value-loss-coef 0.5 --optim RMSprop --behavior-policy esp --gamma 0.99 --init-form orth --factor 16 --num-layers 1 --warm-up 0 --log-interval 100 --entropy-coef .01 --normalize --reward-check 5 --changepoint-queue 5 --env AtariBreakoutNoFrameskip-v0 --gpu 3 --true-environment --lag-num 0 --post-transform-form linear --return-form value > a2c.txt
    # python add_edge.py --model-form raw --optimizer-form PPO --record-rollouts "data/random/" --train-edge "Action->Reward" --num-stack 4 --train --num-iters 1000000 --state-forms raw --state-names Paddle --changepoint-dir ./data/rawgraph/ --reward-form raw --lr 2.5e-4 --greedy-epsilon 0 --gamma 0.99 --value-loss-coef 0.5 --optim RMSprop --init-form orth --factor 16 --num-layers 1 --warm-up 0 --log-interval 10 --entropy-coef .01 --normalize --reward-check 128 --changepoint-queue 128 --buffer-clip 128 --num-grad-states 32 --grad-epoch 4 --clip-param 0.1 --env AtariBreakoutNoFrameskip-v0 --gpu 2 --true-environment --lag-num 0 --post-transform-form linear --return-form normal > ataribaseline.txt
    # Action->Gripper: python add_edge.py --env SelfPusher --true-environment --model-form basic --optimizer-form DQN --record-rollouts "data/pusherrandom/" --changepoint-dir data/fullpusher/ --train-edge "Action->Gripper" --num-stack 2 --train --num-iters 500 --save-dir data/pusheraction --state-forms bounds --state-names Gripper --frameskip 3 --init-form smalluni --save-models --save-graph data/fullpusher/Action-\>Gripper/ > ./pusher/action_gripper.txt
    # Gripper->Block (touch): python add_edge.py --model-form vector --optimizer-form PPO --record-rollouts "data/extragripper/" --train-edge "Gripper->Block" --num-stack 1 --train --num-iters 1000 --state-forms prox bounds bounds --state-names Gripper Block Block --env SelfPusher --true-environment --base-node Action --changepoint-dir ./data/pushergraph/ --lr 7e-5 --behavior-policy esp --gamma .99 --init-form xnorm --num-layers 1 --reward-check 128 --changepoint-queue-len 128 --greedy-epsilon .001 --log-interval 10 --num-steps 1 --frameskip 3 --factor 16 --key-dim 2048 --num-grad-states 32 --return-form value --grad-epoch 8 --acti sin --save-dir ../datasets/caleb_data/blockvec --save-graph data/blockvec --save-models > blockvec.txt
    # Pusher Baseline: python add_edge.py --model-form vector --optimizer-form PPO --record-rollouts "data/gripperdir/" --train-edge "Action->Reward" --num-stack 1 --train --num-iters 100000 --state-forms bounds bounds bounds prox prox --state-names Gripper Block Target Gripper__Block Block__Target --changepoint-dir ./data/rawgraph/ --true-environment --reward-form rawdist --lr 7e-4 --greedy-epsilon 0 --value-loss-coef 0.5 --init-form orth --behavior-policy esp --gamma .99 --num-layers 1 --reward-check 128 --changepoint-queue-len 128 --greedy-epsilon .001 --log-interval 10 --num-steps 1 --frameskip 3 --factor 16 --key-dim 2048 --num-grad-states 32 --return-form value --env SelfPusher --grad-epoch 8 --acti tanh --gpu 2 --frameskip 2 --normalize > pusherppo.txt
    args = get_args()
    torch.cuda.set_device(args.gpu)

    # loading vision model
    paddle_model_net_params_path = 'ObjectRecognition/net_params/attn_base.json'
    # paddle_model_net_params_path = 'ObjectRecognition/net_params/attn_softmax.json'
    net_params = json.loads(open(paddle_model_net_params_path).read())
    params = load_param('results/cmaes_soln/focus_self/paddle.pth')
    # params = load_param('ObjectRecognition/models/atari/paddle_bin_smooth.pth')
    paddle_model = ModelFocusCNN(image_shape=(84, 84),
                                 net_params=net_params,
                                 binarize=0.000)
    paddle_model.set_parameters(params)
    # ball_model_net_params_path = 'ObjectRecognition/net_params/attn_base.json'
    ball_model_net_params_path = 'ObjectRecognition/net_params/attn_softmax.json'
    net_params = json.loads(open(ball_model_net_params_path).read())
    params = load_param('results/cmaes_soln/focus_self/ball.pth')
    # params = load_param('ObjectRecognition/models/atari/42531_2_smooth_3_2.pth')
    ball_model = ModelFocusCNN(image_shape=(84, 84),
                               net_params=net_params,
                               binarize=0.0)
    ball_model.set_parameters(params)
    model = ModelCollectionDAG()
Пример #6
0
            use_prior=args.prior,
            argmax_mode=args.argmax_mode,
        )
    elif args.model_type == 'attn':
        train_model = ModelAttentionCNN(
            image_shape=dataset.frame_shape,
            net_params=net_params,
        )
    logger.info('loaded net_params %s' % (str(net_params)))

    # boosting with trained models
    if args.boost:
        # a model to be boosted
        b_net_params_path, b_weight_path = args.boost
        b_net_params = json.loads(open(b_net_params_path).read())
        b_params = load_param(b_weight_path)
        b_model = ModelFocusCNN(
            image_shape=(84, 84),
            net_params=b_net_params,
        )
        b_model.set_parameters(b_params)

        # boosting ensemble
        train_model = ModelFocusBoost(
            b_model,
            train_model,
            train_flags=[False, True],
            cp_detector=cpd,
        )

    # paddle model for premise MICP loss
Пример #7
0
        # optimizer-form
        # train-edge
        # state-forms
        # state-names
    # Usage Example:
        # add Action->Paddle: python add_edge.py --model-form basic --optimizer-form DQN --record-rollouts "data/random/" --train-edge "Action->Paddle" --num-stack 2 --train --num-iters 10000 --save-dir data/action --state-forms bounds --state-names Paddle
        # Using tabular Action->Paddle:  python add_edge.py --model-form tab --optimizer-form TabQ --record-rollouts "data/random/" --train-edge "Action->Paddle" --num-stack 1 --train --num-iters 10000 --save-dir data/action --state-forms bounds --state-names Paddle --num-update-model 1
        # Action->Paddle: python add_edge.py --model-form basic --optimizer-form DQN --record-rollouts "data/random/" --train-edge "Action->Paddle" --changepoint-dir data/integrationgraph --num-stack 2 --factor 6 --train --num-iters 1000 --save-dir data/action --state-forms bounds --state-names Paddle --num-steps 1 --reward-check 5 --num-update-model 1 --greedy-epsilon .1 --lr 1e-2 --init-form smalluni --behavior-policy egr --grad-epoch 5 --entropy-coef .01 --value-loss-coef 0.5 --gamma .9 --save-models --save-dir data/integrationpaddle --save-graph data/intnetpaddle > integration/paddle.txt
        # python add_edge.py --model-form population --optimizer-form CMAES --record-rollouts "data/integrationpaddle/" --train-edge "Paddle->Ball" --num-stack 1 --train --num-iters 30 --state-forms prox vel --state-names Paddle Ball --changepoint-dir ./data/integrationgraph/ --lr 5e-3 --behavior-policy esp --reward-form bounce --gamma .87 --init-form xuni --factor 8 --num-layers 1 --base-form basic --select-ratio .2 --num-population 10 --sample-duration 100 --sample-schedule 12 --warm-up 0 --log-interval 1 --scale 2 --reward-check 10 --save-models --save-dir data/integrationbounce > integration/ball.txt
    args = get_args()
    torch.cuda.set_device(args.gpu)

    # loading vision model
    paddle_model_net_params_path = 'ObjectRecognition/net_params/attn_base.json'
    net_params = json.loads(open(paddle_model_net_params_path).read())
    params = load_param('ObjectRecognition/models/paddle_bin_long_smooth_2.pth')
    paddle_model = ModelFocusCNN(
        image_shape=(84, 84),
        net_params=net_params,
    )
    paddle_model.set_parameters(params)
    ball_model_net_params_path = 'ObjectRecognition/net_params/two_layer.json'
    net_params = json.loads(open(ball_model_net_params_path).read())
    params = load_param('ObjectRecognition/models/ball.npy')
    ball_model = ModelFocusCNN(
        image_shape=(84, 84),
        net_params=net_params,
    )
    ball_model.set_parameters(params)
    model = ModelCollectionDAG()
    model.add_model('Paddle', paddle_model, [], augment_fn=util.RemoveMeanMemory(nb_size=(3, 9)))
Пример #8
0
        model_id,
        net_params=net_params,
        use_prior=args.prior,
        argmax_mode=args.argmax_mode,
    )
    save_path = util.get_dir(os.path.join(prefix, 'focus_img_%s'%model_id))
    if plot_flags['plot_filter']:
        plot_model_filter(r_model, save_path)

    # boosting with trained models
    if args.boost:
        # partial ball model to be boosted
        ball_model_id = 'results/cmaes_soln/focus_atari_breakout/42080_16.npy'
        ball_net_params_text = open('objRecog/net_params/two_layer.json').read()
        ball_net_params = json.loads(ball_net_params_text)
        ball_params = load_param(ball_model_id)
        ball_model = ModelFocusCNN(
            image_shape=(84, 84),
            net_params=ball_net_params,
        )
        ball_model.set_parameters(ball_params)

        # boosting ensemble
        r_model = ModelFocusBoost(
            ball_model,
            r_model,
            train_flags=[False, True],
            cp_detector=cpd,
        )
    model = ModelCollectionDAG()
    if args.premise_path:
Пример #9
0
)

# action micp
# The loss object is built, but appending it to the list is currently
# disabled (see the commented-out line below).
micplosses = []
action_micploss = ActionMICPLoss(
    game,
    mi_match_coeff=1.0,
    mi_diffs_coeff=0.2,
    verbose=True,
)
# micplosses.append(action_micploss)

# premise loss
pmodel_net_params_path = 'ObjectRecognition/net_params/two_layer_5_5.json'
# Read the network configuration with a context manager so the file handle
# is closed right away rather than leaked.
with open(pmodel_net_params_path) as net_params_file:
    net_params = json.load(net_params_file)
params = load_param('results/cmaes_soln/focus_self/paddle_bin_long.npy')
pmodel = ModelFocusCNN(
    image_shape=(84, 84),
    net_params=net_params,
)
pmodel.set_parameters(params)

# Three models loaded with the same network configuration file, each
# receiving the premise model built above.
paddle_model = load_model('results/cmaes_soln/focus_self/paddle_bin.npy',
                          'ObjectRecognition/net_params/two_layer.json',
                          pmodel=pmodel)
ball_model = load_model('results/cmaes_soln/focus_self/ball_bin.npy',
                        'ObjectRecognition/net_params/two_layer.json',
                        pmodel=pmodel)
comp_model = load_model('results/cmaes_soln/focus_self/42068_40.npy',
                        'ObjectRecognition/net_params/two_layer.json',
                        pmodel=pmodel)
from AtariEnvironments.focus_atari import FocusAtariEnvironment
from SelfBreakout.breakout_screen import RandomPolicy, RotatePolicy
from ObjectRecognition.model import (ModelFocusCNN, ModelCollectionDAG,
                                     load_param, util)
import json, sys, cv2, torch
from Models.models import pytorch_model

if __name__ == '__main__':
    # Script entry point: builds a Paddle -> Ball focus-model DAG and hands
    # it to a FocusAtariEnvironment. Requires one command-line argument,
    # which is forwarded as the last positional argument of the environment.
    torch.cuda.set_device(1)

    # Paddle model. The JSON config is read inside a context manager so the
    # file handle is closed immediately instead of being leaked.
    paddle_model_net_params_path = 'ObjectRecognition/net_params/attn_softmax.json'
    with open(paddle_model_net_params_path) as net_params_file:
        net_params = json.load(net_params_file)
    params = load_param('ObjectRecognition/models/atari/paddle_bin_smooth.pth')
    paddle_model = ModelFocusCNN(image_shape=(84, 84),
                                 net_params=net_params,
                                 binarize=0.01)
    paddle_model.set_parameters(params)

    # Ball model, built the same way from its own weights file.
    ball_model_net_params_path = 'ObjectRecognition/net_params/attn_softmax.json'
    with open(ball_model_net_params_path) as net_params_file:
        net_params = json.load(net_params_file)
    params = load_param('ObjectRecognition/models/atari/42531_2_smooth.pth')
    ball_model = ModelFocusCNN(image_shape=(84, 84),
                               net_params=net_params,
                               binarize=0.01)
    ball_model.set_parameters(params)

    # 'Ball' is registered with 'Paddle' as its dependency.
    model = ModelCollectionDAG()
    model.add_model('Paddle',
                    paddle_model, [],
                    augment_fn=util.RemoveMeanMemory(nb_size=(3, 9)))
    model.add_model('Ball', ball_model, ['Paddle'])

    # NOTE(review): meaning of the positional arguments 1 and 0 is not
    # visible here -- confirm against FocusAtariEnvironment's signature.
    screen = FocusAtariEnvironment(model, "BreakoutNoFrameskip-v0", 1, 0,
                                   sys.argv[1])
Пример #11
0
 parser.add_argument('--gpu', type=int, default=0, help='the gpu to run on')
 parser.add_argument(
     '--ball',
     action='store_true',
     default=False,
     help='use if we need the ball model')  # move into net_params?
 parser.add_argument('--cuda',
                     action='store_true',
                     default=False,
                     help='use if we have cuda')  # move into net_params?
 args = parser.parse_args()
 # paddle_model_net_params_path = 'ObjectRecognition/net_params/attn_base.json'
 paddle_model_net_params_path = 'ObjectRecognition/net_params/%s.json' % args.params_name
 net_params = json.loads(open(paddle_model_net_params_path).read())
 # params = load_param('ObjectRecognition/models/self/paddle_bin_long_smooth.pth')
 params = load_param(os.path.join(args.model_dir, '%s.pth' % "paddle"))
 paddle_model = ModelFocusCNN(image_shape=(84, 84),
                              net_params=net_params,
                              binarize=0.0)
 paddle_model.set_parameters(params)
 # ball_model_net_params_path = 'ObjectRecognition/net_params/attn_base.json'
 if args.ball:
     ball_model_net_params_path = 'ObjectRecognition/net_params/%s.json' % args.params_name
     net_params = json.loads(open(ball_model_net_params_path).read())
     # params = load_param('ObjectRecognition/models/self/ball_bin_long_smooth.pth')
     params = load_param(os.path.join(args.model_dir, '%s.pth' % "ball"))
     ball_model = ModelFocusCNN(image_shape=(84, 84),
                                net_params=net_params,
                                binarize=0.0)
     ball_model.set_parameters(params)
 model = ModelCollectionDAG()