示例#1
0
    def __init__(self):
        """Initialize the model with one default parameter set and no file."""
        # List of parameter sets; starts with a single default/empty set.
        self.paramSets = [parameters.ParameterSet()]
        # Index of the currently selected set within ``paramSets``.
        self._current_set = 0
        # Path the parameters were loaded from / saved to (None = unsaved).
        self.filename = None
        # Path to an imported chemistry database, if any.
        self.chemistry_db_path = None

        # Undo/redo actions; presumably wired up later by the UI -- TODO confirm.
        self._undo_action = None
        self._redo_action = None
    def act(self, action):
        """Advance the game one step.

        The learner's *action* controls paddle 0; paddle 1 is driven by a
        freshly constructed scripted bot controller.  Returns the scoring
        reward from the resulting events (the hit reward is ignored).
        """
        bot_paddle = self.paddles[1]
        bot_controller = EnvPongDraft.PaddleBot_Controller(
            self, bot_paddle, params.ParameterSet({}))

        joint_actions = [action, bot_controller.act()]

        self.updateEntities(joint_actions)
        score_reward, hit_reward = self.detect_events()
        # Only the scoring component is returned; a weighted combination
        # with hit_reward was tried previously and disabled.
        return score_reward
示例#3
0
def _run_exchanges(game, learner_controller, bot_controller, actions,
                   cum_reward, cum_hits, interm_hits, max_exchanges,
                   counter_attr):
    """Step *game* until getattr(game, counter_attr) exceeds *max_exchanges*,
    accumulating rewards and paddle hits into the given mutable lists."""
    players = range(len(cum_hits))
    reward_types = range(len(cum_reward[0]))
    exchanges = 0
    while exchanges <= max_exchanges:
        actions[0] = learner_controller.act()
        actions[1] = bot_controller.act()

        # observation holds the screen image data; unused here.
        reward, hits, observation = game.step(actions)

        for i, j in itertools.product(players, reward_types):
            cum_reward[i][j] += reward[i][j]

        for i in players:
            cum_hits[i] += hits[i]
            interm_hits[i] += hits[i]

        exchanges = getattr(game, counter_attr)


def main():
    """Run the Pong environment in one of three task modes.

    The mode comes from the module-level TASK_MODE global:
      * 'classic' -- learner uses the catch controller, bot acts randomly.
      * 'avoid'   -- learner uses the avoid controller, bot acts randomly.
      * 'switch'  -- multi-task: alternates (SWITCH_MODE == 'alternate') or
                     randomly picks (SWITCH_MODE == 'random') between the
                     classic and avoid tasks for CYCLES cycles.

    Also relies on the module-level NUM_BALL_EXCHANGE, MIN_BALL_EXCHANGE,
    MAX_BALL_EXCHANGE and CYCLES globals.  Prints cumulative reward/hit
    statistics at the end.
    """
    dummy_params = params.ParameterSet({})

    PongGame = EnvPongDraft.EnvPong()

    paddle_player = PongGame.paddles[0]
    paddle_bot = PongGame.paddles[1]

    # One controller per (paddle, behavior) combination.  Some are unused in
    # a given mode but are constructed up front.
    paddle_learner_catch_controller = EnvPongDraft.PaddleBot_Controller(
        PongGame, paddle_player, dummy_params)
    paddle_learner_avoid_controller = EnvPongDraft.PaddleBotAvoid_Controller(
        PongGame, paddle_player, dummy_params)
    paddle_learner_rand_controller = EnvPongDraft.PaddleBotRandom_Controller(
        PongGame, paddle_player, dummy_params)

    paddle_bot_catch_controller = EnvPongDraft.PaddleBot_Controller(
        PongGame, paddle_bot, dummy_params)
    paddle_bot_avoid_controller = EnvPongDraft.PaddleBotAvoid_Controller(
        PongGame, paddle_bot, dummy_params)
    paddle_bot_rand_controller = EnvPongDraft.PaddleBotRandom_Controller(
        PongGame, paddle_bot, dummy_params)

    actions = [[0, 0, 0], [0, 0, 0]]

    # cumulative reward (pos, neg) for two players (learning agent, opponent)
    cum_reward = [[0, 0], [0, 0]]

    # cumulative hits for learner and opponent paddle
    cum_hits = [0, 0]

    # hit counters within a single task scenario (learner, opponent)
    interm_hits = [0, 0]

    cycle_counter = 0

    # TODO: simulate the full task-switch procedure: draw the switch interval
    # from a uniform [min, max] range; decide on a measure that works for both
    # tasks (ball crossings of the middle line rather than paddle hits, since
    # hitting the paddle is not a helpful measure for the avoid task).
    # Controller switching could also live in an external experiment class.

    if TASK_MODE == 'classic':
        PongGame.multi_task = False
        # active task flags (index 0: classic, index 1: avoid)
        PongGame.taskActive = [True, False]

        print('EXECUTING CLASSIC Pong.')
        _run_exchanges(PongGame, paddle_learner_catch_controller,
                       paddle_bot_rand_controller, actions, cum_reward,
                       cum_hits, interm_hits, NUM_BALL_EXCHANGE,
                       'ball_cross_counter')

    elif TASK_MODE == 'avoid':
        PongGame.multi_task = False
        # active task flags (index 0: classic, index 1: avoid)
        PongGame.taskActive = [False, True]

        print('EXECUTING AVOID Pong.')
        _run_exchanges(PongGame, paddle_learner_avoid_controller,
                       paddle_bot_rand_controller, actions, cum_reward,
                       cum_hits, interm_hits, NUM_BALL_EXCHANGE,
                       'ball_cross_counter')

    elif TASK_MODE == 'switch':
        print(
            'EXECUTING Multi Task Pong (switching between classic and avoid).')
        PongGame.multi_task = True
        # Start inverted: the first task switch flips this to classic.
        PongGame.taskActive = [False, True]

        while cycle_counter < CYCLES:
            print('Cycle: ', cycle_counter)
            # Was a bare ``print`` statement -- a no-op in Python 3.
            print()
            # Reset the per-task ball exchange counter.
            PongGame.task_iter_counter = 0

            # Choose the next task by flipping or randomizing the active
            # flags (exactly one flag is True at a time).
            # !! TEMPORARY solution for two tasks only !!
            if SWITCH_MODE == 'alternate':
                PongGame.taskActive[:] = [not t for t in PongGame.taskActive]
            elif SWITCH_MODE == 'random':
                PongGame.taskActive = [False, False]
                PongGame.taskActive[random.randint(0, 1)] = True

            # Index of the active task (only one flag is True).
            task_id = PongGame.taskActive.index(True)

            # Number of ball exchanges for this task, drawn from [min, max].
            task_ball_exchange = random.randint(MIN_BALL_EXCHANGE,
                                                MAX_BALL_EXCHANGE)
            print('Task switching.')
            print('Ball exchanges to perform: ', task_ball_exchange)

            if task_id == 0:  # classic on
                print('Switching to CLASSIC.')
                print('PongGame.taskActive : ', PongGame.taskActive)
                _run_exchanges(PongGame, paddle_learner_catch_controller,
                               paddle_bot_rand_controller, actions,
                               cum_reward, cum_hits, interm_hits,
                               task_ball_exchange, 'task_iter_counter')
            elif task_id == 1:  # avoid on
                print('Switching to AVOID.')
                print('PongGame.taskActive : ', PongGame.taskActive)
                _run_exchanges(PongGame, paddle_learner_avoid_controller,
                               paddle_bot_catch_controller, actions,
                               cum_reward, cum_hits, interm_hits,
                               task_ball_exchange, 'task_iter_counter')

            cycle_counter += 1

    print('---------------')
    print('---------------')

    print('Finished.')
    print('Cumulated reward: ', cum_reward)
    print('Cumulated hits: ', cum_hits)

    print('Class internal.')

    print('Cumulated reward : ', PongGame.total_reward)
    print('Cumulated hits : ', PongGame.total_hits)
    print('Ball crossings : ', PongGame.ball_cross_counter)
    # NOTE(review): sum() requires ball_cross_counter to be iterable, yet the
    # classic/avoid modes read it as a scalar loop counter -- confirm its
    # type; this line may raise TypeError.
    print('Total ball crossings : ', sum(PongGame.ball_cross_counter))

    return
示例#4
0
        # create logfile recording time in seconds for different simulation steps
        # (initialization, parameters, simulation etc.)

    tic = time()

    # import main parameters dictionary for simulation
    from example_parallel_network_parameters import PSET

    # modify parameters accd. to parameterspace id ps_id
    OUTPUT = 'output'
    JOBDIR = 'jobs'
    PSETDIR = 'parameters'

    # get parameterset id, and load corresponding parameterset file
    ps_id = sys.argv[-1]
    pset = ps.ParameterSet(os.path.join(PSETDIR, ps_id + '.txt'))

    # patch up main ParameterSet object with values from ParameterSpace
    PSET = ps.ParameterSet(PSET.copy())
    PSET.update(pset)

    # compute dipole moment
    PSET.COMPUTE_P = PSET.COMPUTE_LFP

    # record population contributions to extracellular signals
    PSET.rec_pop_contribution = PSET.COMPUTE_LFP

    # compute ECoG
    PSET.COMPUTE_ECOG = PSET.COMPUTE_LFP

    # set reference network size
示例#5
0
            result = mean(losses)
        log_model(self.model, "eval", type="loss", loss=result,
            total=sum(totals), ntrain=self.model.META["ntrain"])
        self.model.META["loss"] = result
        return result

    def train_dataset(self, dataset, ntrain=100000, options=None, batch_size=None):
        """Train on *dataset* by wrapping it in a shuffled DataLoader."""
        shuffled_loader = torchdata.DataLoader(
            dataset, batch_size=batch_size, shuffle=True)
        return self.train_dataloader(
            shuffled_loader, ntrain=ntrain, options=options)

    def evaluate_dataset(self, dataset, classification=False, batch_size=200):
        """Evaluate on *dataset* using a deterministic (unshuffled) DataLoader."""
        eval_loader = torchdata.DataLoader(
            dataset, batch_size=batch_size, shuffle=False)
        return self.evaluate_dataloader(
            eval_loader, classification=classification)

# Default hyperparameter search space: learning rate on a log scale and
# batch size on a quantized log scale.
default_parameters = params.ParameterSet(
    params.LogParameter("lr", 1e-6, 1e2),
    params.QuantizedLogParameter("batch_size", 5, 500)
)


def strpar(p):
    """Render a parameter mapping as one compact ``key=value`` line.

    Floats and ints >= 1e6 are shown in scientific notation; every other
    key or value is stringified and truncated to 10 characters.
    """
    def _fmt(value):
        if isinstance(value, float) or (isinstance(value, int) and value >= 1000000):
            return "%.2e" % value
        return str(value)[:10]

    pairs = [f"{_fmt(key)}={_fmt(val)}" for key, val in p.items()]
    return " ".join(pairs)


def plot_log(log, ax=None, value="loss", key="ntrain", selector="train", **kw):
        paramset.update({'random_seed': paramset['random_seed'] + i})

        ps_id = get_unique_id(paramset)
        print(ps_id)

        ## Add parameters to string listing all process IDs by parameters
        with open(os.path.join(savefolder, 'id_parameters.txt'), 'a') as f:
            f.write(ps_id + '\n')
            f.write('%.3f, %.3f, %.3f, %.3f' %
                    (paramset['eta'], paramset['g'], paramset['J'],
                     paramset['sigma_factor']) + '\n')

        # put output_path into dictionary, as we now have a unique ID of
        # though this will not affect the parameter space object PS
        spike_output_path = os.path.join(nest_output, ps_id)
        if not os.path.isdir(spike_output_path):
            os.mkdir(spike_output_path)

        paramset.update({
            'ps_id': ps_id,
            'spike_output_path': spike_output_path,
            'savefolder': savefolder
        })

        # write using ps.ParemeterSet native format
        parameterset_file = os.path.join(parameterset_dest,
                                         '{}.pset'.format(ps_id))
        ps.ParameterSet(paramset).save(url=parameterset_file)
        # specify where to save output and errors
        nest_output_file = os.path.join(log_dir, ps_id + '.txt')
示例#7
0
        self._updateUndoRedo()

        return True

    def _loadFile(self, filename):
        """Load a ParameterSet from *filename* and apply it.

        On failure to open the file the error is reported via
        ``self._error`` and the method returns without changing state.
        Otherwise the parameters are submitted, the chemistry database
        referenced by the file (if any) is imported, and the filename
        is recorded.
        """
        try:
            case = open(filename, 'r')
        # Was a bare ``except:``; Exception keeps the reporting behavior
        # without swallowing KeyboardInterrupt/SystemExit.
        except Exception:
            traceback.print_exc()
            msg = 'Could not open file ' + filename + "\n, got exception:" + \
                traceback.format_exc()
            self._error(filename, 'Error opening file', msg)
            return

        p = parameters.ParameterSet()
        try:
            p.read(case)
        finally:
            # The original leaked the file handle.
            case.close()

        # submitNew() is called for its side effect; its return value used
        # to gate an informational "Load aborted" dialog that is disabled.
        self.submitNew(p)

        db = p.getParamValue(parameters.CurrentDatabasePath)
        if db != "":
            self.importCurrentChemistryDB(p)

        self.setFilename(filename)

    def openFile(self, file):
BALL_COLOR = WHITE
BALL_SHAPE = 'square'

# speed settings of the paddle and ball
PADDLE_SPEED = 1.5
BALL_X_SPEED = 3
BALL_Y_SPEED = 2
# Magnitude of the ball's velocity vector.
BALL_SPEED = np.sqrt(BALL_X_SPEED**2.0 + BALL_Y_SPEED**2.0)
# NOTE(review): presumably a discount factor and an exploration/tolerance
# parameter respectively -- confirm against their consumers.
GAMMA = 0.8
EPSILON = 1.5

BACKGROUND_COLOR = BLACK

# parameter set for the whole game
params_set = params.ParameterSet({}, label="pong_test")

BALLS_NUM = 1
PADDLES_NUM = 2

# Nested sub-sets for per-entity configuration.
params_set['balls'] = params.ParameterSet({})
params_set['paddles'] = params.ParameterSet({})

# number of balls used in a game
params_set['balls']['num'] = BALLS_NUM

# Per-ball parameter dictionaries are appended to this list below.
balls_list = []
params_set['balls']['list'] = balls_list

for i in range(BALLS_NUM):
    ball_params = params.ParameterSet({})
示例#9
0
def main():
    """Exercise the Pong environment while hard-switching controllers.

    Runs STEPS game steps, swapping both paddles between 'catch' and
    'avoid' controllers every STEPS // SWITCH_STEPS steps, then prints
    cumulative rewards and paddle hits (both externally accumulated and
    the environment's internal totals).
    """
    dummy_params = params.ParameterSet({})

    PongGame = EnvPongDraft.EnvPong()

    paddle_player = PongGame.paddles[0]
    paddle_bot = PongGame.paddles[1]

    # Different controllers to test each task setting.
    paddle_learner_catch_controller = EnvPongDraft.PaddleBot_Controller(
        PongGame, paddle_player, dummy_params)
    paddle_learner_avoid_controller = EnvPongDraft.PaddleBotAvoid_Controller(
        PongGame, paddle_player, dummy_params)
    paddle_bot_catch_controller = EnvPongDraft.PaddleBot_Controller(
        PongGame, paddle_bot, dummy_params)
    paddle_bot_avoid_controller = EnvPongDraft.PaddleBotAvoid_Controller(
        PongGame, paddle_bot, dummy_params)

    # About 80 ball exchanges correspond to 10000 performed actions.
    STEPS = 10000
    actions = [[0, 0, 0], [0, 0, 0]]

    # cumulative reward (pos, neg) for two players (learning agent, opponent)
    cum_reward = [[0, 0], [0, 0]]

    # cumulative hits for learner and opponent paddle
    cum_hits = [0, 0]

    # hit counters within the current task episode (learner, opponent)
    interm_hits = [0, 0]

    PLAYERS = 2
    REWARD_TYPES = 2

    PLAYERS_RANGE = range(PLAYERS)
    REWARD_TYPES_RANGE = range(REWARD_TYPES)

    # TODO: simulate the full task-switch procedure: draw switch points from
    # a uniform [min, max] interval; decide what they correspond to (paddle
    # hits for classic, or middle-line crossings for both tasks -- hitting
    # the paddle is not a helpful measure for the avoid case).  Controller
    # switching could also live in an external experiment class.

    # Switching is hard-wired through the pre-defined SWITCH_MARKER schedule.
    SWITCH_STEPS = 8
    SWITCH_MARKER = STEPS // SWITCH_STEPS

    # Start with the catch scenario (classical pong).
    paddle_learner_controller = paddle_learner_catch_controller
    paddle_bot_controller = paddle_bot_catch_controller
    catch = True

    for step in range(STEPS):

        if step != 0 and step % SWITCH_MARKER == 0:
            print('CONTROLLER SWITCH.')
            if catch:
                print('Was catch. Switching to avoid.')
                print('Hits in the catch episode were : ', interm_hits)
                paddle_learner_controller = paddle_learner_avoid_controller
                paddle_bot_controller = paddle_bot_avoid_controller
                catch = False
            else:
                print('Was avoid. Switching to catch.')
                print('Hits in the avoid episode were :', interm_hits)
                paddle_learner_controller = paddle_learner_catch_controller
                paddle_bot_controller = paddle_bot_catch_controller
                catch = True
            # Reset the per-episode hit counters after every switch.
            interm_hits = [0, 0]

        actions[0] = paddle_learner_controller.act()
        actions[1] = paddle_bot_controller.act()

        # observation holds the screen image data; unused here.
        reward, hits, observation, hit_reward = PongGame.step(actions)

        # Renamed from ``i``/``j``: the original inner loops shadowed the
        # outer step-loop variable ``i``.
        for player, rtype in itertools.product(PLAYERS_RANGE,
                                               REWARD_TYPES_RANGE):
            cum_reward[player][rtype] += reward[player][rtype]

        for player in PLAYERS_RANGE:
            cum_hits[player] += hits[player]
            interm_hits[player] += hits[player]

    print('Finished.')
    print('Cumulated reward: ', cum_reward)
    print('Cumulated hits: ', cum_hits)

    print('Class internal.')

    print('Cumulated reward : ', PongGame.total_reward)
    print('Cumulated hits : ', PongGame.total_hits)
    print('Ball crossings : ', PongGame.ball_cross_counter)

    return