    dt = 1
else:
    dt = 0
totalR += rt

# store the transition in the experience-replay buffer
if NEW_EXPERIENCE:
    R.StoreTransition(st, np.array([a_index]), np.array([rt]), st_next, dt)
st = st_next

E_local = [0]  # placeholder error while still in the observation phase
if episode_i > OBSERVATION_PHASE:
    E_local = []
    for mini_batch in xrange(BATCHES):
        # sample a mini-batch of transitions from the replay buffer
        s_batch, a_batch, r_batch, stag_batch, terminal_batch, num = R.SampleMiniBatch(MINI_BATCH)
        Y = Q.evaluate(sess, s_batch)

        # Double DQN update: the online network Q selects the next action,
        # the target network Q_target evaluates it
        Q_next_arg = Q.evaluate(sess, stag_batch)
        Q_next_argmax = np.argmax(Q_next_arg, 1)
        Q_next_target = Q_target.evaluate(sess, stag_batch)
        a_batch = a_batch.astype(int)
        for i in range(num):
            Y[i, a_batch[i, 0]] = r_batch[i, 0] + \
                GAMMA * Q_next_target[i, Q_next_argmax[i]] * (1 - terminal_batch[i])

        if ONLY_OUTPUT:
            # train only the network's output layer
            error = Q.train_output(sess, s_batch, Y)
        else:
            error = Q.train(sess, s_batch, Y)
        E_local.append(error)  # collect per-batch error (presumably aggregated into totalE)
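# Note: the target computed above is the Double DQN rule (van Hasselt et al., 2016):
#   y_i = r_i + GAMMA * Q_target(s'_i, argmax_a Q(s'_i, a)) * (1 - terminal_i)
# Selecting the action with the online network but scoring it with the target
# network damps the overestimation bias of the vanilla DQN max-operator.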
L.AddRecord('network_middle',
            simulator.SimulateNeuralEpisode(Q, sess, env_middle, False))
L.AddRecord('network_right',
            simulator.SimulateNeuralEpisode(Q, sess, env_right, False))
L.AddRecord('policy_left',
            simulator.SimulatePolicyEpisode(policy, discretizer, env_left, False))
L.AddRecord('policy_middle',
            simulator.SimulatePolicyEpisode(policy, discretizer, env_middle, False))
L.AddRecord('policy_right',
            simulator.SimulatePolicyEpisode(policy, discretizer, env_right, False))
L.AddRecord('total_reward', totalR)
L.AddRecord('error', totalE)

# estimate the value of the greedy policy as the average max_a Q(s, a)
# over a validation batch of V_EST states (np.max, not np.argmax, which
# would average action indices instead of Q-values)
s_est, _, _, _, _, num = R_val.SampleMiniBatch(V_EST)
Q_est = Q.evaluate(sess, s_est)
Q_est_max = np.max(Q_est, 1)
V_est = Q_est_max.sum() / num
L.AddRecord('estimated_value', V_est)

# periodically copy the online weights into the target network
if steps >= C_STEPS:
    Ws, bs = Q.get_weights()
    Q_target.assign(sess, Ws, bs)
    print('updating target network')
    steps = 0
steps += 1

# update reward log
if not onPolicy: