Example #1
def trainDqsa(callbacks, logger, centralNet: DQSAVersion2,
              centralTarget: DQSAVersion2):
    """
    training loop for the DQSA
    :param callbacks: callbacks to write to TB
    :param logger: logger handler
    :param centralNet: the central net which we train every M episodes
    :param centralTarget: target central net for double DQN
    """
    logger.info("start_training")
    Tensorcallback = callbacks['tensorboard']
    env = OneTimeStepEnv()
    alpha = 0.0  # exploration rate, lowered by lower_epsilon every iteration
    beta = 20  # softmax temperature for action selection, updated by config.temperature_schedule
    #userNets = createUserNets(config.N)
    userNet = DQSAVersion2(input_size=config.input_size_user, usernet=True)
    # synchWithCentral(userNets=userNets, path=config.load_ckpt_path)
    # actionThatUsersChose = np.zeros((config.N, 1))
    ER = ExperienceReplay()
    best_channel_throughput_so_far = 0
    channelThroughPutPerTstep = initCTP()  # data structure tracking the mean reward at each time step
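    # NOTE (assumption): initCTP is not shown here; judging by the tstep + 1
    # indexing below, it presumably returns one empty list per time step
    # (about config.TimeSlots + 2 of them) so per-slot ACK sums can be
    # averaged at plotting time. A sketch follows after the function.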
    for iteration in range(config.Iterations):
        # ----- start iteration loop -----
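        # Double-DQN bookkeeping: every other iteration the target net is
        # refreshed from a central checkpoint (the best one once throughput
        # exceeds 0.9), keeping the bootstrapped targets stable in between.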
        if (iteration + 1) % 2 == 0:
            if best_channel_throughput_so_far > 0.9:
                centralTarget.load_weights(config.best_ckpt_path)  # sync target with the best checkpoint
            else:
                centralTarget.load_weights(config.ckpt_path)  # sync target with the latest central weights
            logger.info("TargetNet synced")
        channelThroughPutMean = 0
        loss_value = []
        collisionsMean = 0
        idle_timesMean = 0
        collisions = 0
        idle_times = 0
        channelThroughPut = 0
        for episode in range(config.Episodes):
            # ----- start episode loop -----
            episodeMemory = Memory(
                numOfUsers=config.N)  # initialize a memory for the episode
            Xt = env.reset()
            userNet.reset_states()
            # Xt = np.expand_dims(Xt, axis=1)
            for tstep in range(config.TimeSlots):
                # ----- start time-steps loop -----
                UserQvalues = userNet(Xt)
                actionThatUsersChose = [
                    getAction(Qvalues=UserQvalue,
                              temperature=beta,
                              alpha=alpha) for UserQvalue in UserQvalues
                ]
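                # Each user samples its own action from its own Q-values; in
                # the DQSA setting an action is presumably a channel choice
                # (or "do not transmit"), hence one env.step() call per user.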
                for usr in range(config.N):
                    env.step(action=actionThatUsersChose[usr], user=usr)
                nextStateForEachUser, rewardForEachUser, ack_vector = env.getNextState()
                # getNextState also resets the env for the next time step
                episodeMemory.addTimeStepExperience(
                    state=Xt,
                    nextState=nextStateForEachUser,
                    rewards=rewardForEachUser,
                    actions=np.squeeze(actionThatUsersChose))
                # accumulating the experience at time step tstep
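                # (assumption) Memory presumably stores per-user (state, action,
                # reward, next_state) tuples for every slot; the exact layout
                # depends on what centralNet.fit expects for the DQN update.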
                Xt = nextStateForEachUser  # state = next_State
                # for debug purposes
                collisions += env.collisions
                idle_times += env.idle_times
                ack_sum = np.sum(ack_vector)  # rewards are derived from the ACK signals (competitive reward mechanism)
                channelThroughPutPerTstep[tstep + 1].append(ack_sum)
                channelThroughPut += ack_sum
                # ----- end time-steps loop -----
            # the episode has ended, so we add its memory to the ER
            ER.add_memory(memory=episodeMemory)  # insert the episode experience into the ER
            #resetUserStates(userNets)  # reset the user's lstm states
            if (episode + 1) % config.debug_freq == 0:  # periodic debug logging
                collisions /= config.TimeSlots * config.debug_freq
                idle_times /= config.TimeSlots * config.debug_freq
                channelThroughPut /= config.TimeSlots * config.debug_freq
                channelThroughPutMean += channelThroughPut
                collisionsMean += collisions
                idle_timesMean += idle_times
                current_lr = centralNet.model.optimizer.learning_rate.numpy()
                msg = ("Iteration {}/{} - Episode {}/{}: collisions {}, idle_times {}, "
                       "channelThroughput {}, learning rate {}, beta {}, alpha {}").format(
                           iteration, config.Iterations, episode, config.Episodes,
                           collisions, idle_times, channelThroughPut, current_lr, beta, alpha)
                logger.info(msg)
                print(msg)
                collisions = 0
                idle_times = 0
                channelThroughPut = 0
            if ER.currentPosition % config.M == 0:  # training phase every M episodes
                loss = centralNet.fit(
                    lr=config.learning_rate_schedule(iteration) *
                    config.learning_rate_multiplier,
                    centralTarget=centralTarget,
                    ER=ER)
                loss_value.append(loss)
                centralNet.save_weights(
                    config.ckpt_path
                )  # save new weights (new policy) in ckpt_path
                ER.flush()  # clear out the ER after use
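                # Because the ER is flushed after every fit, the buffer only
                # ever holds the most recent config.M episodes, keeping the
                # training data close to the current policy.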
                if best_channel_throughput_so_far > 0.9:
                    userNet.load_weights(config.best_ckpt_path)  # sync the user net with the best checkpoint
                else:
                    userNet.load_weights(config.ckpt_path)  # sync the user net with the fresh central weights
                # resetUserStates(userNets)  # reset the user's lstm states
        # ----- end episode loop -----
        channelThroughPutMean /= config.Episodes // config.debug_freq
        collisionsMean /= config.Episodes // config.debug_freq
        idle_timesMean /= config.Episodes // config.debug_freq
        loss_value = np.mean(loss_value) if loss_value else 0.0  # guard against an empty list
        if (iteration + 1) % 5 == 0:
            # every 5 iterations, plot the mean reward for each time step
            channelThroughPutPerTstep = [
                np.mean(x) for x in channelThroughPutPerTstep
            ]
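            # Each curve point is written at epoch = iteration * (TimeSlots + 2) + i,
            # so successive iterations occupy disjoint x-ranges on the TB chart
            # instead of overwriting one another.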
            for i, x in enumerate(channelThroughPutPerTstep):
                logs = {'channelThroughputTstep': x}
                Tensorcallback.on_epoch_end(epoch=iteration *
                                            (config.TimeSlots + 2) + i,
                                            logs=logs)
            channelThroughPutPerTstep = initCTP()  # init the data structure
        # every iteration, log the summary scalars to TB
        msg = ("Iteration {}/{}: channelThroughput mean {}, loss {}, collisions {}, "
               "idle_times {}").format(iteration, config.Iterations, channelThroughPutMean,
                                       loss_value, collisionsMean, idle_timesMean)
        logger.info(msg)
        print(msg)
        logs = {
            'channelThroughput': channelThroughPutMean,
            "collisons": collisonsMean,
            "idle_times": idle_timesMean,
            "loss_value": loss_value
        }
        Tensorcallback.on_epoch_end(epoch=iteration, logs=logs)
        beta = config.temperature_schedule(beta=beta)  # anneal the softmax temperature
        alpha = lower_epsilon(alpha)  # lowering the exploration rate
        if best_channel_throughput_so_far < channelThroughPutMean:
            best_channel_throughput_so_far = channelThroughPutMean
            centralNet.save_weights(config.best_ckpt_path)
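

# --- Hedged sketch (not part of the original repository) ---
# The helpers getAction, lower_epsilon and initCTP are used above but not
# shown. Based purely on their call sites (getAction(Qvalues=...,
# temperature=beta, alpha=alpha), alpha described as an exploration rate, the
# tstep + 1 indexing into channelThroughPutPerTstep), minimal implementations
# could look like the following. Every detail here is an assumption, not the
# repo's actual code.
import numpy as np


def getAction(Qvalues, temperature, alpha):
    """Random action with probability alpha, else a softmax draw over Q-values."""
    q = np.squeeze(np.asarray(Qvalues, dtype=np.float64))
    if np.random.rand() < alpha:
        return np.random.randint(q.shape[-1])  # uniform exploration
    logits = temperature * q  # larger beta -> closer to greedy
    logits -= logits.max()    # numerical stability before exp
    probs = np.exp(logits) / np.exp(logits).sum()
    return np.random.choice(q.shape[-1], p=probs)


def lower_epsilon(alpha, decay=0.995, floor=0.0):
    """Anneal the exploration rate toward a floor (decay constants are guesses)."""
    return max(floor, alpha * decay)


def initCTP():
    """One bucket per time step (plus padding), matching the tstep + 1 indexing."""
    return [[] for _ in range(config.TimeSlots + 2)]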