Example #1
def batch_learning_update(actor, critic, target_actor, target_critic, params):
    # Fetch this round's trajectories from MongoDB and run several batched
    # actor-critic updates over them.
    mongo = MongoDB()
    actor.train()
    query = {'training_round': params['training_round']}
    projection = {
        'obs': 1,
        'state': 1,
        'betsize_mask': 1,
        'action_mask': 1,
        'action': 1,
        'reward': 1,
        '_id': 0
    }
    db_data = mongo.get_data(query, projection)
    trainloader = return_trajectoryloader(db_data)
    for learning_round in range(params['learning_rounds']):
        losses = []
        for data in trainloader:
            critic_loss = update_actor_critic_batch(data, actor, critic,
                                                    target_actor,
                                                    target_critic, params)
            losses.append(critic_loss)
        # print(f'Learning round {learning_round}, critic loss {sum(losses)}')
    mongo.close()
    return actor, critic, params
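For context, a minimal driver for this function might look like the sketch below. The Actor/Critic classes, the deep-copied target networks, and the parameter values are assumptions for illustration, not part of the original project.

import copy

# Hypothetical usage sketch: Actor/Critic stand in for the project's network
# classes, and the params values are illustrative.
actor, critic = Actor(), Critic()
target_actor = copy.deepcopy(actor)    # frozen copies for the TD targets
target_critic = copy.deepcopy(critic)
params = {'training_round': 0, 'learning_rounds': 5}
actor, critic, params = batch_learning_update(
    actor, critic, target_actor, target_critic, params)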
Example #2
def dual_learning_update(actor, critic, target_actor, target_critic, params,
                         rank):
    # Per-worker variant: train only on the trajectories this rank generated,
    # then soft-update the target networks after each learning round.
    mongo = MongoDB()
    actor.train()
    query = {'training_round': params['training_round'], 'rank': rank}
    projection = {
        'obs': 1,
        'state': 1,
        'betsize_mask': 1,
        'action_mask': 1,
        'action': 1,
        'reward': 1,
        '_id': 0
    }
    # Materialize the query result so it can be re-iterated on every round
    # (a cursor would be exhausted after the first pass)
    data = list(mongo.get_data(query, projection))
    for _ in range(params['learning_rounds']):
        for poker_round in data:
            update_actor_critic(poker_round, critic, target_critic, actor,
                                target_actor, params)
        soft_update(critic, target_critic, params['device'])
        soft_update(actor, target_actor, params['device'])
    mongo.close()
    del data
    return actor, critic, params
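soft_update is not shown in this example; conventionally it is a Polyak average of the online weights into the target network. A minimal sketch under that assumption follows; note the example's version takes a device argument rather than a mixing coefficient, so the tau parameter here is illustrative.

import torch

def soft_update(source, target, tau=0.01):
    # Polyak averaging: target <- tau * source + (1 - tau) * target
    with torch.no_grad():
        for src, tgt in zip(source.parameters(), target.parameters()):
            tgt.data.mul_(1.0 - tau).add_(tau * src.data)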
Example #3
def __set_fast_data(img_file_path, lbl):
    # Extract a feature vector from the image and store it in MongoDB under a
    # unique key derived from the file name and the current timestamp.
    payload = list()
    db_handle = MongoDB()
    feature_vector = FeatureExtractor().get_features(img_file_path)
    feature_map = dict()
    key_p = os.path.splitext(os.path.basename(img_file_path))
    key = key_p[0] + '_' + key_p[1][1:] + '_' + str(int(time.time() * 1000.0))
    key = key.replace('.', '_')
    feature_map['file'] = key
    feature_map['label'] = lbl
    feature_map['feature'] = feature_vector
    payload.append(feature_map)
    try:
        db_handle.to_db(payload=payload,
                        key=None,
                        db=MONGO_HOPS_DB,
                        collection=MONGO_XRAY_COLLECTION)
        payload.clear()
    except Exception as e:
        print(img_file_path)
        print("Ignoring Exception : " + str(e))
    finally:
        db_handle.close()
Example #4
learning_params['critic_lrscheduler'] = critic_lrscheduler
# training loop
# generate_trajectories(env,actor,critic,training_params,id=0)
# actor,critic,learning_params = dual_learning_update(actor,critic,target_actor,target_critic,learning_params)
if args.single:
    train_dual(0, env, actor, critic, target_actor, target_critic,
               training_params, learning_params, network_params,
               validation_params)
else:
    # Share model weights across worker processes before spawning
    actor.share_memory()
    critic.share_memory()
    for e in range(training_params['lr_steps']):
        # Clear stale trajectories from Mongo before each round
        mongo = MongoDB()
        mongo.clean_db()
        mongo.close()
        now = datetime.datetime.now()
        print(f'Current date and time : {now.strftime("%Y-%m-%d %H:%M:%S")}')
        tic = time.time()
        mp.spawn(train_dual,
                 args=(env, actor, critic, target_actor, target_critic,
                       training_params, learning_params,
                       network_params, validation_params),
                 nprocs=num_processes)
        print(f'Training completed in {(time.time() - tic) / 60:.2f} minutes')
        learning_params['actor_lrscheduler'].step()
        learning_params['critic_lrscheduler'].step()
        training_params['training_round'] = (
            e + 1) * training_params['training_epochs']
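This loop steps the actor and critic LR schedulers once per lr_step; the snippet assumes they were constructed earlier. A minimal sketch of that setup follows, with an illustrative optimizer choice and decay schedule; all names and values here are assumptions.

import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

# Hypothetical scheduler setup feeding the loop above; optimizer type,
# learning rates, and gamma are illustrative assumptions.
actor_optimizer = optim.Adam(actor.parameters(), lr=1e-4)
critic_optimizer = optim.Adam(critic.parameters(), lr=1e-3)
learning_params['actor_lrscheduler'] = StepLR(actor_optimizer,
                                              step_size=1, gamma=0.9)
learning_params['critic_lrscheduler'] = StepLR(critic_optimizer,
                                               step_size=1, gamma=0.9)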