"blocks4/task03.pddl", "blocks4/task04.pddl", "blocks4/task05.pddl", "blocks4/task06.pddl", "blocks4/task07.pddl", "blocks4/task08.pddl", "blocks4/task09.pddl", "blocks4/task10.pddl", "blocks5/task01.pddl", "blocks5/task02.pddl", "blocks5/task03.pddl", "blocks5/task04.pddl", "blocks5/task05.pddl", "blocks5/task06.pddl", "blocks5/task07.pddl", "blocks5/task08.pddl", "blocks5/task09.pddl", "blocks5/task10.pddl", ], ) assert len(_CONFIGURATION.problems) == 30 if __name__ == "__main__": train_wrapper( args=get_training_args( configurations=[_CONFIGURATION], # 10 minutes max_training_time=10 * 60, ), domain_name="blocks")
from default_args import get_training_args, DomainAndProblemConfiguration
from train import train_wrapper

# Ferry training set: every combination of 2-4 locations with 1-3 cars,
# enumerated location-major so the order is l2-c1, l2-c2, ..., l4-c3.
_CONFIGURATION = DomainAndProblemConfiguration(
    base_directory="../benchmarks/ferry",
    domain_pddl="ferry.pddl",
    problem_pddls=[
        "train/ferry-l{}-c{}.pddl".format(locations, cars)
        for locations in (2, 3, 4)
        for cars in (1, 2, 3)
    ],
)
# Sanity check: 3 location counts x 3 car counts.
assert len(_CONFIGURATION.problems) == 9

if __name__ == "__main__":
    # Build the argument object first, then hand it to the trainer.
    training_args = get_training_args(
        configurations=[_CONFIGURATION],
        max_training_time=3 * 60,  # seconds, i.e. 3 minutes
        num_folds=5,
    )
    train_wrapper(args=training_args, domain_name="ferry")
def main():
    """Parse the command line and launch a training or a test run.

    The CLI has two sub-commands, ``train`` and ``test``. Both share the
    options defined on ``g_parser``; ``train`` adds its own options on top.
    A sub-command is mandatory: calling the program without one ends with
    the usual argparse usage error (``SystemExit``) rather than failing
    later on attributes that only the sub-parsers define.

    After parsing, ``num_episodes`` falls back to 1000 for ``train`` and 5
    for ``test`` when it was not given, the agent name is replaced by the
    result of ``select_agent``, and the run is dispatched to
    ``test_wrapper`` or ``train_wrapper`` with ``(env_config, wrapper_config)``.
    """
    # required for test and train:
    # parsed by the main parser - this group is shared
    g_parser = argparse.ArgumentParser(add_help=False)
    g_parser.add_argument(
        '--num-episodes',
        type=int,
        help='number of running episodes '
        '(default is 1000 for train, and 5 for test)')
    g_parser.add_argument(
        '--build',
        default=None,
        type=str,
        required=True,
        help='path of the unity build file, to run inside Unity - enter None')
    g_parser.add_argument('--weights-path',
                          type=str,
                          required=True,
                          help='path to weights dir')
    g_parser.add_argument('--agent',
                          choices=['ddpg', 'mddpg', 'maddpg'],
                          required=True,
                          help='type of agent')
    g_parser.add_argument('--num-agents',
                          choices=range(1, 9),
                          default=4,
                          type=int,
                          metavar='[1-8]',
                          help='number of agents (cars)')
    g_parser.add_argument('--num-obstacles',
                          choices=range(0, 17),
                          default=4,
                          type=int,
                          metavar='[0-16]',
                          help='number of random obstacles')
    # general group end

    parser = argparse.ArgumentParser(
        prog='RL_Multi_agent_Cars',
        description='please choose train or test to get specific help'
        ' (e.g main.py train -h)')
    subparsers = parser.add_subparsers(help='two available running modes',
                                       dest='subparser_name')
    # The shared options live only on the sub-parsers, so without a
    # sub-command the namespace would lack them (e.g. num_episodes).
    # Making the sub-command mandatory lets argparse report that cleanly.
    subparsers.required = True

    # define new sub-command
    subparsers.add_parser('test', help='run test mode', parents=[g_parser])

    # required for train only:
    # parsed by the train command sub-parser
    train_parser = subparsers.add_parser('train',
                                         help='run train mode',
                                         parents=[g_parser])
    train_parser.add_argument(
        '--save-mem',
        action='store_true',
        help='save the replay buffer during training for later use',
    )
    train_parser.add_argument('--scores-avg-window',
                              choices=range(0, 101),
                              metavar='[0-100]',
                              default=50,
                              type=int,
                              help='number of last scores to average')
    train_parser.add_argument(
        '--load-weights',
        action='store_true',
        help='add this to load weights from previous runs')
    train_parser.add_argument(
        '--load-mem',
        action='store_true',
        help='add this to load replay buffer from previous run')
    train_parser.add_argument(
        '--mem-path',
        type=str,
        help='path of replay buffer file to load or store')
    train_parser.add_argument('--solved-score',
                              default=40,
                              type=int,
                              help='score that complete the episode')
    train_parser.add_argument(
        '--show-graphics',
        action='store_true',
        help='add this to show graphics (slows down training)')
    train_parser.add_argument(
        '--print-agent-loss',
        action='store_true',
        help='print agent\'s loss after each episode (default=False)')
    train_parser.add_argument(
        '--save-best-weights',
        action='store_true',
        help=
        'save the best weights so far (by average score). saving directory will be the'
        ' same as weights-path with suffix \'best\' default=False')
    train_parser.add_argument(
        '--save-score-log',
        action='store_true',
        help=
        'saves a csv file with the ongoing scores of each episode (default=False)'
    )

    args = parser.parse_args()

    # Mode-dependent default for the episode count.
    if args.num_episodes is None:
        args.num_episodes = 1000 if args.subparser_name == 'train' else 5

    env_config = {
        'num_agents': args.num_agents,
        'num_obstacles': args.num_obstacles,
        'setting': 0
    }

    # vars() returns the namespace's own dict, so replacing 'agent' here
    # also changes args.agent; the wrappers receive every parsed option.
    wrapper_config = vars(args)
    wrapper_config['agent'] = select_agent(wrapper_config['agent'])

    print('starting {} with arguments:\n{}'.format(args.subparser_name,
                                                   wrapper_config))

    if args.subparser_name == 'test':
        test_wrapper(env_config, wrapper_config)
    else:
        train_wrapper(env_config, wrapper_config)
"train/zenotravel-cities3-planes3-people3-1826.pddl", "train/zenotravel-cities3-planes3-people5-4582.pddl", ], ) assert len(_ZENOTRAVEL_CONFIGURATION.problems) == 10 _GRIPPER_CONFIGURATION = DomainAndProblemConfiguration( base_directory="../benchmarks/gripper", domain_pddl="domain.pddl", # First 3 gripper probs problem_pddls=[ "problems/gripper-n1.pddl", "problems/gripper-n2.pddl", "problems/gripper-n3.pddl", ], ) assert len(_GRIPPER_CONFIGURATION.problems) == 3 if __name__ == "__main__": train_wrapper( args=get_training_args( configurations=[ _BLOCKSWORLD_CONFIGURATION, _ZENOTRAVEL_CONFIGURATION, _GRIPPER_CONFIGURATION, ], # 15 minutes max_training_time=15 * 60, ), domain_name="multi")
"blocks5/task03.pddl", "blocks5/task04.pddl", "blocks5/task05.pddl", ], ) assert len(_BLOCKSWORLD_CONFIGURATION.problems) == 10 _GRIPPER_CONFIGURATION = DomainAndProblemConfiguration( base_directory="../benchmarks/gripper", domain_pddl="domain.pddl", # First 3 gripper probs problem_pddls=[ "problems/gripper-n1.pddl", "problems/gripper-n2.pddl", "problems/gripper-n3.pddl", ], ) assert len(_GRIPPER_CONFIGURATION.problems) == 3 if __name__ == "__main__": train_wrapper( args=get_training_args( configurations=[ _BLOCKSWORLD_CONFIGURATION, _GRIPPER_CONFIGURATION, ], # 10 minutes max_training_time=10 * 60, ), domain_name="indepbg")
# Zenotravel training instances as (cities, planes, people, seed) tuples:
# five with 2 cities followed by five with 3 cities, 10 problems in total.
_ZENOTRAVEL_CONFIGURATION = DomainAndProblemConfiguration(
    base_directory="../benchmarks/zenotravel",
    domain_pddl="domain.pddl",
    problem_pddls=[
        "train/zenotravel-cities{}-planes{}-people{}-{}.pddl".format(
            cities, planes, people, seed)
        for cities, planes, people, seed in (
            (2, 1, 3, 8798),
            (2, 2, 3, 9145),
            (2, 3, 3, 3417),
            (2, 4, 2, 4892),
            (2, 4, 4, 6874),
            (3, 1, 3, 4791),
            (3, 2, 3, 8752),
            (3, 2, 5, 7306),
            (3, 3, 3, 1826),
            (3, 3, 5, 4582),
        )
    ],
)
assert len(_ZENOTRAVEL_CONFIGURATION.problems) == 10

if __name__ == "__main__":
    # Gripper is listed before Zenotravel, as in the original call.
    training_args = get_training_args(
        configurations=[
            _GRIPPER_CONFIGURATION,
            _ZENOTRAVEL_CONFIGURATION,
        ],
        max_training_time=10 * 60,  # seconds, i.e. 10 minutes
    )
    train_wrapper(args=training_args, domain_name="indepgz")
from default_args import get_training_args, DomainAndProblemConfiguration
from train import train_wrapper

# Gripper training set: instances n1, n2 and n3 (1, 2 and 3 balls).
_CONFIGURATION = DomainAndProblemConfiguration(
    base_directory="../benchmarks/gripper",
    domain_pddl="domain.pddl",
    problem_pddls=[
        "problems/gripper-n{}.pddl".format(size) for size in range(1, 4)
    ],
)
# Sanity check: exactly the three instances above.
assert len(_CONFIGURATION.problems) == 3

if __name__ == "__main__":
    # Build the argument object first, then hand it to the trainer.
    training_args = get_training_args(
        configurations=[_CONFIGURATION],
        max_training_time=90,  # seconds
        num_bins=3,
    )
    train_wrapper(args=training_args)