def test_crivervalue(): f = utils.pretty_args(cpoker.rivervalue) assert_close(f("2d 6d", "Tc Ts 8s 6c 5s"), 0.620202020202) assert_close(f("Jd Jc", "5s 7s Td 7d 4h"), 0.85202020202) assert_close(f("5c 5s", "2d Ac 9c 2h Jd"), 0.529797979798) assert_close(f("9s 7h", "8s 4c Ah 6d 3c"), 0.0409090909091) assert_close(f("Ts Qs", "Tc Ad Ks 3s 5d"), 0.679797979798) assert_close(f("Ts Jh", "Kh 2d 2h Ah 3h"), 0.926262626263) assert_close(f("Qc 5c", "Jc 6h 7h Ts 2h"), 0.135353535354) assert_close(f("Kd Qh", "7h 5s 6s 4c Jd"), 0.125757575758) assert_close(f("Qh 8d", "4s 8h 8c Th 9d"), 0.927777777778) assert_close(f("9h 2d", "4s 9s 8h 6h 7h"), 0.49595959596) assert_close(f("Td 5s", "Th 4s 8h 5c 2s"), 0.932323232323) assert_close(f("Kc 4h", "7c 3s 8c 5h 6d"), 0.769191919192) assert_close(f("2d 3c", "Jc 4s 6h Kd 4d"), 0.0166666666667) assert_close(f("Td Kc", "6c Qh Js 7c Ks"), 0.866666666667) assert_close(f("Qs 5h", "8c 3h Qd Ts Qh"), 0.939898989899) assert_close(f("Kh 5h", "8h 2h 4s Ks Tc"), 0.850505050505) assert_close(f("3c 3d", "Td 8s Ts 8h 4h"), 0.0166666666667) assert_close(f("Kh Td", "Ah Th 3d 5c 9s"), 0.779797979798) assert_close(f("9c 6s", "Qh 9d Th 7c Qd"), 0.670707070707) assert_close(f("Ac 3d", "6h 5d 7h 9h 7s"), 0.29797979798) assert_close(f("2c Th", "Jd 7s Ah 3h 5h"), 0.119696969697) assert_close(f("9h Ks", "8h 4c 7c 3d 6h"), 0.15) assert_close(f("5h Jh", "2c 4c 9d 7h 5d"), 0.605050505051) assert_close(f("6s Ts", "5d 6d Ad Td Kh"), 0.574747474747) assert_close(f("As 3h", "Qs 3s Qd Jc Ah"), 0.845454545455) assert_close(f("Tc Qc", "3c 4h 4d Ac 5s"), 0.259090909091) assert_close(f("5s 3d", "5d 6d 7c Jc 9c"), 0.307575757576) assert_close(f("Jd 3s", "8h 9s 2s 3c 4h"), 0.521212121212) assert_close(f("Ac Qc", "4d 9d 4h 5h 4c"), 0.638888888889) assert_close(f("3c 9c", "Ac 7s Ah Qc As"), 0.257070707071)
def xtest_crivervalue(): f = utils.pretty_args(cpoker.rivervalue) assert_close(f('2d 6d', 'Tc Ts 8s 6c 5s'), 0.620202020202) assert_close(f('Jd Jc', '5s 7s Td 7d 4h'), 0.85202020202) assert_close(f('5c 5s', '2d Ac 9c 2h Jd'), 0.529797979798) assert_close(f('9s 7h', '8s 4c Ah 6d 3c'), 0.0409090909091) assert_close(f('Ts Qs', 'Tc Ad Ks 3s 5d'), 0.679797979798) assert_close(f('Ts Jh', 'Kh 2d 2h Ah 3h'), 0.926262626263) assert_close(f('Qc 5c', 'Jc 6h 7h Ts 2h'), 0.135353535354) assert_close(f('Kd Qh', '7h 5s 6s 4c Jd'), 0.125757575758) assert_close(f('Qh 8d', '4s 8h 8c Th 9d'), 0.927777777778) assert_close(f('9h 2d', '4s 9s 8h 6h 7h'), 0.49595959596) assert_close(f('Td 5s', 'Th 4s 8h 5c 2s'), 0.932323232323) assert_close(f('Kc 4h', '7c 3s 8c 5h 6d'), 0.769191919192) assert_close(f('2d 3c', 'Jc 4s 6h Kd 4d'), 0.0166666666667) assert_close(f('Td Kc', '6c Qh Js 7c Ks'), 0.866666666667) assert_close(f('Qs 5h', '8c 3h Qd Ts Qh'), 0.939898989899) assert_close(f('Kh 5h', '8h 2h 4s Ks Tc'), 0.850505050505) assert_close(f('3c 3d', 'Td 8s Ts 8h 4h'), 0.0166666666667) assert_close(f('Kh Td', 'Ah Th 3d 5c 9s'), 0.779797979798) assert_close(f('9c 6s', 'Qh 9d Th 7c Qd'), 0.670707070707) assert_close(f('Ac 3d', '6h 5d 7h 9h 7s'), 0.29797979798) assert_close(f('2c Th', 'Jd 7s Ah 3h 5h'), 0.119696969697) assert_close(f('9h Ks', '8h 4c 7c 3d 6h'), 0.15) assert_close(f('5h Jh', '2c 4c 9d 7h 5d'), 0.605050505051) assert_close(f('6s Ts', '5d 6d Ad Td Kh'), 0.574747474747) assert_close(f('As 3h', 'Qs 3s Qd Jc Ah'), 0.845454545455) assert_close(f('Tc Qc', '3c 4h 4d Ac 5s'), 0.259090909091) assert_close(f('5s 3d', '5d 6d 7c Jc 9c'), 0.307575757576) assert_close(f('Jd 3s', '8h 9s 2s 3c 4h'), 0.521212121212) assert_close(f('Ac Qc', '4d 9d 4h 5h 4c'), 0.638888888889) assert_close(f('3c 9c', 'Ac 7s Ah Qc As'), 0.257070707071)
def test_holdem(): def multi(h1, h2, board): r = cpoker.multi_holdem([h1, h2], board) if r != poker.multi_holdem([h1, h2], board): for h in [h1, h2, board]: print utils.make_pretty(h) print r print poker.multi_holdem([h1, h2], board) assert False return r[0] if len(r) == 1 else 2 funcs = [ poker_lite.holdem2p, lambda h1, h2, b: poker_lite.compare(h1 + b, h2 + b), cpoker.holdem2p, poker.holdem2p, multi, ] for f in funcs: f = utils.pretty_args(f) assert f("Td 3d", "Ac As", "Ks 8h 4h 6d Qh") == 1 assert f("Td 5h", "Ad 2s", "Ah 4h Kd 5c 4c") == 1 assert f("2s Td", "7c 8c", "7h Ah 3c 2h 2d") == 0 assert f("Jc 4h", "3h 8d", "9d 5d Js Tc 7s") == 1 assert f("Ts 7c", "Tc 2h", "6s Ad Ac 6h Kc") == 2 assert f("5d 7d", "8d Kd", "3h 5s 3c 5h Qh") == 0 assert f("7s 4h", "Ts Js", "4d 6h 6c Qs 6d") == 0 assert f("7s 6c", "2h Jd", "8c Ks 3h 7c Ts") == 0 assert f("Kd 3d", "Jh 3c", "6d Kh 2s Th 6c") == 0 assert f("2s 4s", "9d Th", "Kc Ks Qs Ah 3c") == 1 assert f("Kh Th", "Qd 4d", "Qh Js 5c 2s 2d") == 1 assert f("8s Td", "Jc Qs", "4h 8d 9s 9h Qd") == 1 assert f("7c Js", "Kd 6s", "3h Ad 4h 2c Tc") == 1 assert f("6c Qh", "Jh Qs", "9c 9s 4h Ts Jc") == 1 assert f("2h 3c", "Tc Kd", "4c 4d Kh 5s 4s") == 1 assert f("Ts 4d", "Ks Ad", "2h 3s 4s 7s Jd") == 0 assert f("4s Qc", "8c Qd", "9h 9s 4c 3d Js") == 0 assert f("3c 2c", "5s 2d", "3h 4h 8s Ad 2h") == 1 assert f("7c Jh", "Ah 6c", "8s 9s Qd 3c 5d") == 1 assert f("Qc Ts", "9h 5d", "5h Jd 5c Js 3c") == 1 assert f("Tc 5h", "6s Jd", "Ah 7d 4h 9s 4s") == 1 assert f("4c 9h", "9c 7h", "Ac 9d Jd 8h 5c") == 2 assert f("4d Ah", "Qs 8s", "9c 3s 2d Ad Ts") == 0 assert f("Th 3d", "Ad 3c", "Jh 7s 5s Ah 2c") == 1 assert f("9h 4s", "3s Jc", "4h Ad 8c 2s 9c") == 0 assert f("7d Js", "Td 8s", "6h 6s Kc 7h 4c") == 0 assert f("2d 6d", "Qd 9d", "Ad 2s 6h Qs 7h") == 0 assert f("Qs Ts", "Th 5d", "9d 2c Qc 3h Ah") == 0 assert f("5h 8d", "4d Jc", "Ac Js Jd 4c 2s") == 1 assert f("4s Ad", "5h 4c", "Jc 9h 3d Qc 2s") == 0 # do some more for good measure for __ in xrange(1000): cards = utils.deal() if len(set([f(*cards) for f in funcs])) != 1: print [f(*cards) for f in funcs] assert False
def test_holdem(): def multi(h1, h2, board): r = cpoker.multi_holdem([h1, h2], board) if r != poker.multi_holdem([h1, h2], board): for h in [h1, h2, board]: print(utils.make_pretty(h)) print(r) print(poker.multi_holdem([h1, h2], board)) assert False return r[0] if len(r) == 1 else 2 funcs = [poker_lite.holdem2p, lambda h1, h2, b:poker_lite.compare(h1 + b, h2 + b), #cpoker.holdem2p, poker.holdem2p, #multi ] for f in funcs: f = utils.pretty_args(f) assert f('Td 3d', 'Ac As', 'Ks 8h 4h 6d Qh') == 1 assert f('Td 5h', 'Ad 2s', 'Ah 4h Kd 5c 4c') == 1 assert f('2s Td', '7c 8c', '7h Ah 3c 2h 2d') == 0 assert f('Jc 4h', '3h 8d', '9d 5d Js Tc 7s') == 1 assert f('Ts 7c', 'Tc 2h', '6s Ad Ac 6h Kc') == 2 assert f('5d 7d', '8d Kd', '3h 5s 3c 5h Qh') == 0 assert f('7s 4h', 'Ts Js', '4d 6h 6c Qs 6d') == 0 assert f('7s 6c', '2h Jd', '8c Ks 3h 7c Ts') == 0 assert f('Kd 3d', 'Jh 3c', '6d Kh 2s Th 6c') == 0 assert f('2s 4s', '9d Th', 'Kc Ks Qs Ah 3c') == 1 assert f('Kh Th', 'Qd 4d', 'Qh Js 5c 2s 2d') == 1 assert f('8s Td', 'Jc Qs', '4h 8d 9s 9h Qd') == 1 assert f('7c Js', 'Kd 6s', '3h Ad 4h 2c Tc') == 1 assert f('6c Qh', 'Jh Qs', '9c 9s 4h Ts Jc') == 1 assert f('2h 3c', 'Tc Kd', '4c 4d Kh 5s 4s') == 1 assert f('Ts 4d', 'Ks Ad', '2h 3s 4s 7s Jd') == 0 assert f('4s Qc', '8c Qd', '9h 9s 4c 3d Js') == 0 assert f('3c 2c', '5s 2d', '3h 4h 8s Ad 2h') == 1 assert f('7c Jh', 'Ah 6c', '8s 9s Qd 3c 5d') == 1 assert f('Qc Ts', '9h 5d', '5h Jd 5c Js 3c') == 1 assert f('Tc 5h', '6s Jd', 'Ah 7d 4h 9s 4s') == 1 assert f('4c 9h', '9c 7h', 'Ac 9d Jd 8h 5c') == 2 assert f('4d Ah', 'Qs 8s', '9c 3s 2d Ad Ts') == 0 assert f('Th 3d', 'Ad 3c', 'Jh 7s 5s Ah 2c') == 1 assert f('9h 4s', '3s Jc', '4h Ad 8c 2s 9c') == 0 assert f('7d Js', 'Td 8s', '6h 6s Kc 7h 4c') == 0 assert f('2d 6d', 'Qd 9d', 'Ad 2s 6h Qs 7h') == 0 assert f('Qs Ts', 'Th 5d', '9d 2c Qc 3h Ah') == 0 assert f('5h 8d', '4d Jc', 'Ac Js Jd 4c 2s') == 1 assert f('4s Ad', '5h 4c', 'Jc 9h 3d Qc 2s') == 0 #do some more for good measure for __ in range(1000): cards = utils.deal() if len(set([f(*cards) for f in funcs])) != 1: print([f(*cards) for f in funcs]) assert False
writer.add_scalar('Eval/reward/best', best_perf, step) env.close() eval_env.close() writer.close() if __name__ == '__main__': import argparse parser = argparse.ArgumentParser() parser.add_argument('--env-id', type=str, default='BipedalWalker-v2') parser.add_argument('--seed', type=int, default=0) parser.add_argument('--num-envs', type=int, default=8) parser.add_argument('--hid-dim', type=int, default=64) parser.add_argument('--num-steps', type=int, default=4000000) parser.add_argument('--rollout-steps', type=int, default=2048) parser.add_argument('--num-epochs', type=int, default=10) parser.add_argument('--batch-size', type=int, default=64) parser.add_argument('--lr_max', type=float, default=3e-4) parser.add_argument('--lr_min', type=float, default=1e-4) parser.add_argument('--eps', type=float, default=0.2) parser.add_argument('--gamma', type=float, default=0.99) parser.add_argument('--lam', type=float, default=0.95) parser.add_argument('--alpha', type=float, default=0.0) parser.add_argument('--value-coef', type=float, default=0.5) parser.add_argument('--entropy-coef', type=float, default=0.0) parser.add_argument('--max-grad-norm', type=float, default=0.5) parser.add_argument('--target-kldiv', type=float, default=0.02) parser.add_argument('--num-eval', type=int, default=5) args = parser.parse_args(); pretty_args(args) main(args)