def generate_fig3():
    """Render the "State Space" plot with random samples clipped to an ellipse.

    An ellipse built from ``ELLIPSE_SMALL[1]`` is drawn on the unit square,
    500 uniformly random points are added and clipped to that ellipse, and
    the finished figure is passed to ``common.save_next_fig``.
    """
    fig = Figure(figsize=(5, 5))
    # Instantiated only for its effect on ``fig``; the object itself is unused.
    FigureCanvas(fig)
    ax = fig.add_subplot(1, 1, 1)
    common.set_ax_params(ax)
    ax.axis([0., 1., 0., 1.])
    ax.set_title("State Space")

    region = patches.Ellipse(
        *ELLIPSE_SMALL[1],
        edgecolor="darkgreen",
        facecolor="green",
        linewidth=1,
        alpha=0.4,
        label="Possible $\\pi_1$ States",
    )
    ax.add_artist(region)

    # Draw x first, then y, so the random stream is consumed in a fixed order.
    sample_x = np.random.random(500)
    sample_y = np.random.random(500)
    samples = lines.Line2D(
        sample_x,
        sample_y,
        linestyle="None",
        marker=".",
        label="Samples",
        markersize=2,
        color="darkblue",
    )
    ax.add_artist(samples)
    # Clip after adding the artist so only points inside the ellipse show.
    samples.set_clip_path(region)

    ax.legend(
        handles=[region, samples],
        edgecolor="black",
        loc="upper left",
        fontsize="x-small",
    )
    common.save_next_fig(PART_NUM, fig)
def generate_fig2():
    """Render a black playfield with two paddles and a ball.

    The view limits and paddle positions are derived from constants on
    ``common``; the left paddle is centred at y=-0.3, the right one at y=0.8
    and the ball sits at (0.6, 0.6). The figure is passed to
    ``common.save_next_fig``.
    """
    fig = Figure(figsize=(10, 5))
    # Instantiated only for its effect on ``fig``; the object itself is unused.
    FigureCanvas(fig)
    ax = fig.add_axes((0., 0., 1., 1.))
    common.set_ax_params(ax)
    ax.set_facecolor((0., 0., 0.))

    # View limits: the field plus room for the paddles and the ball radius.
    x_min = common.LEFT - common.PADDLE_WIDTH - common.BALL_RADIUS
    x_max = common.RIGHT + common.PADDLE_WIDTH + common.BALL_RADIUS
    y_min = common.BOTTOM - common.BALL_RADIUS
    y_max = common.TOP + common.BALL_RADIUS
    ax.axis([x_min, x_max, y_min, y_max])

    # common.HPL is used as half of the paddle's vertical extent.
    left_paddle = patches.Rectangle(
        (x_min, -0.3 - common.HPL),
        common.PADDLE_WIDTH,
        2 * common.HPL,
        color=common.PADDLE_COLOR,
    )
    right_paddle = patches.Rectangle(
        (common.RIGHT + common.BALL_RADIUS, 0.8 - common.HPL),
        common.PADDLE_WIDTH,
        2 * common.HPL,
        color=common.PADDLE_COLOR,
    )
    ball = patches.Circle(
        (0.6, 0.6),
        radius=common.BALL_RADIUS,
        color=common.BALL_COLOR,
    )

    for shape in (left_paddle, right_paddle, ball):
        ax.add_patch(shape)
    common.save_next_fig(PART_NUM, fig)
def generate_fig6():
    """Render the "State Space" plot with every ``ELLIPSE_BIG`` region overlaid.

    Each entry of ``ELLIPSE_BIG`` is drawn as a translucent blue ellipse
    (alpha 0.2). The legend uses separate zero-sized proxy ellipses whose
    alpha is ``1 - 0.8**(i + 1)``, labelled "1 Q update", "2 Q updates", ...
    The figure is passed to ``common.save_next_fig``.
    """
    fig = Figure(figsize=(5, 5))
    # Instantiated only for its effect on ``fig``; the object itself is unused.
    FigureCanvas(fig)
    ax = fig.add_subplot(1, 1, 1)
    common.set_ax_params(ax)
    ax.axis([0., 1., 0., 1.])
    ax.set_title("State Space")

    handles = []
    for i, params in enumerate(ELLIPSE_BIG):
        ax.add_artist(
            patches.Ellipse(*params,
                            edgecolor="None",
                            facecolor="blue",
                            linewidth=1,
                            alpha=0.2))
        # Zero-sized legend proxy. ``angle`` is passed by keyword because
        # recent matplotlib releases no longer accept it positionally.
        handles.append(
            patches.Ellipse((0., 0.),
                            0.,
                            0.,
                            angle=0,
                            edgecolor="None",
                            facecolor="blue",
                            linewidth=1,
                            alpha=1 - 0.8**(i + 1),
                            label="{} Q update{}".format(
                                i + 1, "s" if i else "")))

    ax.legend(handles=handles,
              edgecolor="black",
              loc="upper left",
              fontsize="x-small")
    common.save_next_fig(PART_NUM, fig)
def generate_fig1(width=5):
    """Render a labelled playfield: two paddles, a ball and a velocity arrow.

    Args:
        width: Figure width passed to ``Figure(figsize=(width, 5))``.

    The left paddle is centred at y=0.6 and labelled "Follow"; the right one
    is centred at y=-0.5 and labelled "Not Predict". A pink arrow starts at
    the ball position (0.6, 0.6). The figure is passed to
    ``common.save_next_fig``.
    """
    fig = Figure(figsize=(width, 5))
    # Instantiated only for its effect on ``fig``; the object itself is unused.
    FigureCanvas(fig)
    ax = fig.add_axes((0., 0., 1., 1.))
    common.set_ax_params(ax)
    ax.set_facecolor((0., 0., 0.))

    x_min = common.LEFT - common.PADDLE_WIDTH - common.BALL_RADIUS
    x_max = common.RIGHT + common.PADDLE_WIDTH + common.BALL_RADIUS
    y_min = common.BOTTOM - common.BALL_RADIUS
    y_max = common.TOP + common.BALL_RADIUS
    ax.axis([x_min, x_max, y_min, y_max])

    left_paddle = patches.Rectangle(
        (x_min, 0.6 - common.HPL),
        common.PADDLE_WIDTH,
        2 * common.HPL,
        color=common.PADDLE_COLOR,
    )
    right_paddle = patches.Rectangle(
        (common.RIGHT + common.BALL_RADIUS, -0.5 - common.HPL),
        common.PADDLE_WIDTH,
        2 * common.HPL,
        color=common.PADDLE_COLOR,
    )
    ball = patches.Circle(
        (0.6, 0.6),
        radius=common.BALL_RADIUS,
        color=common.BALL_COLOR,
    )
    velocity = patches.FancyArrow(0.6, 0.6, 0.2, 0.06, width=0.01, color="pink")

    # The arrow goes in first so the ball is drawn on top of its tail.
    for shape in (velocity, left_paddle, right_paddle, ball):
        ax.add_patch(shape)

    name_style = {"family": "monospace", "size": "large", "weight": "bold"}
    ax.text(common.LEFT,
            common.BOTTOM,
            "Follow",
            color=common.NAME_COLOR,
            ha="left",
            **name_style)
    ax.text(common.RIGHT,
            common.BOTTOM,
            "Not Predict",
            color=common.NAME_COLOR,
            ha="right",
            **name_style)
    common.save_next_fig(PART_NUM, fig)
def generate_fig7():
    """Render the "State Space" plot with non-stacking ``ELLIPSE_BIG`` layers.

    For each entry of ``ELLIPSE_BIG`` a blue ellipse (alpha
    ``1 - 0.8**(4 - i)``) and an opaque light-grey ellipse of the same
    geometry are created. Indices 3 down to 0 are then layered: the coloured
    ellipse ``i`` is added, followed by the grey ellipse ``i - 1``. The
    legend uses zero-sized proxy ellipses. The figure is passed to
    ``common.save_next_fig``.

    The function indexes entries 0..3, so ``ELLIPSE_BIG`` must have at least
    four entries.
    """
    fig = Figure(figsize=(5, 5))
    # Instantiated only for its effect on ``fig``; the object itself is unused.
    FigureCanvas(fig)
    ax = fig.add_subplot(1, 1, 1)
    common.set_ax_params(ax)
    ax.axis([0., 1., 0., 1.])
    ax.set_title("State Space")

    color_ellipses = []
    bg_ellipses = []
    for i, params in enumerate(ELLIPSE_BIG):
        color_ellipses.append(
            patches.Ellipse(*params,
                            edgecolor="None",
                            facecolor="blue",
                            linewidth=1,
                            alpha=1 - 0.8**(4 - i)))
        bg_ellipses.append(
            patches.Ellipse(*params,
                            edgecolor="None",
                            facecolor="lightgrey",
                            linewidth=1))

    for i in range(3, -1, -1):
        ax.add_artist(color_ellipses[i])
        if i:
            # Guarded because index ``i - 1`` would wrap to -1 when i == 0.
            ax.add_artist(bg_ellipses[i - 1])
        # NOTE(review): this clips grey ellipse i to the coloured ellipse of
        # the same geometry; confirm whether ``bg_ellipses[i - 1]`` was meant.
        bg_ellipses[i].set_clip_path(color_ellipses[i])

    # Zero-sized legend proxies. ``angle`` is passed by keyword because
    # recent matplotlib releases no longer accept it positionally.
    handles = [
        patches.Ellipse((0., 0.),
                        0.,
                        0.,
                        angle=0,
                        edgecolor="None",
                        facecolor="blue",
                        linewidth=1,
                        alpha=1 - 0.8**(i + 1),
                        label="{} Q update{}".format(i + 1, "s" if i else ""))
        for i in range(4)
    ]
    ax.legend(handles=handles,
              edgecolor="black",
              loc="upper left",
              fontsize="x-small")
    common.save_next_fig(PART_NUM, fig)
def generate_fig1(width=4):
    """Render five nested rounded squares labelled with successive policies.

    Args:
        width: Figure width passed to ``Figure(figsize=(width, 4))``.

    Squares of side 0.1, 0.3, 0.5, 0.7 and 0.9 are centred at (0.5, 0.5).
    All translucent blue fills are drawn first, then the black outlines with
    a yellow label at each square's lower-left corner. The figure is passed
    to ``common.save_next_fig``.
    """
    fig = Figure(figsize=(width, 4))
    # Instantiated only for its effect on ``fig``; the object itself is unused.
    FigureCanvas(fig)
    ax = fig.add_axes((0.01, 0.01, 0.98, 0.98))
    common.set_ax_params(ax)
    ax.axis([0., 1., 0., 1.])

    sides = [0.1, 0.3, 0.5, 0.7, 0.9]

    # Pass 1: fills. They overlap, so the centre appears darkest.
    for side in sides:
        corner = (0.5 - side / 2., 0.5 - side / 2.)
        ax.add_patch(
            patches.FancyBboxPatch(corner,
                                   width=side,
                                   height=side,
                                   boxstyle="round,pad=0.01",
                                   facecolor="blue",
                                   edgecolor="None",
                                   alpha=0.3))

    # Pass 2: outlines and labels, drawn above every fill.
    for level, side in enumerate(sides):
        corner_x = 0.5 - side / 2.
        corner_y = 0.5 - side / 2.
        ax.add_patch(
            patches.FancyBboxPatch(
                (corner_x, corner_y),
                width=side,
                height=side,
                boxstyle="round,pad=0.01",
                facecolor="None",
                edgecolor="black",
            ))
        if level > 0:
            caption = "$\\pi_{}=$MCTS$(\\pi_{})$".format(level, level - 1)
        else:
            caption = "$\\pi_0$"
        ax.text(corner_x,
                corner_y,
                caption,
                ha="left",
                va="bottom",
                size="small",
                color="yellow")
    common.save_next_fig(PART_NUM, fig)
def generate_fig1():
    """Render a state node fanning out to three actions with noisy rollouts.

    A light-blue box labelled ``$s$`` sits at (0.1, 0.5). Three light-green
    action boxes (``$a_0$``..``$a_2$``) sit at x=0.3 at heights 0.8, 0.5 and
    0.2, each connected to the state by an arrow. From each action three
    jagged black lines with random vertical noise run to the right and end
    in a small black dot. The figure is passed to ``common.save_next_fig``.
    """
    fig = Figure(figsize=(6, 4))
    # Instantiated only for its effect on ``fig``; the object itself is unused.
    FigureCanvas(fig)
    ax = fig.add_axes((0.01, 0.01, 0.98, 0.98))
    common.set_ax_params(ax)
    ax.axis([0., 1.5, 0., 1.])

    half = 0.05  # half the side length of every box

    ax.add_patch(
        patches.FancyBboxPatch((0.1 - half, 0.5 - half),
                               width=2 * half,
                               height=2 * half,
                               boxstyle="round,pad=0.01",
                               facecolor="lightblue"))
    ax.text(0.1, 0.5, "$s$", ha="center", va="center", size="large")

    action_heights = np.linspace(0.8, 0.2, 3)
    rollout_x = np.linspace(0.3 + half + 0.01, 1.4, 10)

    for action, height in enumerate(action_heights):
        for branch in range(3):
            offset = branch - 1  # -1, 0, +1: below, level with, above
            rollout_y = (height + offset / 12.
                         + np.random.uniform(-1., 1., 10) / 30.)
            # Pin the first point so the branches leave the box evenly spaced.
            rollout_y[0] = height + offset / 24.
            ax.add_artist(lines.Line2D(rollout_x, rollout_y, color="black"))
            ax.add_patch(
                patches.Circle((rollout_x[-1], rollout_y[-1]),
                               0.01,
                               color="black"))

        ax.add_patch(
            patches.FancyBboxPatch((0.3 - half, height - half),
                                   width=2 * half,
                                   height=2 * half,
                                   boxstyle="round,pad=0.01",
                                   facecolor="lightgreen"))
        ax.text(0.3,
                height,
                "$a_{}$".format(action),
                ha="center",
                va="center",
                size="large")
        # Arrows leave the state box from three slightly different heights.
        ax.add_patch(
            common.arrow_by_start_end(
                (0.1 + half + 0.01, 0.5 + half * (1 - action) / 3.),
                (0.3 - half - 0.01, height),
                length_includes_head=True,
                color="black",
                head_width=0.02))
    common.save_next_fig(PART_NUM, fig)
def generate_fig4():
    """Render the "State Space" plot with the first two ``ELLIPSE_BIG`` regions.

    ``ELLIPSE_BIG[0]`` and ``ELLIPSE_BIG[1]`` are drawn as translucent blue
    ellipses. The legend is built from two separate proxy ellipses that are
    never added to the axes. The figure is passed to
    ``common.save_next_fig``.
    """
    fig = Figure(figsize=(5, 5))
    # Instantiated only for its effect on ``fig``; the object itself is unused.
    FigureCanvas(fig)
    ax = fig.add_subplot(1, 1, 1)
    common.set_ax_params(ax)
    ax.axis([0., 1., 0., 1.])
    ax.set_title("State Space")

    for params in (ELLIPSE_BIG[0], ELLIPSE_BIG[1]):
        ax.add_artist(
            patches.Ellipse(*params,
                            edgecolor="None",
                            facecolor="blue",
                            linewidth=1,
                            alpha=0.2))

    # Legend-only proxies. Both reuse ELLIPSE_BIG[0]; only their colour and
    # alpha are passed on to the legend.
    one_update = patches.Ellipse(*ELLIPSE_BIG[0],
                                 edgecolor="None",
                                 facecolor="blue",
                                 linewidth=1,
                                 alpha=1 - 0.8,
                                 label="1 Q update")
    two_updates = patches.Ellipse(*ELLIPSE_BIG[0],
                                  edgecolor="None",
                                  facecolor="blue",
                                  linewidth=1,
                                  alpha=1 - 0.8**2,
                                  label="2 Q updates")
    ax.legend(handles=[one_update, two_updates],
              edgecolor="black",
              loc="upper left",
              fontsize="x-small")
    common.save_next_fig(PART_NUM, fig)
def generate_fig5():
    """Render the "State Space" plot with the single ``ELLIPSE_SMALL[3]`` region.

    The ellipse is drawn in translucent green with a dark-green edge and is
    the only legend entry. The figure is passed to
    ``common.save_next_fig``.
    """
    fig = Figure(figsize=(5, 5))
    # Instantiated only for its effect on ``fig``; the object itself is unused.
    FigureCanvas(fig)
    ax = fig.add_subplot(1, 1, 1)
    common.set_ax_params(ax)
    ax.axis([0., 1., 0., 1.])
    ax.set_title("State Space")

    region = patches.Ellipse(
        *ELLIPSE_SMALL[3],
        edgecolor="darkgreen",
        facecolor="green",
        linewidth=1,
        alpha=0.4,
        label="Possible $\\pi_n$ States",
    )
    ax.add_artist(region)
    ax.legend(
        handles=[region],
        edgecolor="black",
        loc="upper left",
        fontsize="x-small",
    )
    common.save_next_fig(PART_NUM, fig)
def generate_fig2():
    """Render a vertical chain of five policy / MCTS(policy) box pairs.

    At each of five heights between 2.2 and 0.3 a small square (side 0.1,
    labelled ``$\\pi_i$``) is nested in a larger one (side 0.3, labelled
    ``MCTS$(\\pi_i)$``). For every level except the last, two translucent
    arrows run from the lower corners of the large box to the top corners of
    the next level's small box. The figure is passed to
    ``common.save_next_fig``.
    """
    fig = Figure(figsize=(4, 10))
    # Instantiated only for its effect on ``fig``; the object itself is unused.
    FigureCanvas(fig)
    ax = fig.add_axes((0.01, 0.01, 0.98, 0.98))
    common.set_ax_params(ax)
    ax.axis([0., 1., 0., 2.5])

    inner = 0.1
    outer = 0.3
    levels = np.linspace(2.2, 0.3, 5)
    gap = 0.01  # clearance between an arrow end and the box it touches

    for idx, centre_y in enumerate(levels):
        # Fills first, then outlines, so the outlines stay crisp.
        for side in (inner, outer):
            ax.add_patch(
                patches.FancyBboxPatch(
                    (0.5 - side / 2., centre_y - side / 2.),
                    width=side,
                    height=side,
                    boxstyle="round,pad=0.01",
                    facecolor="blue",
                    edgecolor="None",
                    alpha=0.3))
        for side in (inner, outer):
            ax.add_patch(
                patches.FancyBboxPatch(
                    (0.5 - side / 2., centre_y - side / 2.),
                    width=side,
                    height=side,
                    boxstyle="round,pad=0.01",
                    facecolor="None",
                    edgecolor="black"))

        ax.text(0.5 - inner / 2.,
                centre_y - inner / 2.,
                "$\\pi_{}$".format(idx),
                ha="left",
                va="bottom",
                size="large",
                color="yellow")
        ax.text(0.5 - outer / 2.,
                centre_y - outer / 2.,
                "MCTS$(\\pi_{})$".format(idx),
                ha="left",
                va="bottom",
                size="large",
                color="yellow")

        if idx < 4:
            next_top = levels[idx + 1] + inner / 2. + gap
            this_bottom = centre_y - outer / 2. - gap
            # Left arrow.
            ax.add_artist(
                common.arrow_by_start_end(
                    [0.5 - outer / 2. - gap, this_bottom],
                    [0.5 - inner / 2. - gap, next_top],
                    color="black",
                    width=0.005,
                    length_includes_head=True,
                    alpha=0.3))
            # Right arrow.
            ax.add_artist(
                common.arrow_by_start_end(
                    [0.5 + outer / 2. + gap, this_bottom],
                    [0.5 + inner / 2. + gap, next_top],
                    color="black",
                    width=0.005,
                    length_includes_head=True,
                    alpha=0.3))
    common.save_next_fig(PART_NUM, fig)
def generate_fig5():
    """Render three panels showing how discretization loses velocity info.

    The top panel shows the current grid cell with a wide cone of possible
    velocities. The two lower panels each add two fading past cells, one
    approaching from the left and one from below, with a narrower cone for
    each. The figure is passed to ``common.save_next_fig``.
    """

    def possible_v(ax, dir1, dir2, size, start):
        """Draw two pink arrows and a connecting arc for a velocity cone.

        ``dir1`` and ``dir2`` are unpacked straight into ``np.arctan2``, so
        each is read as a ``(y, x)`` pair.
        """
        t1 = np.arctan2(*dir1)
        t2 = np.arctan2(*dir2)
        a1 = common.arrow_by_start_size_angle(start,
                                              size,
                                              t1,
                                              width=0.01,
                                              color="pink")
        a2 = common.arrow_by_start_size_angle(start,
                                              size,
                                              t2,
                                              width=0.01,
                                              color="pink")
        # Passed by keyword: recent matplotlib releases no longer accept
        # Arc's angle/theta1/theta2 positionally.
        arc = patches.Arc(start,
                          size,
                          size,
                          angle=0.0,
                          theta1=np.degrees(t1),
                          theta2=np.degrees(t2),
                          color="pink")
        ax.add_patch(a1)
        ax.add_patch(a2)
        ax.add_patch(arc)

    fig = Figure(figsize=(8, 8))
    # Instantiated only for its effect on ``fig``; the object itself is unused.
    FigureCanvas(fig)
    # Fixed the misspelling "Descritezation" in the published title.
    fig.suptitle(
        "Demonstration of How Discretization Creates Non-Markovian Models")
    gs = gridspec.GridSpec(2, 2)
    ax1 = fig.add_subplot(gs[0, :])
    ax2 = fig.add_subplot(gs[1, 0])
    ax3 = fig.add_subplot(gs[1, 1])

    for ax in [ax1, ax2, ax3]:
        common.set_ax_params(ax, "black")
        ax.axis([-1., 1., -1., 1.])
        # Dashed grid at -0.5, 0 and 0.5 marks the cell boundaries.
        for v in np.linspace(-1., 1., 5)[1:-1]:
            ax.add_artist(
                lines.Line2D([-1., 1.], [v, v],
                             color="red",
                             alpha=0.5,
                             linestyle="--"))
            ax.add_artist(
                lines.Line2D([v, v], [-1., 1.],
                             color="red",
                             alpha=0.5,
                             linestyle="--"))

    def cell(corner, alpha):
        """Return one 0.5 x 0.5 grid cell; lower alpha marks older steps."""
        return patches.Rectangle(corner,
                                 0.5,
                                 0.5,
                                 color="palevioletred",
                                 alpha=alpha)

    ax1.set_title("Current Partial State")
    ax1.add_patch(cell((0., 0.), 0.7))
    possible_v(ax1, (0.2, 1.), (1., 0.2), 0.4, (0.25, 0.25))

    ax2.set_title("Possible Past 1 &\nImplications on Current State")
    ax2.add_patch(cell((0., 0.), 0.7))
    ax2.add_patch(cell((-0.5, 0.), 0.5))
    ax2.add_patch(cell((-1., 0.), 0.3))
    possible_v(ax2, (0.2, 1.), (1., 1.), 0.4, (0.25, 0.25))

    ax3.set_title("Possible Past 2 &\nImplications on Current State")
    ax3.add_patch(cell((0., 0.), 0.7))
    ax3.add_patch(cell((0., -0.5), 0.5))
    ax3.add_patch(cell((0., -1.), 0.3))
    possible_v(ax3, (1., 1.), (1., 0.2), 0.4, (0.25, 0.25))

    common.save_next_fig(PART_NUM, fig)
def generate_fig2():
    """Render a 2x2 table of algorithms by model knowledge and state space.

    Columns are "Known Model" and "Unknown Model", split by a red vertical
    line; rows are "Discrete" (top) and "Continuous" (bottom). Each cell
    names the matching Q- and P- algorithm. The figure is passed to
    ``common.save_next_fig``.
    """
    fig = Figure(figsize=(4, 4))
    # Instantiated only for its effect on ``fig``; the object itself is unused.
    FigureCanvas(fig)
    ax = fig.add_axes((0.01, 0.01, 0.98, 0.98))
    common.set_ax_params(ax)
    ax.set_facecolor("white")
    ax.axis([0., 1., 0., 1.])

    ax.add_artist(
        patches.Rectangle((0.2, 0.1),
                          0.7,
                          0.7,
                          facecolor="lightgrey",
                          edgecolor="None"))

    # (xs, ys, colour). The red divider is added first, as in the original.
    rules = [
        ([0.55, 0.55], [0.1, 0.8], "red"),
        ([0.2, 0.2], [0.1, 0.8], "black"),
        ([0.9, 0.9], [0.1, 0.8], "black"),
        ([0.2, 0.9], [0.1, 0.1], "black"),
        ([0.2, 0.9], [0.8, 0.8], "black"),
        ([0.2, 0.9], [0.45, 0.45], "black"),
    ]
    for xs, ys, colour in rules:
        ax.add_artist(lines.Line2D(xs, ys, color=colour))

    left_col = (0.2 + 0.55) / 2.
    right_col = (0.9 + 0.55) / 2.
    bottom_row = (0.1 + 0.45) / 2.
    top_row = (0.8 + 0.45) / 2.

    # Column headers.
    for x, caption in ((left_col, "Known\nModel"),
                       (right_col, "Unknown\nModel")):
        ax.text(x, 0.87, caption, ha="center", va="center", size="medium")

    # Row headers, tilted to fit the narrow left margin.
    for y, caption in ((bottom_row, "Continuous"), (top_row, "Discrete")):
        ax.text(0.1,
                y,
                caption,
                ha="center",
                va="center",
                size="medium",
                rotation=50)

    # Cell contents.
    cells = [
        (left_col, bottom_row, "Deep-Q-Iteration\n\nDeep-P-Iteration"),
        (right_col, bottom_row, "Deep-Q-Learning\n\nDeep-P-Learning"),
        (left_col, top_row, "Q-Iteration\n\nP-Iteration"),
        (right_col, top_row, "Q-Learning\n\nP-Learning"),
    ]
    for x, y, caption in cells:
        ax.text(x, y, caption, ha="center", va="center", size="small")

    common.save_next_fig(PART_NUM, fig)
def generate_fig1():
    """Render the algorithm-family diagram as a 3x2 grid joined by arrows.

    Three columns of light-blue boxes (iteration, learning, deep learning)
    hold the Q- variant on the top row and the P- variant on the bottom.
    Green right-pointing arrow boxes between the columns are labelled
    "Unknown Model" and "Continuous State Space". The figure is passed to
    ``common.save_next_fig``.
    """
    fig = Figure(figsize=(8, 4))
    # Instantiated only for its effect on ``fig``; the object itself is unused.
    FigureCanvas(fig)
    ax = fig.add_axes((0.01, 0.01, 0.98, 0.98))
    common.set_ax_params(ax)
    ax.axis([0., 2., 0., 1.])

    rows = [0.3, 0.7]

    # Algorithm boxes.
    box_w = 0.3
    box_h = 0.2
    for cx, cy in product([0.3, 1., 1.7], rows):
        ax.add_patch(
            patches.FancyBboxPatch((cx - box_w / 2., cy - box_h / 2.),
                                   width=box_w,
                                   height=box_h,
                                   boxstyle="round,pad=0.01",
                                   facecolor="lightblue",
                                   edgecolor="darkblue"))
    algorithm_names = [
        (0.3, 0.7, "Q-iteration"),
        (0.3, 0.3, "P-iteration"),
        (1., 0.7, "Q-learning"),
        (1., 0.3, "P-learning"),
        (1.7, 0.7, "Deep\nQ-learning"),
        (1.7, 0.3, "Deep\nP-learning"),
    ]
    for cx, cy, caption in algorithm_names:
        ax.text(cx, cy, caption, ha="center", va="center", size="large")

    # Transition arrows between neighbouring columns.
    arrow_w = 0.25
    arrow_h = 0.1
    first_gap = 1.3 / 2.
    second_gap = 2.7 / 2.
    for cx, cy in product([first_gap, second_gap], rows):
        # Shifted 0.02 left, as in the original placement.
        ax.add_patch(
            patches.FancyBboxPatch(
                (cx - arrow_w / 2. - 0.02, cy - arrow_h / 2.),
                width=arrow_w,
                height=arrow_h,
                boxstyle="rarrow,pad=0.01",
                facecolor="lightgreen",
                edgecolor="darkgreen"))
    transition_names = [
        (first_gap, 0.7, "Unknown\nModel"),
        (first_gap, 0.3, "Unknown\nModel"),
        (second_gap, 0.7, "Continuous\nState Space"),
        (second_gap, 0.3, "Continuous\nState Space"),
    ]
    for cx, cy, caption in transition_names:
        ax.text(cx, cy, caption, ha="center", va="center", size="small")

    common.save_next_fig(PART_NUM, fig)
def generate_fig2():
    """Render side-by-side "Real Expert" and "Imitation" trajectories.

    Both panels show the same pink ellipse of possible "Predict" states.
    The left panel plots a smooth spline through hand-picked way-points. The
    right panel plots a noisy copy of that spline (blue) followed by a
    random drift towards (0.4, 0.6) (red). Each trajectory ends in a dot
    marking the episode end. The figure is passed to
    ``common.save_next_fig``.
    """
    fig = Figure(figsize=(8, 4))
    # Instantiated only for its effect on ``fig``; the object itself is unused.
    FigureCanvas(fig)
    ax1 = fig.add_subplot(1, 2, 1)
    ax2 = fig.add_subplot(1, 2, 2)

    # ``angle`` is passed by keyword throughout: recent matplotlib releases
    # no longer accept it as the fourth positional argument of Ellipse.
    for ax in [ax1, ax2]:
        common.set_ax_params(ax)
        ax.axis([0., 1., 0., 1.])
        ax.add_patch(
            patches.Ellipse((0.65, 0.3),
                            0.6,
                            0.3,
                            angle=20,
                            alpha=0.5,
                            edgecolor="None",
                            facecolor="pink"))
        ax.add_patch(
            patches.Ellipse((0.65, 0.3),
                            0.6,
                            0.3,
                            angle=20,
                            edgecolor="pink",
                            facecolor="None",
                            linewidth=3))

    # Legend-only proxy shared by both panels; never added to an axes.
    e = patches.Ellipse((0.65, 0.3),
                        0.6,
                        0.3,
                        angle=20,
                        alpha=0.5,
                        edgecolor="None",
                        facecolor="pink",
                        label='Possible "Predict" States')

    # Expert trajectory: quintic smoothing spline through the way-points.
    x = [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.8, 0.7, 0.6, 0.5]
    y = [0.2, 0.25, 0.2, 0.25, 0.22, 0.36, 0.4, 0.37, 0.4, 0.32]
    tck, u = interpolate.splprep([x, y], s=0.002, k=5)
    out = interpolate.splev(np.linspace(0, 1, 1000), tck)
    s = lines.Line2D(out[0],
                     out[1],
                     color="blue",
                     label='Example "Predict" Trajectory')
    win = patches.Circle((x[-1], y[-1]),
                         radius=0.01,
                         color="blue",
                         label='Episode End')
    ax1.add_artist(s)
    ax1.add_patch(win)
    ax1.legend(handles=[e, s, win],
               edgecolor="black",
               loc="upper left",
               fontsize="x-small")
    ax1.set_title("Real Expert")

    # Imitation: jitter the expert path and keep every 10th of the first
    # 960 points.
    noise_x = (out[0] + np.random.normal(0, 0.006, 1000))[:960:10]
    noise_y = (out[1] + np.random.normal(0, 0.006, 1000))[:960:10]
    # Then drift from the last noisy point towards (0.4, 0.6).
    rand_x = np.linspace(noise_x[-1], 0.4, 10)[1:] + np.random.normal(
        0, 0.015, 9)
    rand_y = np.linspace(noise_y[-1], 0.6, 10)[1:] + np.random.normal(
        0, 0.015, 9)
    # The blue segment borrows the first drift point so the colours join.
    okay = lines.Line2D(np.r_[noise_x, rand_x[:1]],
                        np.r_[noise_y, rand_y[:1]],
                        color="blue",
                        label='"Imitation" Trajectory inside D')
    rand = lines.Line2D(rand_x[0:],
                        rand_y[0:],
                        color="red",
                        label='"Imitation" Trajectory outside D')
    lose = patches.Circle((rand_x[-1], rand_y[-1]),
                          radius=0.01,
                          color="red",
                          label="Episode End")
    ax2.add_artist(okay)
    ax2.add_artist(rand)
    ax2.add_patch(lose)
    ax2.legend(handles=[e, okay, rand, lose],
               edgecolor="black",
               loc="upper left",
               fontsize="x-small")
    ax2.set_title("Imitation")
    common.save_next_fig(PART_NUM, fig)
def generate_fig3():
    """Render a 2x2 comparison of sampling strategies and their coverage.

    Every panel shows the pink ellipse of possible "Predict" states. The
    top row shows 500 samples clipped to that ellipse ("Sampling from
    Games") and 100 unclipped samples ("Sampling Uniformly"). The bottom row
    shades the region labelled "Useful Approximation": a slightly larger
    ellipse on the left, the whole unit square on the right. The figure is
    passed to ``common.save_next_fig``.
    """
    fig = Figure(figsize=(8, 8))
    # Instantiated only for its effect on ``fig``; the object itself is unused.
    FigureCanvas(fig)
    ax1 = fig.add_subplot(2, 2, 1)
    ax2 = fig.add_subplot(2, 2, 2)
    ax3 = fig.add_subplot(2, 2, 3)
    ax4 = fig.add_subplot(2, 2, 4)

    # ``angle`` is passed by keyword throughout: recent matplotlib releases
    # no longer accept it as the fourth positional argument of Ellipse.
    outlines = []
    for ax in [ax1, ax2, ax3, ax4]:
        common.set_ax_params(ax)
        ax.axis([0., 1., 0., 1.])
        fill = patches.Ellipse((0.65, 0.3),
                               0.6,
                               0.3,
                               angle=20,
                               alpha=0.5,
                               edgecolor="None",
                               facecolor="pink")
        outline = patches.Ellipse((0.65, 0.3),
                                  0.6,
                                  0.3,
                                  angle=20,
                                  edgecolor="pink",
                                  facecolor="None",
                                  linewidth=3)
        ax.add_patch(fill)
        ax.add_patch(outline)
        outlines.append(outline)

    # Legend-only proxy shared by all panels; never added to an axes.
    e0 = patches.Ellipse((0.65, 0.3),
                         0.6,
                         0.3,
                         angle=20,
                         alpha=0.5,
                         edgecolor="None",
                         facecolor="pink",
                         label='Possible "Predict" States')

    # Top-left: samples restricted to the reachable region.
    x = np.random.random(500)
    y = np.random.random(500)
    s = lines.Line2D(x,
                     y,
                     linestyle="None",
                     marker=".",
                     markersize=2,
                     label="Samples",
                     color="darkblue")
    ax1.add_artist(s)
    s.set_clip_path(outlines[0])
    ax1.legend(handles=[e0, s],
               edgecolor="black",
               loc="upper left",
               fontsize="x-small")
    ax1.set_title("Sampling from Games")

    # Top-right: samples spread over the whole square.
    x = np.random.random(100)
    y = np.random.random(100)
    s = lines.Line2D(x,
                     y,
                     linestyle="None",
                     marker=".",
                     markersize=2,
                     label="Samples",
                     color="darkblue")
    ax2.add_artist(s)
    ax2.legend(handles=[e0, s],
               edgecolor="black",
               loc="upper left",
               fontsize="x-small")
    # Fixed the misspelling "Uniformally" in the published title.
    ax2.set_title("Sampling Uniformly")

    # Bottom-left: the approximation is useful just around the ellipse.
    near = patches.Ellipse((0.65, 0.3),
                           0.65,
                           0.35,
                           angle=20,
                           alpha=0.1,
                           edgecolor="None",
                           facecolor="blue",
                           label="Useful Approximation")
    ax3.add_patch(near)
    ax3.legend(handles=[e0, near],
               edgecolor="black",
               loc="upper left",
               fontsize="x-small")

    # Bottom-right: the approximation is useful everywhere.
    everywhere = patches.Rectangle((0., 0.),
                                   1.,
                                   1.,
                                   alpha=0.1,
                                   edgecolor="None",
                                   facecolor="blue",
                                   label="Useful Approximation")
    ax4.add_patch(everywhere)
    ax4.legend(handles=[e0, everywhere],
               edgecolor="black",
               loc="upper left",
               fontsize="x-small")
    common.save_next_fig(PART_NUM, fig)
def generate_fig1(width=5):
    """Render a schematic of a five-layer fully connected network.

    Args:
        width: Figure width passed to ``Figure(figsize=(width, 5))``.

    Five tall rounded boxes (state, three hidden layers, output) are spread
    between x=0.1 and x=0.9, each with a vertical label. Neighbouring boxes
    are joined by four translucent arrows linking their top and bottom
    edges. The figure is passed to ``common.save_next_fig``.
    """

    def arrow(start, end):
        """Return one thin translucent connector arrow from start to end."""
        return common.arrow_by_start_end(start,
                                         end,
                                         head_width=0.01,
                                         head_length=0.01,
                                         length_includes_head=True,
                                         alpha=0.5,
                                         color="black")

    fig = Figure(figsize=(width, 5))
    # Instantiated only for its effect on ``fig``; the object itself is unused.
    FigureCanvas(fig)
    ax = fig.add_axes((0.01, 0.01, 0.98, 0.98))
    common.set_ax_params(ax)

    box_width = 0.05
    x_middle = np.linspace(0.1, 0.9, 5)
    x_start = x_middle - box_width / 2
    x_end = x_middle + box_width / 2
    heights = np.array([0.2, 0.8, 0.8, 0.8, 0.1])
    y_start = 0.5 - 0.5 * heights
    y_end = 0.5 + 0.5 * heights

    # Raw strings: "\m" is not a valid escape, so the non-raw literals
    # triggered an invalid-escape warning. The runtime text is unchanged.
    labels = [
        r"state ($\mathbb{R}^{8}$)",
        r"hidden$_1$ ($\mathbb{R}^{50}$, ReLU)",
        r"hidden$_2$ ($\mathbb{R}^{50}$, ReLU)",
        r"hidden$_3$ ($\mathbb{R}^{50}$, ReLU)",
        "output",
    ]

    boxes = []
    for i in range(5):
        boxes.append(
            patches.FancyBboxPatch((x_start[i], y_start[i]),
                                   width=box_width,
                                   height=heights[i],
                                   boxstyle="round,pad=0.01",
                                   facecolor="white"))
        ax.text(x_middle[i],
                0.5,
                labels[i],
                ha="center",
                va="center",
                size="large",
                rotation=90)

    arrows = []
    for i in range(4):
        tail_x = x_end[i] + 0.01
        head_x = x_start[i + 1] - 0.01
        # From just outside this layer's top and bottom edges to the next
        # layer's top and bottom edges: top-top, top-bottom, bottom-top,
        # bottom-bottom.
        for tail_y in (y_end[i] + 0.01, y_start[i] - 0.01):
            for head_y in (y_end[i + 1], y_start[i + 1]):
                arrows.append(arrow((tail_x, tail_y), (head_x, head_y)))

    for p in boxes + arrows:
        ax.add_patch(p)
    common.save_next_fig(PART_NUM, fig)
def generate_fig1():
    """Render "Sequential Learning" beside "Parallel Learning".

    Both panels stack three light-blue policy boxes (``$\\pi_0$``..
    ``$\\pi_2$``) at rows 0, 2 and 4 of a five-row column. The left panel
    puts one light-green box between consecutive policies. The right panel
    puts a row of five such boxes there, each linked to the policy above and
    below. The figure is passed to ``common.save_next_fig``.
    """
    fig = Figure(figsize=(8, 4))
    # Instantiated only for its effect on ``fig``; the object itself is unused.
    FigureCanvas(fig)
    seq_ax = fig.add_subplot(1, 2, 1)
    par_ax = fig.add_subplot(1, 2, 2)
    for panel in (seq_ax, par_ax):
        common.set_ax_params(panel)
        panel.axis([0., 1., 0., 1.])
    seq_ax.set_title("Sequential Learning")
    par_ax.set_title("Parallel Learning")

    box_width = 0.1
    box_height = 0.1
    row_mid = np.linspace(0.9, 0.1, 5)
    row_bottom = row_mid - box_height / 2
    row_top = row_mid + box_height / 2
    col_left = 0.5 - 0.5 * box_width
    col_mid = 0.5

    def box(corner, colour):
        """Return a rounded box of the shared size at ``corner``."""
        return patches.FancyBboxPatch(corner,
                                      width=box_width,
                                      height=box_height,
                                      boxstyle="round,pad=0.01",
                                      facecolor=colour)

    def link(start, end):
        """Return a black connector arrow from ``start`` to ``end``."""
        return common.arrow_by_start_end(start,
                                         end,
                                         width=0.005,
                                         length_includes_head=True,
                                         color="black")

    policy_rows = range(0, 5, 2)
    episode_rows = range(1, 5, 2)

    # Left panel: one episode box between consecutive policies.
    for row in policy_rows:
        seq_ax.add_patch(box((col_left, row_bottom[row]), "lightblue"))
        seq_ax.text(col_mid,
                    row_mid[row],
                    "$\\pi_{{{}}}$".format(row // 2),
                    ha="center",
                    va="center",
                    size="large")
    for row in episode_rows:
        seq_ax.add_patch(box((col_left, row_bottom[row]), "lightgreen"))
        seq_ax.text(col_mid,
                    row_mid[row],
                    "$e_{{{}}}$".format(row // 2),
                    ha="center",
                    va="center",
                    size="large")
        seq_ax.add_patch(
            link((col_mid, row_bottom[row - 1] - 0.01),
                 (col_mid, row_top[row] + 0.01)))
        seq_ax.add_patch(
            link((col_mid, row_bottom[row] - 0.01),
                 (col_mid, row_top[row + 1] + 0.01)))

    # Right panel: five episode boxes side by side between policies. The
    # horizontal positions reuse the same 0.9..0.1 spacing as the rows.
    x2_middle = np.linspace(0.9, 0.1, 5)
    x2_start = x2_middle - box_width / 2

    for row in policy_rows:
        par_ax.add_patch(box((col_left, row_bottom[row]), "lightblue"))
        par_ax.text(col_mid,
                    row_mid[row],
                    "$\\pi_{{{}}}$".format(row // 2),
                    ha="center",
                    va="center",
                    size="large")
    for row in episode_rows:
        for slot in range(0, 5):
            par_ax.add_patch(
                box((x2_start[slot], row_bottom[row]), "lightgreen"))
            # Slots run right to left, so the subscript is 5 - slot.
            par_ax.text(x2_middle[slot],
                        row_mid[row],
                        "$e^{{({})}}_{{{}}}$".format(row // 2, 5 - slot),
                        ha="center",
                        va="center",
                        size="large")
            par_ax.add_patch(
                link((col_mid, row_bottom[row - 1] - 0.01),
                     (x2_middle[slot], row_top[row] + 0.01)))
            par_ax.add_patch(
                link((x2_middle[slot], row_bottom[row] - 0.01),
                     (col_mid, row_top[row + 1] + 0.01)))
    common.save_next_fig(PART_NUM, fig)
def generate_fig8(width=8):
    """Render a 4x4 grid comparing four Q-learning variants step by step.

    Args:
        width: Figure width passed to ``Figure(figsize=(width, 8))``.

    The columns are Tabular Q-Iteration, Deep-Q-Iteration, Deep-Q-Learning
    and Deep-Q-Learning with a replay DB. Within each column, rows 0 and 2
    show where samples are drawn and rows 1 and 3 show the region covered
    after the update. The figure is passed to ``common.save_next_fig``.
    """
    fig = Figure(figsize=(width, 8))
    # Instantiated only for its effect on ``fig``; the object itself is unused.
    FigureCanvas(fig)
    grid = gridspec.GridSpec(4, 4)
    fig.suptitle("Visual Comparison of *-Q-* Algorithms")

    # One list of four axes per column, created row by row.
    tabular, deep_iter, deep_learn, deep_replay = [], [], [], []
    columns = (tabular, deep_iter, deep_learn, deep_replay)
    for row in range(4):
        for col, column_axes in enumerate(columns):
            column_axes.append(fig.add_subplot(grid[row, col]))

    tabular[0].set_title("Tabular Q-Iteration")
    deep_iter[0].set_title("Deep-Q-Iteration")
    deep_learn[0].set_title("Deep-Q-Learning")
    deep_replay[0].set_title("Deep-Q-Learning\nwith Replay DB")

    for ax in tabular + deep_iter + deep_learn + deep_replay:
        common.set_ax_params(ax)
        ax.axis([0., 1., 0., 1.])

    def scatter(xs, ys, **extra):
        """Return a marker-only Line2D in the shared sample style."""
        return lines.Line2D(xs,
                            ys,
                            linestyle="None",
                            marker=".",
                            markersize=2,
                            color="darkblue",
                            **extra)

    def full_cover(step):
        """Return a unit-square overlay that darkens with ``step``."""
        return patches.Rectangle((0., 0.),
                                 1.,
                                 1.,
                                 alpha=1 - 0.8**(step + 1),
                                 facecolor="blue")

    # --- Tabular Q-Iteration: 5x5 grid with one sample per cell. ---
    for ax in tabular:
        for v in np.linspace(0, 1, 6)[1:-1]:
            ax.add_artist(lines.Line2D([0., 1.], [v, v], color="black"))
            ax.add_artist(lines.Line2D([v, v], [0., 1.], color="black"))
    for ax in tabular[::2]:
        centres = np.linspace(0.1, 0.9, 5)
        grid_x, grid_y = zip(*product(centres, np.linspace(0.1, 0.9, 5)))
        ax.add_artist(scatter(grid_x, grid_y))
    for step, ax in enumerate(tabular[1::2]):
        ax.add_artist(full_cover(step))

    # --- Deep-Q-Iteration: uniform random samples over the square. ---
    for ax in deep_iter[::2]:
        xs = np.random.random(50)
        ys = np.random.random(50)
        ax.add_artist(scatter(xs, ys))
    for step, ax in enumerate(deep_iter[1::2]):
        ax.add_artist(full_cover(step))

    # --- Deep-Q-Learning (both variants), rows 0-2. ---
    def reachable_with_samples(ax, ellipse_params, label):
        """Draw a green region and 300 random samples clipped to it."""
        region = patches.Ellipse(*ellipse_params,
                                 edgecolor="darkgreen",
                                 facecolor="green",
                                 linewidth=1,
                                 alpha=0.4,
                                 label=label)
        ax.add_artist(region)
        xs = np.random.random(300)
        ys = np.random.random(300)
        points = scatter(xs, ys, label="Samples")
        ax.add_artist(points)
        points.set_clip_path(region)

    for ax in (deep_learn[0], deep_replay[0]):
        reachable_with_samples(ax, ELLIPSE_SMALL[0],
                               "Possible $\\pi_0$ States")

    for ax in (deep_learn[1], deep_replay[1]):
        ax.add_artist(
            patches.Ellipse(*ELLIPSE_BIG[0],
                            edgecolor="None",
                            facecolor="blue",
                            linewidth=1,
                            alpha=0.2,
                            label="1 Q update"))

    for ax in (deep_learn[2], deep_replay[2]):
        reachable_with_samples(ax, ELLIPSE_SMALL_ALT,
                               "Possible $\\pi_1$ States")

    # --- Row 3 without replay: two equally faint regions. ---
    deep_learn[3].add_patch(
        patches.Ellipse(*ELLIPSE_SMALL[0],
                        edgecolor="None",
                        facecolor="blue",
                        linewidth=1,
                        alpha=1 - 0.8,
                        label="1 Q update"))
    deep_learn[3].add_patch(
        patches.Ellipse(*ELLIPSE_BIG_ALT,
                        edgecolor="None",
                        facecolor="blue",
                        linewidth=1,
                        alpha=1 - 0.8,
                        label="2 Q update"))

    # --- Row 3 with replay: the first region is reinforced. ---
    reinforced = patches.Ellipse(*ELLIPSE_BIG[0],
                                 edgecolor="None",
                                 facecolor="blue",
                                 linewidth=1,
                                 alpha=1 - 0.8**2,
                                 label="1 Q update")
    newest = patches.Ellipse(*ELLIPSE_BIG_ALT,
                             edgecolor="None",
                             facecolor="blue",
                             linewidth=1,
                             alpha=1 - 0.8,
                             label="2 Q update")
    mask = patches.Ellipse(*ELLIPSE_BIG_ALT,
                           edgecolor="None",
                           facecolor="lightgrey")
    # Order matters: the grey mask goes over ``newest`` and is clipped to
    # ``reinforced`` once both are on the axes.
    deep_replay[3].add_patch(newest)
    deep_replay[3].add_patch(mask)
    deep_replay[3].add_patch(reinforced)
    mask.set_clip_path(reinforced)

    common.save_next_fig(PART_NUM, fig)
def generate_fig3():
    """Lay out a random search tree, record a movie of it, and save two stills.

    Builds a ``RandomTree(3)``, runs ten ``simulate(8)`` calls, assigns
    drawing coordinates and colours to every node level by level, then walks
    ``tree.visitorder``. Each visited node is flashed red on ``fig3`` (the
    movie, written to ``figures/part{PART_NUM}/mcts_movie.mp4``) and drawn
    permanently on ``fig1`` and ``fig2``, which are finally passed to
    ``common.save_next_fig``.

    NOTE(review): ``RandomTree`` is defined outside this view; its
    ``simulate``/``set``/``draw``/``remove`` semantics are assumed from use.
    """
    tree = RandomTree(3)
    for i in range(10):
        tree.simulate(8)
    # Retry until a randomly chosen root child exists; a missing child is
    # presumably None, so calling .set() on it raises AttributeError.
    while True:
        try:
            a = np.random.choice(3)
            tree.children[a].set()
            break
        except AttributeError:
            pass
    # fig1 and fig2 are the half-size stills; fig3 is the full-size movie.
    fig1 = Figure(figsize=(16/2, 9/2))
    canvas1 = FigureCanvas(fig1)
    ax1 = fig1.add_axes((0.01, 0.01, 0.98, 0.98))
    fig2 = Figure(figsize=(16/2, 9/2))
    canvas2 = FigureCanvas(fig2)
    ax2 = fig2.add_axes((0.01, 0.01, 0.98, 0.98))
    fig3 = Figure(figsize=(16, 9))
    canvas3 = FigureCanvas(fig3)
    ax3 = fig3.add_axes((0.01, 0.01, 0.98, 0.98))
    for ax in [ax1, ax2, ax3]:
        common.set_ax_params(ax)
        ax.axis([0., 16., 0., 9.])
    r = 0.4
    # Root node: a box of side 2*r centred at the left edge, mid-height.
    tree.xy = (1., 9. / 2.)
    tree.box_xy = (1. - r, 9. / 2. - r)
    tree.width = 2 * r
    tree.height = 2 * r
    tree.text = "$s$"
    tree.facecolor1 = "lightblue"
    tree.facecolor2 = "lightblue"
    tree.alpha = 0.2
    # Three outgoing anchor points on the box's right side, one per child.
    tree.connectors = [(1. + r + 0.1, 9. / 2. + j * r / 3) for j in [1, 0, -1]]
    X = np.linspace(3., 15., 8)  # one x position per tree depth
    L = [tree]  # nodes of the level currently being laid out
    for i in range(8):
        # Collect the next level so its nodes can be spread evenly in y.
        L2 = []
        for n in L:
            L2.extend(c for c in n.children if c)
        Y = np.linspace(9., 0., len(L2) + 2)[1:-1]
        cnt = 0
        for n in L:
            for j in range(3):
                if n.children[j] is not None:
                    c = n.children[j]
                    x, y = X[i], Y[cnt]
                    c.connectors = [(x + r/2 + 0.1, y + k * r / 6) for k in [1, 0, -1]]
                    c.xy = (x, y)
                    c.box_xy = (x - r/2, y - r/2)
                    c.width = r
                    c.height = r
                    # Endpoints of the edge from the parent's j-th connector.
                    c.father_a_xy = n.connectors[j]
                    c.a_xy = (x - r/2 - 0.1, y)
                    c.text = "$a_{}$".format(j)
                    c.facecolor1 = "lightgreen"
                    # In the second still only an active depth-1 node is blue.
                    if (i==0 and c.active):
                        c.facecolor2 = "lightblue"
                    else:
                        c.facecolor2 = "lightgreen"
                    c.alpha = 1. if c.active else 0.2
                    cnt += 1
        L = L2
    # NOTE(review): writer.finish() is not protected by try/finally, so an
    # exception while drawing would leave the movie writer open.
    writer = FFMpegWriter()
    writer.setup(fig3, "figures/part{}/mcts_movie.mp4".format(PART_NUM))
    writer.grab_frame()
    reset = False
    for c in tree.visitorder:
        if reset:
            # The previous node had no children, so replay the whole path
            # from the root down to this node.
            n = c
            L = []
            while n:
                L.append(n)
                n = n.parent
        else:
            L = [c]
        for n in L[::-1]:
            # Flash the node red for one frame, then redraw it normally.
            n.draw(ax3, "red", 1., "xx-large")
            writer.grab_frame()
            n.remove(ax3)
            n.draw(ax3, n.facecolor1, 1., "xx-large")
            writer.grab_frame()
        c.draw(ax1, c.facecolor1, 1.)
        c.draw(ax2, c.facecolor2, c.alpha)
        reset = not any(c.children)
    writer.finish()
    common.save_next_fig(PART_NUM, fig1)
    common.save_next_fig(PART_NUM, fig2)
def generate_fig3():
    """Render two "Policies Space" figures: failed and negated failed episodes.

    Each figure splits the plane around the current policy at (0.5, 0.5)
    into a blue wedge (-30 to 30 degrees, "Better Policies") and a red wedge
    (the rest, "Worse Policies"). Two blue arrows mark successful episodes.
    Three further arrows mark failed episodes: red in the first figure, and
    rotated by 180 degrees in semi-transparent purple in the second. Each
    figure is passed to ``common.save_next_fig``.
    """
    origin = (0.5, 0.5)

    def episode_arrow(angle_deg, **style):
        """Return an arrow of length 0.4 from the current policy."""
        return common.arrow_by_start_size_angle(origin,
                                                0.4,
                                                np.radians(angle_deg),
                                                width=0.01,
                                                length_includes_head=True,
                                                **style)

    # (unused index, rotation in degrees, colour, alpha, legend label)
    variants = [
        (12, 0, "red", 1., "Failed Episode"),
        (13, 180, "purple", 0.5, "Negative Failed Episode"),
    ]
    for _, rotation, colour, opacity, legend_label in variants:
        fig = Figure(figsize=(4, 4))
        # Instantiated only for its effect on ``fig``; otherwise unused.
        FigureCanvas(fig)
        ax = fig.add_subplot(1, 1, 1)
        common.set_ax_params(ax)
        ax.axis([0., 1., 0., 1.])
        ax.set_title("Policies Space")

        ax.add_artist(
            patches.Wedge(origin,
                          1.,
                          -30,
                          30,
                          facecolor="blue",
                          alpha=0.2,
                          edgecolor="None"))
        ax.add_artist(
            patches.Wedge(origin,
                          1.,
                          30,
                          -30,
                          facecolor="red",
                          alpha=0.2,
                          edgecolor="None"))

        # Successful episodes point into the blue wedge.
        for angle in (-25, 10):
            ax.add_artist(episode_arrow(angle, color="blue"))
        # Failed episodes, optionally flipped by ``rotation``.
        for angle in (rotation + 40, rotation + 55, rotation - 50):
            ax.add_artist(episode_arrow(angle, color=colour, alpha=opacity))

        ax.add_artist(patches.Circle(origin, radius=0.02, color="black"))

        legend_entries = [
            patches.Patch(color="blue", label="Better Policies", alpha=0.2),
            patches.Patch(color="red", label="Worse Policies", alpha=0.2),
            patches.FancyArrow(0.,
                               0.,
                               1.,
                               1.,
                               color="blue",
                               label="Successful Episode"),
            patches.FancyArrow(0.,
                               0.,
                               1.,
                               1.,
                               color=colour,
                               alpha=opacity,
                               label=legend_label),
            patches.Circle(origin,
                           radius=0.02,
                           color="black",
                           label="Current Policy"),
        ]
        ax.legend(
            handles=legend_entries,
            edgecolor="black",
            loc="upper left",
            fontsize="x-small",
        )
        common.save_next_fig(PART_NUM, fig)