Example No. 1
def functionality_nominal(sequence: Sequence[Tuple[Hashable, Hashable]]) -> float:
    examples = dict()
    for _t, (each_input, each_output) in enumerate(sequence):
        sub_dict = examples.get(each_input)
        if sub_dict is None:
            sub_dict = {each_output: 1}
            examples[each_input] = sub_dict
        else:
            sub_dict[each_output] = sub_dict.get(each_output, 0) + 1

        if Timer.time_passed(2000):
            print("{:05d} examples processed...".format(_t))

    best = 0
    total = 0
    for _i, (each_input, output_frequencies) in enumerate(examples.items()):
        frequencies = output_frequencies.values()
        max_frequency = max(frequencies)
        best += max_frequency
        total += sum(frequencies)

        if Timer.time_passed(2000):
            print("{:05d} examples processed...".format(total))

    return best / total
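
A minimal, self-contained sketch of what this measure computes (progress printing omitted, name hypothetical): the fraction of observed outputs that the single most frequent output per input accounts for.

from collections import Counter, defaultdict
from typing import Hashable, Sequence, Tuple


def functionality_nominal_sketch(pairs: Sequence[Tuple[Hashable, Hashable]]) -> float:
    # Count how often each output follows each input.
    counts = defaultdict(Counter)
    for each_input, each_output in pairs:
        counts[each_input][each_output] += 1
    # Share of observations explained by a deterministic input -> output mapping.
    best = sum(max(_c.values()) for _c in counts.values())
    total = sum(sum(_c.values()) for _c in counts.values())
    return best / total


assert functionality_nominal_sketch([("a", 1), ("a", 1), ("a", 2), ("b", 3)]) == .75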
Example No. 2
def test_3d():
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 -- registers the "3d" projection

    dim_range = -10., 10.

    plot_axis, error_axis = setup_3d_axes()

    r = MultiplePolynomialFromLinearRegression(2,
                                               4,
                                               past_scope=100,
                                               learning_drag=0)

    # fun = lambda _x, _y: 10. + 1. * _x ** 1. + 1. * _y ** 1. + 4. * _x * _y + 1. * _x ** 2. + -2.6 * _y ** 2.
    fun = lambda _x, _y: -cos(_x / (1. * math.pi)) + -cos(_y / (1. * math.pi))
    plot_surface(plot_axis, fun, (dim_range, dim_range), resize=True)
    #pyplot.pause(.001)
    #pyplot.draw()

    iterations = 0

    error_development = deque(maxlen=10000)

    while True:
        x = random.uniform(*dim_range)
        y = random.uniform(*dim_range)
        z_o = r.output([x, y])

        z_t = fun(x, y)
        error = 0 if iterations < 1 else smear(error_development[-1],
                                               abs(z_o - z_t), iterations)
        error_development.append(error)

        if Timer.time_passed(1000):
            print(f"{iterations:d} finished")

            c = r.get_coefficients()
            print(c)
            print(r.derive_coefficients(c, 0))

            ln = plot_surface(plot_axis,
                              lambda _x, _y: r.output([_x, _y]),
                              (dim_range, dim_range),
                              resize=False)
            # ln_d = plot_surface(plot_axis, lambda _x, _y: r.derivation_output([_x, _y], 0), (dim_range, dim_range), resize=False)
            e, = error_axis.plot(range(len(error_development)),
                                 error_development,
                                 color="black")
            error_axis.set_ylim(
                (min(error_development), max(error_development)))

            pyplot.pause(.001)
            pyplot.draw()

            ln.remove()
            # ln_d.remove()
            e.remove()

        r.fit([x, y], z_t)  # , past_scope=iterations)
        iterations += 1
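
The helpers smear and Timer.time_passed are used throughout these snippets but never defined in them. The stand-ins below are assumptions sketched from usage, not the project's actual code: smear as an incremental mean (matching the explicit (average * t + value) / (t + 1) updates in the setup() function further down), and Timer.time_passed as a simple throttle for progress output.

import time


def smear(average: float, value: float, iterations: int) -> float:
    # Assumed: incremental mean over `iterations + 1` samples.
    return (average * iterations + value) / (iterations + 1)


class Timer:
    _last_time = 0.

    @staticmethod
    def time_passed(milliseconds: int) -> bool:
        # Assumed: return True at most once per `milliseconds`, throttling log output.
        now = time.time()
        if (now - Timer._last_time) * 1000. < milliseconds:
            return False
        Timer._last_time = now
        return True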
Example No. 3
    def _plot_h_stacked_bars(axis: pyplot.Axes, segments: Sequence[Sequence[Tuple[Any, float]]]):
        for _i, each_level in enumerate(segments):
            for _x in range(len(each_level) - 1):
                each_left, each_shape = each_level[_x]
                each_right, _ = each_level[_x + 1]
                each_width = each_right - each_left
                hsv = distribute_circular(each_shape), .2, 1.
                axis.barh(_i, each_width, height=1., align="edge", left=each_left, color=hsv_to_rgb(hsv))

                if Timer.time_passed(2000):
                    print("Finished {:5.2f}% of plotting level {:d}/{:d}...".format(100. * _x / (len(each_level) - 1), _i, len(segments)))
Example No. 4
def new_setup():
    iterations = 500000

    no_ex = 1
    cryptos = "qtum", "bnt", "snt", "eos"

    train_in_cryptos = cryptos[:1]
    train_out_crypto = cryptos[0]

    test_in_cryptos = cryptos[:1]
    test_out_crypto = cryptos[0]

    in_dim = len(train_in_cryptos)
    out_dim = 1

    start_stamp = 1501113780
    end_stamp = 1532508240
    behind = 60

    predictor = RationalSemioticModel(input_dimension=in_dim,
                                      output_dimension=out_dim,
                                      no_examples=no_ex,
                                      alpha=100,
                                      sigma=.2,
                                      drag=100,
                                      trace_length=1)
    training_streams = exchange_rate_sequence(start_stamp, end_stamp - behind,
                                              behind, train_in_cryptos,
                                              train_out_crypto)
    test_streams = exchange_rate_sequence(start_stamp + behind, end_stamp,
                                          behind, test_in_cryptos,
                                          test_out_crypto)

    setup = SetupPrediction("test",
                            predictor,
                            training_streams,
                            test_streams,
                            logging_steps=iterations // 1000)

    for _i in range(iterations):
        data = next(setup)
        if Timer.time_passed(2000):
            print(
                f"finished {(_i + 1) * 100 / iterations:5.2f}%...\n{str(data):s}\n"
            )
Example No. 5
    def some_random_games_first():
        # Each of these is its own game.
        # This is each frame, up to 200... but we won't make it that far.

        average_reward = 0.
        iterations = 0
        sensor = None
        visualize = False
        while True:
            # This will display the environment
            # Only display if you really want to see it.
            # Takes much longer to display it.
            if average_reward >= .9:
                visualize = True
            if visualize:
                env.render()

            # This will just create a sample action in any environment.
            # In this environment, the action can be 0 or 1, which is left or right

            if sensor is None:
                # motor = env.action_space.sample()
                motor = 0.,
            else:
                motor = controller.react(tuple(sensor))

            # this executes the environment with an action,
            # and returns the observation of the environment,
            # the reward, if the env is over, and other info.
            state = env.step(numpy.array(motor))
            sensor, reward, done, info = state

            # (x_pos, x_vel, theta_ang, theta_vel)

            controller.integrate(tuple(sensor), tuple(motor), reward)

            average_reward = smear(average_reward, reward, iterations)
            iterations += 1

            if Timer.time_passed(2000):
                print(f"{iterations:010d} iterations, average reward: {average_reward:.2f}")
Example No. 6
    def _get_segments(time_axis: Sequence[Any], states: List[Tuple[int, ...]]) -> Tuple[Sequence[Tuple[Any, int]], ...]:
        assert len(time_axis) == len(states)
        max_level = max(len(_x) for _x in states)
        levels = tuple([] for _ in range(max_level))

        for _j, (each_time, each_context) in enumerate(zip(time_axis, states)):
            for _i, each_level in enumerate(levels):
                each_shape = each_context[_i] if _i < len(each_context) else -1

                if 0 < len(each_level):
                    _, last_shape = each_level[-1]
                else:
                    last_shape = -1

                if each_shape != last_shape:
                    data_point = each_time, each_shape
                    each_level.append(data_point)

            if Timer.time_passed(2000):
                print("Finished {:5.2f}% of segmenting...".format(100. * _j / len(time_axis)))

        return levels
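
A self-contained sketch of the segmentation step above (progress printing omitted, name hypothetical), with a hand-checked example of the output format: each level records only the time steps at which its shape changes, and -1 marks "no state at this level".

from typing import Any, List, Sequence, Tuple


def segments_sketch(time_axis: Sequence[Any],
                    states: List[Tuple[int, ...]]) -> Tuple[List[Tuple[Any, int]], ...]:
    max_level = max(len(_x) for _x in states)
    levels = tuple([] for _ in range(max_level))
    for each_time, each_context in zip(time_axis, states):
        for _i, each_level in enumerate(levels):
            each_shape = each_context[_i] if _i < len(each_context) else -1
            last_shape = each_level[-1][1] if each_level else -1
            if each_shape != last_shape:
                each_level.append((each_time, each_shape))
    return levels


# Level 0 changes shape at t=0 and t=2; level 1 appears at t=1 and vanishes at t=3.
assert segments_sketch([0, 1, 2, 3], [(5,), (5, 7), (6, 7), (6,)]) == \
    ([(0, 5), (2, 6)], [(1, 7), (3, -1)])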
def setup(predictor: Predictor,
          train_generator,
          test_generator,
          visualization_steps: int,
          iterations: int = 500000):
    print("Starting experiment with {:s} for {:d} iterations...".format(
        predictor.name(), iterations))

    average_train_error = 0.
    average_test_error = 0.
    average_duration = 0.

    # exchange rate adaptation
    # error_list = []

    for t in range(iterations):
        # get concurrent examples
        examples_train = next(train_generator)
        inputs_train, targets_train = zip(*examples_train)

        examples_test = next(test_generator)
        inputs_test, targets_test = zip(*examples_test)

        # run predictions and fit the predictor
        this_time = time.time()
        outputs_train = predictor.predict(inputs_train)
        outputs_test = predictor.predict(inputs_test)

        predictor.fit(examples_train)

        duration = time.time() - this_time

        # todo: continue from here

        # update plot
        try:
            train_error = sum(
                sqrt(sum((__o - __t)**2 for __o, __t in zip(_o, _t))) for _o,
                _t in zip(outputs_train, targets_train)) / len(targets_train)
        except TypeError:
            train_error = sum(
                float(_o != _t) for _o, _t in zip(
                    outputs_train, targets_train)) / len(targets_train)

        try:
            test_error = sum(
                sqrt(sum((__o - __t)**2
                         for __o, __t in zip(_o, _t))) for _o, _t in zip(
                             outputs_test, targets_test)) / len(targets_test)
        except TypeError:
            test_error = sum(
                float(_o != _t) for _o, _t in zip(
                    outputs_test, targets_test)) / len(targets_test)

        # exchange rate adaptation
        # if .5 < concurrent_outputs[0][0]:
        #     error_list.append(error)

        average_train_error = (average_train_error * t + train_error) / (t + 1)
        average_test_error = (average_test_error * t + test_error) / (t + 1)

        average_duration = (average_duration * t + duration) / (t + 1)
        if (t + 1) % visualization_steps == 0:
            # exchange rate adaptation
            Visualize.append("error train", predictor.__class__.__name__,
                             average_train_error)
            Visualize.append("error test", predictor.__class__.__name__,
                             average_test_error)

            Visualize.append("duration", predictor.__class__.__name__,
                             average_duration)

            try:
                for _e, (each_train_output, each_train_target) in enumerate(
                        zip(outputs_train, targets_train)):
                    for _o, (train_output_value,
                             train_target_value) in enumerate(
                                 zip(each_train_output, each_train_target)):
                        axis_key = f"output train {_o:02d}/{_e:02d}"
                        Visualize.append(axis_key,
                                         predictor.__class__.__name__,
                                         train_output_value)
                        Visualize.append(axis_key, "target train",
                                         train_target_value)

            except TypeError:
                pass

            try:
                for _e, (each_test_output, each_test_target) in enumerate(
                        zip(outputs_test, targets_test)):
                    for _o, (test_output_value,
                             test_target_value) in enumerate(
                                 zip(each_test_output, each_test_target)):
                        axis_key = f"output test {_o:02d}/{_e:02d}"
                        Visualize.append(axis_key,
                                         predictor.__class__.__name__,
                                         test_output_value)
                        Visualize.append(axis_key, "target test",
                                         test_target_value)

            except TypeError:
                pass

        if Timer.time_passed(2000):
            print("Finished {:05.2f}%...".format(100. * t / iterations))

    Visualize.finalize("error train", predictor.__class__.__name__)
    Visualize.finalize("error test", predictor.__class__.__name__)
    Visualize.finalize("duration", predictor.__class__.__name__)
Example No. 8
def test_2d():
    dim_range = -10., 10.

    plot_axis, error_axis = setup_2d_axes()

    r = MultiplePolynomialFromLinearRegression(1, 3, -1)
    # r = MultiplePolynomialOptimizationRegression(1, 3)

    # fun = lambda _x: -cos(_x / (1. * math.pi))
    fun = lambda _x: 0. + 0. * _x**1. + 0. * _x**2. + 1. * _x**3.  #  + 1. * _x ** 4.
    x_range = tuple(
        _x / 10.
        for _x in range(int(dim_range[0]) * 10,
                        int(dim_range[1]) * 10))
    y_range = tuple(fun(_x) for _x in x_range)
    plot_axis.plot(x_range, y_range, color="C0")
    plot_axis.set_xlim(*dim_range)

    iterations = 0

    window_size = 100000
    error_development = deque(maxlen=window_size)

    while True:
        x = random.uniform(*dim_range)
        y_o = r.output([x])

        y_t = fun(x)
        error = 0 if iterations < 1 else smear(error_development[-1],
                                               abs(y_o - y_t), iterations)
        error_development.append(error)

        if Timer.time_passed(1000):
            print(f"{iterations:d} iterations finished")

            values = tuple(r.output([_x]) for _x in x_range)
            l, = plot_axis.plot(x_range, values, color="C1")
            values_d = tuple(r.derivation_output([_x], 0) for _x in x_range)
            l_d, = plot_axis.plot(x_range, values_d, color="C2")
            plot_axis.set_ylim(
                (min(values + values_d), max(values + values_d)))

            x_min = max(0, iterations - window_size)
            x_max = x_min + window_size
            error_axis.set_xlim((x_min, x_max))

            x_min_limit = max(0, iterations - len(error_development))
            e, = error_axis.plot(range(x_min_limit,
                                       x_min_limit + len(error_development)),
                                 error_development,
                                 color="black")
            error_axis.set_ylim(
                (min(error_development), max(error_development)))

            pyplot.pause(.001)
            pyplot.draw()

            l.remove()
            l_d.remove()
            e.remove()

        r.fit([x], y_t, past_scope=1000, learning_drag=0)

        iterations += 1
def test_3d():
    fig = pyplot.figure()
    axis_3d = fig.add_subplot(221, projection='3d')
    axis_3d.set_xlabel("x")
    axis_3d.set_ylabel("y")
    axis_3d.set_zlabel("z")

    axis_2d_error = fig.add_subplot(222)
    axis_2d_error.set_xlabel("t")
    axis_2d_error.set_ylabel("error")

    axis_2d_yz = fig.add_subplot(223)
    axis_2d_yz.set_xlabel("y")
    axis_2d_yz.set_ylabel("z")

    axis_2d_xz = fig.add_subplot(224)
    axis_2d_xz.set_xlabel("x")
    axis_2d_xz.set_ylabel("z")

    # x_coefficients = 0., 1.,
    # y_coefficients = 0., -1.,

    # degree = number of coefficients - 1
    x_coefficients = -375., 400., -140., 20., -1.,
    y_coefficients = 375., -400., 140., -20., 1.,

    # fun = poly_function(x_coefficients, y_coefficients)
    fun = trig_function()

    number_of_points = 1000
    drag_value = number_of_points

    value_range = 0., 10.

    plot_surface(axis_3d, fun, value_range,
                 colormap=cm.viridis)  # , color="C0")

    # r = MultiplePolynomialRegressor([6, 6])
    r = LinearRegressor(2, number_of_points)

    error = []
    tar_z = []
    out_z = []
    z_min, z_max = .0, .0
    for _t in range(number_of_points):
        p_x = random.uniform(*value_range)
        p_y = random.uniform(*value_range)

        input_values = p_x, p_y
        output_value = r.output(input_values)

        out_z.append((p_x, p_y, output_value))

        p_z = fun(p_x, p_y)
        if _t < 1:
            z_min = p_z
            z_max = p_z
        else:
            z_min = min(z_min, p_z)
            z_max = max(z_max, p_z)

        tar_z.append((p_x, p_y, p_z))

        e = abs(output_value - p_z)
        error_value = e if _t < 1 else smear(error[-1], e, _t)
        error.append(error_value)

        r.fit(input_values, p_z, drag_value)

        #fit_fun = poly_function(*r.get_parameters())
        #fit_surface = plot_surface(axis_3d, fit_fun, value_range, colormap=cm.brg)
        #pyplot.pause(.001)
        #pyplot.draw()
        #if _t < number_of_points - 1:
        #    fit_surface.remove()
        if Timer.time_passed(2000):
            print(f"{_t * 100. / number_of_points:05.2f}% finished.")

    margin = (z_max - z_min) * .1
    axis_3d.set_zlim([z_min - margin, z_max + margin])
    print(error[-1])
    axis_3d.scatter(*zip(*out_z), alpha=.4, color="C1")
    # axis_3d.scatter(*zip(*tar_z), color="blue")

    # axis_3d.scatter(*zip(*points))
    axis_2d_error.plot(error)
    axis_2d_yz.scatter([_p[1] for _p in tar_z], [_p[2] for _p in tar_z],
                       color="C0")
    axis_2d_xz.scatter([_p[0] for _p in tar_z], [_p[2] for _p in tar_z],
                       color="C0")

    fit_x_co, fit_y_co = r.get_parameters()

    # print((x_coefficients, y_coefficients))
    # print((fit_x_co, fit_y_co))
    # print()
    plot_line(axis_2d_xz, fit_x_co, value_range, color="C1")
    plot_line(axis_2d_yz, fit_y_co, value_range, color="C1")

    pyplot.tight_layout()
    pyplot.show()
def controlled_grid_interaction(predictor: Predictor,
                                iterations: int = 500000):
    c = Config("../configs/config.json")
    data_dir = c["data_dir"] + "grid_worlds/"

    grid_world = GridWorldLocal(data_dir + "square.txt", rotational=True)
    # grid_world = GridWorldLocal(data_dir + "simple.txt", rotational=False)
    # grid_world = GridWorldGlobal(data_dir + "sutton.txt", rotational=False)

    controller = SarsaController(grid_world.get_motor_range(),
                                 alpha=.8,
                                 gamma=.1,
                                 epsilon=.1)
    # controller = RandomController(grid_world.get_motor_range())

    last_sensor = None
    last_motor = None
    sensor, reward = grid_world.react_to(None)

    visualization_steps = iterations // 1000
    average_reward = .0
    average_error = .0
    average_duration = .0
    for t in range(iterations):
        # get data
        this_time = time.time()
        concurrent_inputs = (last_sensor, last_motor),
        concurrent_outputs = predictor.predict(concurrent_inputs)
        concurrent_targets = sensor,
        concurrent_examples = (concurrent_inputs[0], concurrent_targets[0]),
        predictor.fit(concurrent_examples)
        d = time.time() - this_time

        # query controller
        perception = predictor.get_state(), last_sensor
        motor = controller.react_to(perception, reward)

        error = sum(
            float(_o != _t)
            for _o, _t in zip(concurrent_outputs, concurrent_targets)) / len(
                concurrent_targets)

        average_reward = (average_reward * t + reward) / (t + 1)
        average_error = (average_error * t + error) / (t + 1)
        average_duration = (average_duration * t + d) / (t + 1)

        if (t + 1) % visualization_steps == 0:
            Visualize.append("reward", predictor.__class__.__name__,
                             average_reward)
            Visualize.append("error", predictor.__class__.__name__,
                             average_error)
            Visualize.append("duration", predictor.__class__.__name__,
                             average_duration)

        last_sensor = sensor
        last_motor = motor

        sensor, reward = grid_world.react_to(motor)

        if Timer.time_passed(2000):
            print("Finished {:05.2f}%...".format(100. * t / iterations))

    Visualize.finalize("reward", predictor.__class__.__name__)
    Visualize.finalize("error", predictor.__class__.__name__)
    Visualize.finalize("duration", predictor.__class__.__name__)
Example No. 11
def actor_critic():
    fig = pyplot.figure()
    actor_axis, critic_axis, reward_axis = fig.subplots(nrows=3, sharex="all")

    actor_axis.set_ylabel("actor error")
    actor_axis.yaxis.set_label_position("right")

    critic_axis.set_ylabel("critic error")
    critic_axis.yaxis.set_label_position("right")

    reward_axis.set_ylabel("average reward")
    reward_axis.yaxis.set_label_position("right")
    reward_axis.set_xlabel("iteration")

    # https://github.com/openai/gym/blob/master/gym/envs/__init__.py
    env = gym.make("CustomMountainCar-infinite-v0")
    # env = gym.make("VanillaMountainCar-infinite-v0")
    env.reset()

    controller = RationalSarsaAC(((-.5, .5),), 2, 500, 5, .5, .1, polynomial_degree=2)
    # controller = RandomRational(((-.5, .5),))

    sensor = None
    visualize = False
    plot = True

    window_size = 100000

    actor_plot, critic_plot, reward_plot = None, None, None
    actor_data, critic_data, reward_data = deque(maxlen=window_size), deque(maxlen=window_size), deque(maxlen=window_size)

    while True:
        if visualize:
            env.render()

        if sensor is None:
            # motor = tuple(random.uniform(*_range) for _range in MountainCar.motor_range())
            motor = .0,
        else:
            motor = controller.react(tuple(sensor))

        state = env.step(numpy.array(motor))
        sensor, reward, done, info = state
        # (x_loc, x_vel)

        controller.integrate(tuple(sensor), tuple(motor), reward)

        actor_data.append(controller.average_actor_error)
        critic_data.append(controller.average_critic_error)
        reward_data.append(controller.average_reward)

        if Timer.time_passed(500) and plot:
            if actor_plot is not None:
                actor_plot.remove()
            if critic_plot is not None:
                critic_plot.remove()
            if reward_plot is not None:
                reward_plot.remove()

            x_min = max(controller.get_iterations() - window_size, 0)

            actor_plot, = actor_axis.plot(range(x_min, x_min + len(actor_data)), actor_data, color="black")
            actor_axis.set_xlim((x_min, x_min + window_size))
            actor_axis.set_ylim((min(actor_data), max(actor_data)))

            critic_plot, = critic_axis.plot(range(x_min, x_min + len(critic_data)), critic_data, color="black")
            critic_axis.set_xlim((x_min, x_min + window_size))
            critic_axis.set_ylim((min(critic_data), max(critic_data)))

            reward_plot, = reward_axis.plot(range(x_min, x_min + len(reward_data)), reward_data, color="black")
            reward_axis.set_xlim((x_min, x_min + window_size))
            reward_axis.set_ylim((min(reward_data), max(reward_data)))

            pyplot.tight_layout()
            pyplot.draw()
            pyplot.pause(.001)

    env.close()  # note: unreachable as written, since the loop above never breaks