Example #1
def recommend(
    datestrs,
    name='departure_tree',
    verbose=False,
):
    """
    Recommend a departure time for each requested date.

    Parameters
    ----------
    datestrs: list of strings
        Datestrings of the format YYYY-MM-DD
    name: string
        The stem of the filename where the model is stored.
    verbose: boolean
        Passed through to create_tree().

    Returns
    -------
    departures: dict of int
        Dictionary keys are datestrings and values are departure times
    """
    model_name = name + '.pickle'
    try:
        # Try to load a saved tree
        tree = tools.restore(model_name)
    except Exception:
        # If unsuccessful, create a new one
        tree = create_tree(verbose=verbose)
        tools.store(tree, model_name)

    features_df = create_features(datestrs)
    departures = {}
    for datestr in datestrs:
        estimated_departure = tree.estimate(features_df.loc[datestr, :])
        departures[datestr] = estimated_departure

    return departures
Example #2
def get_trips():
    """
    Attempt to restore a saved copy.
    If unsuccessful, download a new one.

    Returns
    -------
    trips: list of dictionaries
    """
    trips_filename = 'trips.pickle'
    try:
        trips = tools.restore(trips_filename)
    except Exception:
        trips = download_data()
        tools.store(trips, trips_filename)
    return trips
Example #3
def get_arrival_times(trips_df):
    """
    Attempt to restore a saved copy.
    If unsuccessful, calculate new arrival times from trips_df.

    Parameters
    ----------
    trips_df: DataFrame

    Returns
    -------
    arrival_times_df: DataFrame
    """
    arrival_times_filename = 'arrival_times.pickle'
    try:
        arrival_times_df = tools.restore(arrival_times_filename)
    except Exception:
        arrival_times_df = None
    if arrival_times_df is None:
        arrival_times_df = calculate_arrival_times(trips_df)
        tools.store(arrival_times_df, arrival_times_filename)

    return arrival_times_df
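The three examples above follow the same cache-or-compute shape: try to restore a pickled object, and if that fails, build it and store it for next time. The sketch below factors that pattern into a single helper. It is a minimal, self-contained illustration that uses the standard pickle module as a stand-in for tools.restore and tools.store; the helper name restore_or_create and its error handling are assumptions for illustration, not part of the code above.

import pickle


def restore_or_create(filename, builder):
    """
    Return the object pickled at filename; if it cannot be loaded,
    build it with builder(), store it, and return it.
    """
    try:
        # Try to load a saved copy
        with open(filename, 'rb') as f:
            return pickle.load(f)
    except (OSError, pickle.PickleError):
        # If unsuccessful, create a new one and save it for next time
        obj = builder()
        with open(filename, 'wb') as f:
            pickle.dump(obj, f)
        return obj


# Usage mirroring get_trips() above:
# trips = restore_or_create('trips.pickle', download_data)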
Example #4
def trainer(env, opt):
    """
    Train a DQN agent on the environment using the hyperparameters in opt.
    """
    agent = tools.qAgent(opt)
    sess = tf.InteractiveSession()

    # Q-network outputs for the current state (Q1) and for the next state (Q2)
    obs, brd, Q1 = agent.valueNet()
    nobs, nbrd, Q2 = agent.valueNet()

    # Placeholders for the action taken (a mask over board positions) and the reward
    act = tf.placeholder(tf.float32, [None, opt.GAME_SIZE, opt.GAME_SIZE])
    rwd = tf.placeholder(tf.float32, [None])

    # val1: Q-value of the action actually taken (Q1 masked by act)
    # val2: one-step TD target, rwd + GAMMA * max of Q2 over the next board
    val1 = tf.reduce_sum(tf.reduce_sum(tf.multiply(Q1, act), -1), -1)
    val2 = rwd + opt.GAMMA * tf.reduce_max(tf.reduce_max(Q2, -1), 1)

    # Mean squared TD error, minimized with Adam
    loss = tf.reduce_mean(tf.square(val1 - val2))
    trainStep = tf.train.AdamOptimizer(opt.LR).minimize(loss)

    sess.run(tf.global_variables_initializer())
    # tools.restore() returns a Saver, presumably reloading any existing checkpoint
    saver = tools.restore(sess)

    globalStep = 0

    for i_e in range(1, opt.MAX_EPISODE + 1):
        state = env.reset()
        done = False
        sumLoss = 0
        step = 0
        score = 0

        while not done:
            globalStep += 1
            step += 1

            # Decay the exploration rate every EPS_STEP global steps, down to FIN_EPS
            if not (globalStep + 1) % opt.EPS_STEP and agent.eps > opt.FIN_EPS:
                agent.eps *= opt.EPS_DECAY

            # Store the pre-step observation and board in replay memory
            agent.obsMem.push(state)
            agent.brdMem.push(env.board)

            # Sample an action from the Q-network's output for the current state
            action, actBrd = agent.smpAct(Q1, {
                obs: [state],
                brd: [env.board]
            }, env.board, step)
            state, nstate, done, reward = env.step(action)
            score += reward

            # Store the rest of the transition: next state, action, reward, next board
            agent.nobsMem.push(nstate)
            agent.actMem.push(actBrd)
            agent.rwdMem.push(reward)
            agent.nbrdMem.push(env.board)

            # Once the replay memory is full, train on a randomly sampled minibatch
            if globalStep >= opt.MEM_SIZE:
                randIdx = np.random.choice(opt.MEM_SIZE, opt.BATCH_SIZE)
                lossVal, _ = sess.run(
                    [loss, trainStep],
                    feed_dict={
                        obs: agent.obsMem.mem[randIdx],
                        act: agent.actMem.mem[randIdx],
                        rwd: agent.rwdMem.mem[randIdx],
                        nobs: agent.nobsMem.mem[randIdx],
                        brd: agent.brdMem.mem[randIdx],
                        nbrd: agent.nbrdMem.mem[randIdx]
                    })

                sumLoss += lossVal

        # Report progress after every episode
        if not i_e % 1:
            print(
                "====== Episode %d ended with score = %f, avg_loss = %f ======"
                % (i_e, score, sumLoss / step))

        # Periodically save a checkpoint
        if i_e > opt.MEM_SIZE and not i_e % 100:
            saver.save(sess, 'checkpoints/omok-dqn', global_step=globalStep)
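For reference, the sketch below shows one way trainer() might be invoked. The hyperparameter names match the opt fields the function itself reads (tools.qAgent(opt) may expect more); the values are purely illustrative, and make_omok_env is a hypothetical constructor for an environment exposing reset(), step(), and a board attribute as used above.

from types import SimpleNamespace

# Illustrative settings only; field names follow what trainer() reads from opt.
opt = SimpleNamespace(
    GAME_SIZE=9,        # board is GAME_SIZE x GAME_SIZE
    LR=1e-4,            # Adam learning rate
    GAMMA=0.99,         # discount factor for the TD target
    MAX_EPISODE=10000,  # number of training episodes
    EPS_STEP=1000,      # decay exploration every EPS_STEP global steps
    EPS_DECAY=0.95,     # multiplicative decay applied to agent.eps
    FIN_EPS=0.05,       # floor for the exploration rate
    MEM_SIZE=50000,     # replay memory capacity
    BATCH_SIZE=32,      # minibatch size for each training step
)

env = make_omok_env(opt.GAME_SIZE)  # hypothetical environment constructor
trainer(env, opt)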