Example #1
def main():
    # load the pre-trained velocity regression model and run it at 10 Hz
    model = VelocityModel(
        regressionModel=joblib.load('models/gradient-m.model'), frequency=10.0)
    agent = RLAgent('agent',
                    decisionFrequency=20.0,
                    defaultSpeed=4,
                    defaultAltitude=6,
                    yawRate=60,
                    alternativeModel=model,
                    maxDepth=math.inf,
                    initialState=None)

    # verifyModel is the decision routine the agent will drive
    agent.setRl(verifyModel)
    agent.start()
    agent.join()
Example #2
def monteCarlo(agent, maxDepth=3, trials=12, frequency=10):
    PROCESSES = 4
    model = VelocityModel(
        regressionModel=joblib.load('models/gradient-m.model'),
        frequency=frequency)
    actions = np.array(agent.getActions())
    initialState, isTerminal = agent.getState(), False

    # one virtual rollout per (action, trial) pair
    jobs = [None] * (len(actions) * trials)
    while not isTerminal:
        initialState = agent.getState()
        # rollout returns collected per action
        qs = {a: [] for a in actions}

        for index, a in enumerate(np.repeat(actions, trials)):
            # spawn a simulated agent that rolls the dynamics model forward
            # from the real agent's current state
            virtualAgent, isTerminal = RLAgent(
                'virtual',
                alternativeModel=model,
                decisionFrequency=math.inf,
                maxDepth=maxDepth,
                initialState=initialState), False
            virtualAgent.setReward(reward)
            virtualAgent.goal = agent.getGoal()
            virtualAgent.goalMargins = agent.getGoalMargins()

            virtualAgent.setRl(
                partial(monteCarloSearch,
                        actions=getRandomActions(a, actions, maxDepth)))
            jobs[index] = virtualAgent

        # evaluate the rollouts in parallel
        pool = Pool(PROCESSES)
        results = [pool.apply_async(job.run) for job in jobs]
        for result in results:
            action, score = result.get()
            qs[action].append(score)

        pool.close()
        pool.join()

        # act greedily with respect to the average rollout return
        yield actions[np.argmax([np.average(qs[a]) for a in actions])]
        r, nextState, isTerminal = (yield)

        # correct the model for deviations from the desired frequency
        f = 1 / (nextState.lastUpdate - initialState.lastUpdate)
        model.frequency = f

        agent.logger.info(f)

        yield
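
RLAgent drives the routine passed to setRl as a generator: the routine yields the chosen action, later receives the resulting (reward, nextState, isTerminal) tuple at a bare (yield), and yields once more to hand control back to the decision loop. A minimal policy in the same shape, as an illustrative sketch (the uniform random choice is an assumption for demonstration, not part of the library):

import random

def randomPolicy(agent):
    # illustrative sketch: follows the same yield/send protocol as
    # monteCarlo above, but picks actions uniformly at random
    actions = agent.getActions()
    isTerminal = False
    while not isTerminal:
        # hand the chosen action to the agent
        yield random.choice(actions)
        # receive the transition produced by the environment
        r, nextState, isTerminal = (yield)
        # return control to the agent's decision loop
        yield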
Example #3
def main():
    agent = RLAgent('agent',
                    decisionFrequency=10.0,
                    defaultSpeed=4,
                    defaultAltitude=6,
                    yawRate=60)

    agent.defineState(orientation=getOrientation,
                      position=getPosition,
                      angularVelocity=getAngularVelocity,
                      linearVelocity=getVelocity,
                      linearAcceleration=getLinearAcceleration,
                      angularAcceleration=getAngularAcceleration)

    agent.setRl(partial(flightLogger, dataset='datasets/replay.csv'))
    agent.start()
    agent.join()
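
A dataset logged this way is presumably what the 'models/gradient-m.model' regressor used in Examples #1 and #2 is fitted from. A minimal sketch of that offline step, assuming the CSV is pandas-readable; the column names and the choice of GradientBoostingRegressor are assumptions suggested only by the file names, not confirmed by the source:

import joblib
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor

df = pd.read_csv('datasets/replay.csv')
# hypothetical column layout: current state features -> next velocity
X = df[['linearVelocity', 'angularVelocity']]  # assumed feature columns
y = df['nextLinearVelocity']                   # assumed target column
model = GradientBoostingRegressor().fit(X, y)
joblib.dump(model, 'models/gradient-m.model')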
Example #4
def main():
    agent = RLAgent('agent',
                    decisionFrequency=10.0,
                    defaultSpeed=4,
                    defaultAltitude=20,
                    yawRate=70)

    # Callbacks are invoked in the order they were specified; beware of the
    # execution order if any state parameter depends on another.
    # State is updated lazily by the environment as the agent needs it, so the
    # agent always gets the freshest estimate; updates happen at a rate that
    # matches the agent's decision-making frequency.

    agent.defineState(orientation=getOrientation,
                      angularVelocity=getAngularVelocity,
                      linearVelocity=getVelocity,
                      position=getPosition)

    agent.setRl(monteCarlo)
    agent.setReward(reward)
    agent.setGoal(position=np.array([-40, -50, 0]))
    # a math.inf margin disables that axis (see the sketch after this example)
    agent.setGoalMargins(position=np.array([0.5, 0.5, math.inf]))
    agent.start()
    agent.join()
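
setGoalMargins bounds how close each state field must get to its goal before the task counts as done; a margin of math.inf effectively disables that axis, which is why Example #4 leaves altitude unconstrained. A minimal sketch of that check, assuming an element-wise comparison (withinGoalMargins is a hypothetical helper, not the library's API):

import math
import numpy as np

def withinGoalMargins(position, goal, margins):
    # an infinite margin always passes, so that axis is ignored
    return bool(np.all(np.abs(position - goal) <= margins))

# the goal from Example #4: x and y within 0.5 m, altitude unconstrained
withinGoalMargins(np.array([-39.7, -50.2, 14.0]),
                  np.array([-40, -50, 0]),
                  np.array([0.5, 0.5, math.inf]))  # -> True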