Example #1
    def __init__(self, opts, discrete_actions):
        self.gui = opts.gui
        self.delay = opts.delay if self.gui else 0.0

        self.max_episode_len = opts.max_episode_len

        # threshold for pole position.
        # if absolute x or y moves outside this we finish episode
        self.pos_threshold = 2.0  # TODO: higher?

        # threshold for angle from z-axis.
        # if x or y > this value we finish episode.
        self.angle_threshold = 0.3  # radians; ~= 17deg

        # force to apply per action simulation step.
        # in the discrete case this is the fixed force applied
        # in the continuous case each x/y is in range (-F, F)
        self.action_force = opts.action_force

        # initial push force. this should be enough that taking no action will always
        # result in pole falling after initial_force_steps but not so much that you
        # can't recover. see also initial_force_steps.
        self.initial_force = opts.initial_force

        # number of sim steps initial force is applied.
        # (see initial_force)
        self.initial_force_steps = 30

        # whether we do initial push in a random direction
        # if false we always push along the x-axis (simpler problem, useful for debugging)
        self.random_theta = not opts.no_random_theta

        # true if action space is discrete; 5 values; no push, left, right, up & down
        # false if action space is continuous; fx, fy both (-action_force, action_force)
        self.discrete_actions = discrete_actions

        # 5 discrete actions: no push, left, right, up, down
        # 2 continuous action elements; fx & fy
        if self.discrete_actions:
            self.action_space = spaces.Discrete(5)
        else:
            self.action_space = spaces.Box(-1.0, 1.0, shape=(1, 2))

        # open event log
        if opts.event_log_out:
            import event_log
            self.event_log = event_log.EventLog(opts.event_log_out,
                                                opts.use_raw_pixels)
        else:
            self.event_log = None

        # how many times to repeat each action per step().
        # and how many sim steps to do per state capture
        # (total number of sim steps = action_repeats * steps_per_repeat)
        self.repeats = opts.action_repeats
        self.steps_per_repeat = opts.steps_per_repeat

        # how many cameras to render?
        # if 1 just render from front
        # if 2 render from front and 90deg side
        if opts.num_cameras not in [1, 2]:
            raise ValueError("--num-cameras must be 1 or 2")
        self.num_cameras = opts.num_cameras

        # whether we are using raw pixels for state or just pole + cart pose
        self.use_raw_pixels = opts.use_raw_pixels

        # if use_raw_pixels is set we will be rendering
        self.render_width = opts.render_width
        self.render_height = opts.render_height

        # decide observation space
        if self.use_raw_pixels:
            # in the high dimensional case each observation is an RGB image (H, W, 3)
            # we have R repeats and C cameras resulting in (H, W, 3, C, R)
            # final state fed to network is concatenated in depth => (H, W, 3*C*R)
            state_shape = (self.render_height, self.render_width, 3,
                           self.num_cameras, self.repeats)
        else:
            # in the low dimensional case obs space for problem is (R, 2, 7)
            #  R = number of repeats
            #  2 = two items; cart & pole
            #  7d pose: 3d position + 4d orientation quaternion
            state_shape = (self.repeats, 2, 7)
        float_max = np.finfo(np.float32).max
        self.observation_space = gym.spaces.Box(-float_max, float_max,
                                                state_shape)

        # check reward type
        assert opts.reward_calc in ['fixed', 'angle', 'action', 'angle_action']
        self.reward_calc = opts.reward_calc

        # no state until reset.
        self.state = np.empty(state_shape, dtype=np.float32)

        # setup bullet
        p.connect(p.GUI if self.gui else p.DIRECT)
        p.setGravity(0, 0, -9.81)
        p.loadURDF("models/ground.urdf", 0, 0, 0, 0, 0, 0, 1)
        self.cart = p.loadURDF("models/cart.urdf", 0, 0, 0.08, 0, 0, 0, 1)
        self.pole = p.loadURDF("models/pole.urdf", 0, 0, 0.35, 0, 0, 0, 1)
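
The raw-pixel comments in this constructor describe stacking the C camera views and R action repeats along the channel axis before the state is fed to a network. A minimal sketch of that reshape, assuming the (H, W, 3, C, R) layout built above (the sizes here are made up for illustration):

import numpy as np

# hypothetical illustration: (H, W, 3, C, R) -> (H, W, 3*C*R), i.e. camera and
# repeat frames concatenated in depth, as the observation-space comment says
H, W, C, R = 50, 50, 2, 3  # assumed render size, num cameras, action repeats
state = np.zeros((H, W, 3, C, R), dtype=np.float32)
network_input = state.reshape(H, W, 3 * C * R)
print(network_input.shape)  # (50, 50, 18)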
Example #2
    return 0

if 0:
    events = []
    e = {}
    e['type'] = "sms"
    e['sender'] = "123456"
    e['text'] = "Hello, world! One day, we'll have full desktop distributions running on our cellphones, and will write each application just once, not once for desktop and once for mobile."
    events.append(e)

    e = {}
    e['type'] = "sms"
    e['sender'] = "567823"
    e['text'] = "Androids are nice, there's nothing wrong with Android. Its just... that there's too much non-free software on pretty much every Android handset. Good luck to Cyanogen, but I still prefer Linux."
    events.append(e)

    e = {}
    e['type'] = "missed"
    e['sender'] = "234567"
    e['text'] = ""
    events.append(e)

if __name__ == "__main__":
    elog = event_log.EventLog()
    db = contactsdb.ContactsDb()
    db.load_org()
    win = ViewEvents(db)
    win.basic_main_window()
    win.update(elog.events)    
    main()
Example #3
  # can't do this without more complex caching of world state vid frames
  #malmo.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)
  # load mission spec
  mission = MalmoPython.MissionSpec(specs.classroom(opts, overclock_tick_ms), True)
  mission_record = MalmoPython.MissionRecordSpec()
  # return all
  return client_pool, malmo, mission, mission_record

client_pool, malmo, mission, mission_record = create_malmo_components()

# init our rl_agent
agent_cstr = getattr(agents, "%sAgent" % opts.agent)
agent = agent_cstr(opts)

# init event log (if logging events)
event_log = event_log.EventLog(opts.event_log_out) if opts.event_log_out else None

# hook up connection to trainer
if opts.trainer_port == 0:
  trainer = None
else:
  channel = grpc.insecure_channel("localhost:%d" % opts.trainer_port)
  trainer = model_pb2.ModelStub(channel)

for episode_idx in itertools.count(0):
  print util.dts(), "EPISODE", episode_idx, "eval", opts.eval

  # start new mission; explicitly wait for first observation
  # (not just world_state.has_mission_begun)
  mission_start = time.time()
  while True:
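    # hedged sketch, not from the original snippet: per the comment above we
    # poll until the first observation arrives, rather than stopping as soon
    # as world_state.has_mission_begun (this assumes `malmo` is a
    # MalmoPython.AgentHost and the mission has already been started on it)
    world_state = malmo.getWorldState()
    if world_state.has_mission_begun and len(world_state.observations) > 0:
      break
    time.sleep(0.1)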
Example #4
    print(res)
    return res


def getHumanUtt():
    for i, utt in enumerate(human_utts):
        print(f'{i}) {utt.text}')
    idx = int(input('>>> '))
    return human_utts[idx]


# TODO: not initiating var_spec from Goal - fix.
var_spec = var_spec.VarSpec.fromFileAndUpdate(bot_module_base + '/var_spec',
                                              robot_utts + human_utts)
config = response_logic.Config(var_spec, repeated_utt_demotion=1)
scoring_params = response_logic.ScoringParams(event_log.EventLog([]), config)


def interactiveMode(state):
    while True:
        # human_utt = getHumanUtt()
        human_utt = getHumanUttFreeform()
        scoring_params.event_log.add(human_utt)
        state, diff = response_logic.applyUttAndDiff(state, human_utt)
        print(colors.C(human_utt.text, colors.OKBLUE))
        pprint.pprint(diff)

        goal, robot_utt, score = response_logic.bestReplyForAllGoals(
            state, goals, robot_utts, scoring_params)
        scoring_params.event_log.add(robot_utt)
        print(