示例#1
0
def test_commitments(alice, bob, operator, erc20_plasma_ct,
                     ownership_predicate):
    """Commit two state updates in one block and check both validate.

    Two deposits of 100 are made (both funded from alice's address, the
    second one recorded with bob as recipient). Each deposited range is then
    re-committed with the other party as recipient, the pair is committed as
    a single block by the operator, and both commitments must validate.
    """
    plasma = erc20_plasma_ct

    # Fund the contract with two deposits.
    deposit_for_alice = plasma.deposit(alice.address, 100,
                                       ownership_predicate,
                                       {'recipient': alice.address})
    deposit_for_bob = plasma.deposit(alice.address, 100,
                                     ownership_predicate,
                                     {'recipient': bob.address})

    # The target states for the two transfers.
    bob_owns = State(ownership_predicate, {'recipient': bob.address})
    alice_owns = State(ownership_predicate, {'recipient': alice.address})

    # One commitment per deposited range, each handing it to the other party.
    alice_to_bob = Commitment(bob_owns, deposit_for_alice.start,
                              deposit_for_alice.end, 0)
    bob_to_alice = Commitment(alice_owns, deposit_for_bob.start,
                              deposit_for_bob.end, 0)

    # Publish both commitments in a single block.
    block_contents = {plasma.address: [alice_to_bob, bob_to_alice]}
    plasma.commitment_chain.commit_block(operator.address, block_contents)

    # Both commitments must now be reported as included.
    for commitment in (alice_to_bob, bob_to_alice):
        assert plasma.commitment_chain.validate_commitment(
            commitment, plasma.address, None)
示例#2
0
def _worker_init_fn(worker_id):
    """Seed the PRNGs of a DataLoader worker process.

    The seed is the worker's own seed (as reported by
    ``torch.utils.data.get_worker_info()``) plus ``worker_id``; the CUDA and
    determinism switches are read from the global ``State`` object.
    """
    info = torch.utils.data.get_worker_info()
    # info.seed is determined by main process RNG and the worker id.
    worker_seed = info.seed + worker_id
    seed_prng(worker_seed,
              use_cuda=State().use_cuda,
              deterministic=State().deterministic)
示例#3
0
    def step(self, state, action):
        """Apply ``action`` to ``state`` and return ``[reward, next_state]``.

        * ``Action.hit``: the player draws one card. If the new player sum
          is above ``self.win`` or below ``self.loss`` the player goes bust:
          reward -1 and the resulting state is terminal. Otherwise the
          reward is 0 and the episode continues.
        * ``Action.stick``: the dealer draws until its sum reaches
          ``self.dealer_stick``. A dealer bust gives reward 1; otherwise the
          sums are compared (-1 dealer higher, 0 equal, 1 player higher).
          The resulting state is always terminal.

        Raises:
            ValueError: if ``action`` is neither ``Action.hit`` nor
                ``Action.stick`` (previously this surfaced as an
                UnboundLocalError on the final return).
        """
        if action == Action.hit:
            card = self.draw()
            player_sum = self.add_card(state.player_sum, card)
            state_result = State(state.dealer_card, player_sum)
            player_bust = (state_result.player_sum > self.win
                           or state_result.player_sum < self.loss)
            if player_bust:
                reward = -1
                state_result.terminal = True
            else:
                reward = 0
        elif action == Action.stick:
            state_result = State(state.dealer_card, state.player_sum)
            dealer_sum = state.dealer_card
            while dealer_sum < self.dealer_stick:
                card = self.draw()
                dealer_sum = self.add_card(dealer_sum, card)
                state_result.dealer_card = dealer_sum
                if dealer_sum > self.win or dealer_sum < self.loss:
                    # Dealer went bust: the player wins immediately.
                    state_result.terminal = True
                    return [1, state_result]

            # Dealer stuck without busting: compare the two sums.
            if dealer_sum > state.player_sum:
                reward = -1
            elif dealer_sum == state.player_sum:
                reward = 0
            else:
                reward = 1

            state_result.terminal = True
        else:
            raise ValueError("Unsupported action: {!r}".format(action))
        return [reward, state_result]
示例#4
0
def test_revoke_claim_on_deposit(alice, bob, operator, erc20_plasma_ct,
                                 ownership_predicate):
    """A claim on a deposit can be revoked with a revocation witness.

    Alice deposits 100, the deposited range is committed with bob as owner,
    a claim is opened on the deposit and then revoked using a witness built
    from that commitment.
    """
    plasma = erc20_plasma_ct

    # Deposit, then commit a state update that moves the range to bob.
    alice_deposit = plasma.deposit(alice.address, 100, ownership_predicate,
                                   {'owner': alice.address})
    bob_owns = State(ownership_predicate, {'owner': bob.address})
    alice_to_bob = Commitment(bob_owns, alice_deposit.start,
                              alice_deposit.end, 0)
    plasma.commitment_chain.commit_block(
        operator.address, {plasma.address: [alice_to_bob]})

    # Witness referencing the spend, built before the claim is opened.
    witness = OwnershipRevocationWitness(alice_to_bob, alice.address,
                                         'merkle proof')

    # Open a claim on the deposit and make sure it was recorded.
    claim_id = plasma.claim_deposit(100)
    assert len(plasma.claims) == 1

    # Revoke the claim with the spend inside the revocation witness.
    plasma.revoke_claim(10, claim_id, witness)
    assert plasma.claims[claim_id].is_revoked
示例#5
0
 def approximation_to_Q(self):
     """Build a tabular Q array from the current linear weights.

     For every index pair of ``self.V`` a ``State`` is created with both
     indices shifted by one, and the dot product of its feature vector with
     ``self.weights`` is stored at action index 0 for ``Action.hit`` and
     action index 1 for ``Action.stick``.
     """
     env = self.env
     table = np.zeros((env.dealer_values, env.player_values, env.action_values))
     for (dealer_idx, player_idx), _ in np.ndenumerate(self.V):
         s = State(dealer_idx + 1, player_idx + 1)
         for action_idx, action in enumerate((Action.hit, Action.stick)):
             features = self.get_feature_vector(s, action)
             table[dealer_idx, player_idx, action_idx] = np.dot(features, self.weights)
     return table
    def preform_move(self, state: State, dest_location, IsMyTurn) -> State:
        """Return the state that results from moving to ``dest_location``.

        The board and fruits of ``state`` are copied, so ``state`` itself is
        not modified. The mover's previous cell is marked -1, a destination
        cell holding a value greater than 2 adds that value to the mover's
        score, and the destination is then marked 1 (own move) or 2 (rival
        move). Afterwards each player that ``self.can_I_move`` reports as
        stuck has ``state.fine_score`` subtracted from its score.

        Fix: the original contained the no-op expression ``state.turn + 1``;
        the turn counter handed to the new state is now actually advanced
        on the player's own move.

        :param state: the state to move from.
        :param dest_location: indexable pair ``(row, col)`` of the target cell.
        :param IsMyTurn: truthy when this player moves, falsy for the rival.
        :return: a new ``State`` built from the updated board, penalty,
            scores, fruits and turn.
        """
        board = state.board.copy()
        fruits = state.fruits.copy()
        my_location = state.my_location
        rival_location = state.rival_location
        my_score = state.my_score
        rival_score = state.rival_score
        turn = state.turn
        penalty = state.fine_score

        if IsMyTurn:
            turn += 1
            board[my_location[0]][my_location[1]] = -1
            # Values above 2 are collectable; 1 and 2 are the player markers.
            if board[dest_location[0]][dest_location[1]] > 2:
                my_score += board[dest_location[0]][dest_location[1]]
            board[dest_location[0]][dest_location[1]] = 1
            my_location = dest_location
        else:
            board[rival_location[0]][rival_location[1]] = -1
            rival_location = dest_location
            if board[dest_location[0]][dest_location[1]] > 2:
                rival_score += board[dest_location[0]][dest_location[1]]
            board[dest_location[0]][dest_location[1]] = 2

        # A player left without a legal move pays the penalty.
        if not self.can_I_move(board, my_location):
            my_score -= penalty
        if not self.can_I_move(board, rival_location):
            rival_score -= penalty
        return State(board, penalty, my_score, rival_score, fruits, turn)
示例#7
0
def test_submit_claim_on_commitment(alice, bob, operator, erc20_plasma_ct,
                                    ownership_predicate):
    """A claim on a committed transfer can be redeemed once it matures.

    Alice deposits 100, the range is committed with bob as owner, bob's
    claim on that commitment is recorded, the eth block number is moved to
    the claim's redeemable block and the claim is redeemed.
    """
    plasma = erc20_plasma_ct

    # Deposit and commit the transfer to bob.
    alice_deposit = plasma.deposit(alice.address, 100, ownership_predicate,
                                   {'owner': alice.address})
    bob_owns = State(ownership_predicate, {'owner': bob.address})
    alice_to_bob = Commitment(bob_owns, alice_deposit.start,
                              alice_deposit.end, 0)
    plasma.commitment_chain.commit_block(
        operator.address, {plasma.address: [alice_to_bob]})

    # Claim the commitment and check it was recorded.
    claim_id = plasma.claim_commitment(alice_to_bob, 'merkle proof',
                                       bob.address)
    assert len(plasma.claims) == 1

    # Jump to the block at which the claim becomes redeemable, then redeem.
    plasma.eth.block_number = plasma.claims[claim_id].eth_block_redeemable
    plasma.redeem_claim(claim_id, alice_to_bob.end)

    # 1100 comes from bob having been sent 100 & already having 1000.
    bob_balance = plasma.erc20_contract.balanceOf(bob.address)
    assert bob_balance == 1100
示例#8
0
def skip_test_invalid_tx_exit_queue_resolution(alice, bob, mallory, erc20_plasma_ct, multisig_predicate, erc20_ct):
    """Skipped scenario: an invalid claim must not be resolvable before a valid one.

    Alice and bob deposit into a multisig, an invalid state handing the coin
    to mallory is committed and claimed, alice claims the earlier deposit,
    and after the dispute period only the deposit claim resolves.
    """
    plasma = erc20_plasma_ct

    # Deposit, then commit an invalid state that moves the coin to mallory.
    multisig_deposit = plasma.deposit_ERC20(
        alice.address, 100, multisig_predicate,
        {'recipient': [alice.address, bob.address]})
    mallory_state = State(multisig_deposit.coin_id, 0, multisig_predicate,
                          {'recipient': [mallory.address]})
    plasma.add_commitment([mallory_state])  # invalid tx goes into the first commitment

    # Claim for the invalid state, followed by alice's claim on the deposit,
    # which precedes the invalid tx.
    invalid_claim = plasma.submit_claim(mallory_state, 0)
    valid_claim = plasma.submit_claim(multisig_deposit)

    # Let the dispute period elapse.
    plasma.eth.block_number += multisig_predicate.dispute_duration

    # Mallory's resolution must fail: another claim has priority.
    try:
        plasma.resolve_claim(mallory.address, invalid_claim)
    except Exception:
        throws = True
    else:
        throws = False
    assert throws

    # Alice and bob agree to send the money to a new on-chain multisig.
    destination = ([alice.address, bob.address], 'on chain multisig address')
    plasma.resolve_claim(alice.address, valid_claim, destination)

    # Balances must reflect the withdrawal.
    assert erc20_ct.balanceOf('on chain multisig address') == 100
    assert erc20_ct.balanceOf(plasma.address) == 0
示例#9
0
def main():
    """Compute the winning probability for every hand in ``hands5``.

    Each hand is paired with a deck expanded from the 19-bit all-ones mask,
    evaluated through ``winning_probability`` with the shared storage, and
    progress is logged after every hand.
    """
    logging.info("Starting.")
    all_cards_mask = (1 << 19) - 1
    with Storage(state_dirs) as storage:
        for done, hand in enumerate(hands5, start=1):
            deck = expand_deck(hand, all_cards_mask)
            winning_probability(State(0, hand, deck), storage)
            logging.info("%d/%d hands processed." % (done, len(hands5)))
示例#10
0
    def linear_sarsa(self, iters, lambda_, compare_to_monctecarlo = False):
        """
        Linear Function Approximation of sarsa lambda algorithm

        Runs ``iters`` episodes, updating ``self.weights`` in place with a
        fixed step size of 0.01 and an eligibility trace that is scaled by
        ``lambda_`` after every step. When the loop finishes, ``self.Q`` and
        ``self.V`` are refilled from the learned weights.

        :param iters: number of episodes to run.
        :param lambda_: factor applied to the eligibility trace after each step.
        :param compare_to_monctecarlo: when True, first run 1,000,000
            iterations of Monte Carlo control on a fresh Environment/Agent,
            record the MSE between the approximated Q and the Monte Carlo Q
            after every episode, and plot the MSE curve at the end.
        """
        if compare_to_monctecarlo:
            # Reference Q values to measure the approximation against.
            monte_carlo_iterations = 1000000
            env = Environment()
            agent = Agent(env)
            agent.monte_carlo_control(monte_carlo_iterations)
            Q_monte_carlo = agent.Q
            mse_all = []

        for episode in range(0, iters):
            # Eligibility trace, one entry per feature, reset every episode.
            E = np.zeros(self.number_of_features)
            #initialize state and action
            state = self.env.get_initial_state()
            reward = 0
            action = self.epsilon_greedy_linear_constant(state)
#            self.N[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)] += 1
            while not state.terminal:
#                update number of visits
                self.N[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)] += 1
                [reward, state_forward] = self.env.step(state, action)
                # The next action is chosen even when state_forward is terminal.
                action_forward = self.epsilon_greedy_linear_constant(state_forward)

                # TD target: no discount factor is applied to the next estimate.
                if not state_forward.terminal:
                    current_estimate = reward + self.estimate_Q(state_forward, action_forward)
                else:
                    current_estimate = reward

                previous_estimate = self.estimate_Q(state, action)
                delta = current_estimate - previous_estimate

                # Accumulate the trace, update the weights, then scale the trace.
                E = np.add(E, self.get_feature_vector(state, action))
                step_size = 0.01
                self.weights += step_size * delta * E
                E = lambda_ * E

                action = action_forward
                state = state_forward
            if compare_to_monctecarlo:
                # One MSE sample per episode.
                mse_all.append(compute_mse(self.approximation_to_Q(), Q_monte_carlo))

        if compare_to_monctecarlo:
#            print (mse_all[-1])
            plt.plot(range(0, iters), mse_all, 'r-')
            plt.xlabel("episodes")
            plt.ylabel("MSE")
#            plt.title("lambda = 0")
            plt.show()

        # Refill the tabular Q (index 0 = hit, index 1 = stick) and V from
        # the learned weights; State arguments are the indices shifted by one.
        for (dealer_sum, player_sum), value in np.ndenumerate(self.V):
            s = State(dealer_sum+1, player_sum+1)
            self.Q[dealer_sum, player_sum ,0] = np.dot(self.get_feature_vector(s, Action.hit), self.weights)
            self.Q[dealer_sum, player_sum ,1] = np.dot(self.get_feature_vector(s, Action.stick), self.weights)
            self.V[dealer_sum, player_sum] = max(self.estimate_Q(s,Action.hit), self.estimate_Q(s,Action.stick))
示例#11
0
def move_coordinates():
    """Apply the human move given by the 'from'/'to' request arguments.

    The board is loaded from the first ``Entry`` row. If the game is over a
    "game over" response is returned. Otherwise the move is pushed, the new
    FEN is stored, the Monte Carlo agent is informed when ``use_mc`` is set,
    and ``computer_move()`` is called. Any exception raised while doing so
    is printed and swallowed. The response body is the FEN re-read from the
    database.
    """
    s = State()
    s.board = chess.Board(Entry.query.first().board)

    if s.board.is_game_over():
        print("GAME IS OVER")
        return app.response_class(response="game over", status=200)

    source = int(request.args.get('from', default=''))
    target = int(request.args.get('to', default=''))
    promotion = request.args.get('promotion', default='') == 'true'

    # MONTE: the Move object is what the monte carlo agent consumes; its SAN
    # form is what gets pushed onto the board.
    move_uci = chess.Move(source,
                          target,
                          promotion=chess.QUEEN if promotion else None)
    move = s.board.san(move_uci)

    if move is not None and move != "":
        print("human moves", move)
        try:
            s.board.push_san(move)
            Entry.query.update(dict(board=s.board.fen()))
            db.session.commit()
            if use_mc:
                # MONTE: Note monte won't work on heroku bc it stores state;
                ai_mc.push_move(move_uci)

            computer_move()
        except Exception:
            traceback.print_exc()

    # Reload the stored position so the reply includes the computer's move.
    s.board = chess.Board(Entry.query.first().board)
    response = app.response_class(response=s.board.fen(), status=200)
    print(s.board)
    return response
示例#12
0
def computer_move():
    """Play the computer's move on the stored board and persist the result.

    With ``use_mc`` unset, a move is sampled from ``ai.minimax`` with
    probabilities proportional to the second element of each returned entry.
    With ``use_mc`` set, the Monte Carlo agent selects the move. The new FEN
    is written to every ``Entry`` row and committed.
    """
    s = State()
    s.board = chess.Board(Entry.query.first().board)

    if use_mc:
        # MONTE: monte carlo agent
        selected, _value, _improved_policy = ai_mc.select_move(MC_SEARCH_ITER)
        s.board.push(chess.Move.from_uci(selected.a))
    else:
        # MINIMAX
        candidates = ai.minimax(s.board)
        weights = [entry[1] for entry in candidates]
        moves = [entry[0] for entry in candidates]
        # Normalise the weights so they form a probability distribution.
        weights = weights / np.sum(weights)
        s.board.push(np.random.choice(moves, p=weights))

    Entry.query.update(dict(board=s.board.fen()))
    db.session.commit()
示例#13
0
def build_max_policy(q_function):
    """Return a Policy that picks ``q_function``'s max action in every state.

    Every position ``(x, y)`` within ``q_function.state_size`` is combined
    with every velocity pair below ``q_function.velocity_size``.
    """
    width, height = q_function.state_size[0], q_function.state_size[1]
    speeds = q_function.velocity_size
    greedy = Policy(q_function.state_size, speeds)

    # A single product iterates in the same order as the nested loops:
    # x outermost, then y, then v_x, then v_y.
    grid = itertools.product(range(width), range(height),
                             range(speeds), range(speeds))
    for x, y, v_x, v_y in grid:
        state = State((x, y), (v_x, v_y))
        greedy.update(state, q_function.get_max_action(state))

    return greedy
 def search(self, state, depth, max_player):
     """Run the MiniMax algorithm from ``state``.

     :param state: The state to start from.
     :param depth: The maximum allowed depth for the algorithm.
     :param max_player: Whether this is a max node (True) or a min node (False).
     :return: A tuple ``(value, direction)``. At a goal state or at depth 0
         it is the utility of ``state`` together with ``state.direction``;
         otherwise the value and direction of the best child.
     """
     self.throw_exception_if_timeout(state)
     reached_goal = self.goal and self.goal(state, max_player)
     if reached_goal or depth == 0:
         return (self.utility(state, max_player), state.direction)

     children = self.succ(state, max_player)

     # Start from a placeholder state carrying the worst possible value for
     # this node type; it is replaced through State's own ordering.
     best = State(None, None, None, None, None, None)
     if max_player:
         best.value = -np.inf
         choose = max
     else:
         best.value = np.inf
         choose = min

     for child in children:
         child.value = self.search(child, depth - 1, not max_player)[0]
         best = choose(best, child)
     return (best.value, best.direction)
 def step(self, state, action):
     """Play ``action`` from ``state`` and return ``[reward, next_state]``.

     Hitting draws one card for the player; a sum above ``self.win`` or
     below ``self.loss`` ends the episode with reward -1, anything else
     yields reward 0 and a non-terminal state. Sticking lets the dealer draw
     until reaching ``self.dealer_stick``; a dealer bust is reward 1,
     otherwise the sums decide (-1 / 0 / 1). The state after sticking is
     always terminal.

     Raises:
         ValueError: for an action other than ``Action.hit`` or
             ``Action.stick``. The original fell through to the final
             return and failed with UnboundLocalError.
     """
     if action == Action.hit:
         card = self.draw()
         player_sum = self.add_card(state.player_sum, card)
         state_result = State(state.dealer_card, player_sum)
         if state_result.player_sum > self.win or state_result.player_sum < self.loss:
             # Player bust.
             state_result.terminal = True
             return [-1, state_result]
         return [0, state_result]

     if action != Action.stick:
         raise ValueError("Unsupported action: {!r}".format(action))

     state_result = State(state.dealer_card, state.player_sum)
     dealer_sum = state.dealer_card
     while dealer_sum < self.dealer_stick:
         card = self.draw()
         dealer_sum = self.add_card(dealer_sum, card)
         state_result.dealer_card = dealer_sum
         if dealer_sum > self.win or dealer_sum < self.loss:
             # Dealer bust.
             state_result.terminal = True
             return [1, state_result]

     # Nobody bust: the higher sum wins, equal sums draw.
     if dealer_sum > state.player_sum:
         reward = -1
     elif dealer_sum == state.player_sum:
         reward = 0
     else:
         reward = 1

     state_result.terminal = True
     return [reward, state_result]
    def make_move(self, time_limit, players_score):
        """Make move with this Player.

        Chooses a direction either by deliberately stalling (when the score
        lead exceeds the penalty) or by iterative-deepening MiniMax, then
        updates ``self.board`` and ``self.turn`` for the chosen move.

        input:
            - time_limit: float, time limit for a single turn.
            - players_score: indexable; elements 0 and 1 are passed to State
              as the two scores (presumably own and rival score, confirm
              against State's constructor).
        output:
            - direction: tuple, specifing the Player's movement, chosen from self.directions
        """
        start_time = time.time()
        minimax_ret = 0
        iteration_time = 0
        depth = 1
        state = State(self.board, self.penalty_score, players_score[0], players_score[1], self.cur_fruits, self.turn)
        succ = self.get_legal_moves
        utility = self.calc_score
        preform_move = self.preform_move

        if players_score[0] - players_score[1] > self.penalty_score: #If it is worthy to end the game
            # print("Yessss, ", players_score[0], " ", players_score[1], " ", self.penalty_score)
            # This loop intentionally keeps running until 8 seconds past the
            # time limit, recomputing the same first legal move each time.
            while time.time() - start_time < time_limit + 8:# We want to get to fine, end the game and win
                # minimax_ret = MiniMax(succ=succ,utility=utility, perform_move= preform_move).search(state=state, depth=depth, maximizing_player=True)
                minimax_ret = self.get_legal_moves(state.board, state.my_location)[0]
                minimax_ret = (0, self.calc_direction(state.my_location, minimax_ret))

            # Apply the move locally: old cell becomes -1, new cell becomes 1.
            new_pos = (state.my_location[0] + minimax_ret[1][0], state.my_location[1] + minimax_ret[1][1])
            self.board[state.my_location[0]][state.my_location[1]] = -1
            self.board[new_pos[0]][new_pos[1]] = 1
            self.turn += 1
            return minimax_ret[1]

        #TODO: check if correct upperbound

        # Iterative deepening: start another, deeper search only while four
        # times the last iteration's duration still fits in the remaining time.
        # NOTE(review): if this loop body never runs, minimax_ret is still 0
        # and the indexing after the loop fails - confirm time_limit is
        # always positive.
        while 4 * iteration_time < time_limit - (time.time() - start_time) and time.time() - start_time < time_limit:  #total time = iter_time + 3*iter_time (the upper bound of the running time)
            # NOTE(review): get_legal_moves / calc_direction are called here
            # as bare names, but as self.* methods in the branch above -
            # confirm module-level functions with these names exist.
            moves = get_legal_moves(state.board, state.my_location)
            minimax_ret = [1, 2]
            if len(moves) == 1:
                # Only one legal move: no search needed.
                minimax_ret[0] = None
                minimax_ret[1] = calc_direction(state.my_location, moves[0])
                break

            start_iteration = time.time()
            minimax_ret = MiniMax(succ=succ,utility=utility, perform_move= preform_move).search(state=state, depth=depth, maximizing_player=True)
            #print('depth        ', depth)
            iteration_time = time.time() - start_iteration
            depth += 1
        
        # Apply the chosen move locally: old cell becomes -1, new cell becomes 1.
        new_pos = (state.my_location[0] + minimax_ret[1][0], state.my_location[1] + minimax_ret[1][1])
        self.board[state.my_location[0]][state.my_location[1]] = -1
        self.board[new_pos[0]][new_pos[1]] = 1
        self.turn += 1
        return minimax_ret[1]
    def make_move(self, time_limit, players_score):
        """Make move with this Player using a fixed-depth AlphaBeta search.

        The search always runs to depth 4; ``time_limit`` is accepted for
        interface compatibility but is not consulted. After the search the
        player's old cell on ``self.board`` is set to -1, the new cell to 1,
        and ``self.turn`` is incremented.

        input:
            - time_limit: float, time limit for a single turn (unused).
            - players_score: indexable; elements 0 and 1 are passed to State
              as the two scores.
        output:
            - direction: tuple, specifing the Player's movement, chosen from self.directions
        """
        # Built once: the original constructed this identical State twice
        # and also kept an unused start_time.
        state = State(self.board, self.penalty_score, players_score[0], players_score[1], self.cur_fruits, self.turn)

        searcher = AlphaBeta(succ=self.sorted_moves,
                             utility=self.calc_score,
                             perform_move=self.preform_move)
        minimax_ret = searcher.search(state=state, depth=4, maximizing_player=True)
        direction = minimax_ret[1]

        # Apply the chosen move to the player's own copy of the board.
        old_row, old_col = state.my_location[0], state.my_location[1]
        new_pos = (old_row + direction[0], old_col + direction[1])
        self.board[old_row][old_col] = -1
        self.board[new_pos[0]][new_pos[1]] = 1
        self.turn += 1
        return direction
示例#18
0
def skip_test_submit_claim_on_transaction(alice, bob, charlie, erc20_plasma_ct, multisig_predicate):
    """Skipped scenario: a claim submitted on a committed tx is recorded."""
    plasma = erc20_plasma_ct

    # Alice and bob deposit into a multisig, then the coin is sent to charlie.
    multisig_deposit = plasma.deposit_ERC20(
        alice.address, 100, multisig_predicate,
        {'recipient': [alice.address, bob.address]})
    charlie_state = State(multisig_deposit.coin_id, 0, multisig_predicate,
                          {'recipient': [charlie.address]})
    plasma.add_commitment([charlie_state])  # the tx goes into the first commitment

    # Submit the claim and check that it was recorded.
    plasma.submit_claim(charlie_state, 0)
    assert len(plasma.claim_queues) == 1
示例#19
0
def test_challenge_claim_with_invalid_state(alice, mallory, operator,
                                            erc20_plasma_ct,
                                            ownership_predicate):
    """A challenged claim on an invalid commitment must not be redeemable.

    Alice deposits 100, an invalid commitment handing the range to mallory
    is committed and claimed, alice claims her deposit and challenges the
    invalid claim with it. Once the invalid claim's redeemable block is
    reached, redeeming it must fail, while alice's deposit claim redeems
    and restores her balance.

    Fix: the failing redeem attempt used to pass ``mallory.address`` where
    every other ``redeem_claim`` call passes a claim id, so the expected
    exception did not exercise the challenge at all. It now passes the
    invalid claim's id.
    """
    # Deposit and commit to invalid state
    commit0_alice_deposit = erc20_plasma_ct.deposit(
        alice.address, 100, ownership_predicate,
        {'owner': alice.address})  # Add deposit
    # Check that alice's balance was reduced
    assert erc20_plasma_ct.erc20_contract.balanceOf(alice.address) == 900
    # Uh oh! Mallory creates an invalid state & commits it!!!
    state_mallory_ownership = State(ownership_predicate,
                                    {'owner': mallory.address})
    invalid_commit1_alice_to_mallory = Commitment(state_mallory_ownership,
                                                  commit0_alice_deposit.start,
                                                  commit0_alice_deposit.end,
                                                  0)  # Create commitment
    # Add the commitment
    erc20_plasma_ct.commitment_chain.commit_block(
        operator.address,
        {erc20_plasma_ct.address: [invalid_commit1_alice_to_mallory]})
    # Submit a claim for the invalid state
    invalid_commitment_claim_id = erc20_plasma_ct.claim_commitment(
        invalid_commit1_alice_to_mallory, 'merkle proof', mallory.address)
    # Oh no! Alice notices bad behavior and attempts withdrawal of deposit state
    deposit_claim_id = erc20_plasma_ct.claim_deposit(commit0_alice_deposit.end)
    # Alice isn't letting that other claim go through. She challenges it with her deposit!
    challenge = erc20_plasma_ct.challenge_claim(deposit_claim_id,
                                                invalid_commitment_claim_id)
    # Verify that the challenge was recorded
    assert challenge is not None and len(erc20_plasma_ct.challenges) == 1
    # Fast forward in time until the eth block allows the claim to be redeemable
    erc20_plasma_ct.eth.block_number = erc20_plasma_ct.claims[
        invalid_commitment_claim_id].eth_block_redeemable
    # Mallory attempts and fails to withdraw because there's another claim with priority
    try:
        erc20_plasma_ct.redeem_claim(invalid_commitment_claim_id,
                                     invalid_commit1_alice_to_mallory.end)
        throws = False
    except Exception:
        throws = True
    assert throws
    # Now instead alice withdraws
    erc20_plasma_ct.redeem_claim(
        deposit_claim_id,
        erc20_plasma_ct.claims[deposit_claim_id].commitment.end)
    # Check that alice was sent her money!
    assert erc20_plasma_ct.erc20_contract.balanceOf(alice.address) == 1000
示例#20
0
    def next_move(self, state) -> Optional[dict]:
        """Record training samples around the player and return a random move.

        ``state`` is a mapping unpacked into ``State``. The board is padded
        with a border of 9s, a 5x5 window around the player's position is
        cut out and rotated by the value stored in the player's own cell,
        and one ``SaveState`` per entry of ``ACTIONS`` is appended to
        ``self.data``; its flag is True when the target cell value is not
        positive.

        Fix: ``np.int`` was removed in NumPy 1.24 and raised AttributeError;
        the builtin ``int`` it aliased is used instead.

        :param state: keyword arguments for ``State``.
        :return: ``{"move": <random entry of ACTIONS>}``, or ``None`` when
            the player is no longer alive.
        """
        state = State(**state)

        if state.last_alive:
            print("I won!! :)")

        if not state.alive:
            print("I'm dead :(")
            return None

        board = np.asarray(state.board, dtype=int)
        board = np.pad(board, 1, 'constant', constant_values=9)
        size = board.shape[0]

        def padded_index(coord):
            # Translate a (possibly negative) coordinate into an index of
            # the padded board; shape[0] is used for both axes.
            if coord < 0:
                return size - (abs(coord) + 1)
            return coord + 1

        x = padded_index(state.position[0])
        y = padded_index(state.position[1])
        xn, xp = x - 2, x + 3
        yn, yp = y - 2, y + 3

        # NOTE(review): the board is padded by 1 but the window reaches 2
        # cells out, so a player on the outermost row or column yields a
        # truncated window - confirm positions never touch the edge.
        direction = board[x, y]
        l_board = np.rot90(board[xn:xp, yn:yp], direction)

        print(str(l_board).replace('0', '-'))

        for choice in ACTIONS:
            move = ACTIONSCALC[ACTIONS.index(choice)]

            # (2, 2) is the player's own cell in the 5x5 window.
            xt = 2 + move[0]
            yt = 2 + move[1]
            new_pos = l_board[xt, yt]

            sample = SaveState(l_board, choice, not new_pos > 0)
            self.data.append(sample)

        choice = random.choice(ACTIONS)
        return {"move": choice}
示例#21
0
	def step(self, action):
		"""Apply ``action`` to the velocity, move, and return the TimeStep.

		Crossing the goal snaps the position onto it and returns a terminal
		TimeStep with ``constants.GOAL_REWARD`` (the stored position is left
		unchanged). Leaving the track resets the environment. Otherwise the
		new position is stored. The last two cases return the current state
		with ``constants.STEP_REWARD``.
		"""
		vel_x = self._clamp_velocity_to_range(self._velocity[0] + action.x)
		vel_y = self._clamp_velocity_to_range(self._velocity[1] + action.y)
		# Don't allow the velocity to be reduced to zero.
		if vel_x > 0 or vel_y > 0:
			self._velocity = (vel_x, vel_y)
		target = self._compute_move(self._position, self._velocity)

		if self._track.crosses_goal(self._position, target):
			goal_pos = self._track.snap_to_goal(self._position, target)
			return TimeStep(State(goal_pos, self._velocity), constants.GOAL_REWARD, terminal = True)

		if self._track.out_of_range(target):
			self.reset()
		else:
			self._position = target
		return TimeStep(self._get_state(), constants.STEP_REWARD)
示例#22
0
def get_single_trace(env, agent, trace_idx, agent_traces, states_dict, args):
    """Implement a single trace while using the Trace and State classes.

    Runs one episode of ``env`` with ``agent``. Every step is added to a new
    ``Trace`` and every visited state is stored in ``states_dict`` under the
    key ``(trace_idx, <trace length before the step>)``. The finished trace
    is appended to ``agent_traces``.

    Fix: the observation fed to ``agent.act`` is now advanced after every
    step; previously the agent kept acting on the initial reset observation.

    :param env: environment providing ``reset``, ``step`` and ``render``.
    :param agent: agent providing ``act`` and ``get_state_action_values``.
    :param trace_idx: index of this trace, first element of each state id.
    :param agent_traces: list that receives the finished trace.
    :param states_dict: dict that receives one State per step.
    :param args: unused here.
    """
    trace = Trace()
    curr_obs = env.reset()
    done = False
    while not done:
        a = agent.act(curr_obs)
        obs, r, done, infos = env.step(a)

        # Generate State
        state_img = env.render(mode='rgb_array')
        state_q_values = agent.get_state_action_values(obs)
        features = NotImplemented  # TODO implement here
        state_id = (trace_idx, trace.length)
        states_dict[state_id] = State(state_id, obs, state_q_values, features,
                                      state_img)

        # Add step and state to trace
        trace.update(obs, r, done, infos, a, state_id)

        # The next action must be chosen from the observation just received.
        curr_obs = obs

    agent_traces.append(trace)
示例#23
0
 def deposit(self, depositor, deposit_amount, predicate, parameters):
     """Transfer ``deposit_amount`` into the contract and record the deposit.

     The deposit covers the range from the current ``self.total_deposits``
     up to that value plus ``deposit_amount``. It is stored in
     ``self.claimable_ranges`` keyed by the range end, and
     ``self.total_deposits`` grows by the amount.

     :return: the ``Commitment`` describing the deposit.
     """
     assert deposit_amount > 0
     # Pull the tokens from the depositor into this contract.
     self.erc20_contract.transferFrom(depositor, self.address,
                                      deposit_amount)
     # The deposit is recorded against the latest existing plasma block.
     latest_block_number = len(self.commitment_chain.blocks) - 1
     range_start = self.total_deposits
     range_end = range_start + deposit_amount
     # Initial state of the deposited range, wrapped into a commitment.
     record = Commitment(State(predicate, parameters), range_start,
                         range_end, latest_block_number)
     # Make the range claimable and account for the new funds.
     self.claimable_ranges[range_end] = record
     self.total_deposits += deposit_amount
     return record
    def search(self,
               state,
               depth,
               is_father_max,
               alpha=ALPHA_VALUE_INIT,
               beta=BETA_VALUE_INIT):
        """Start the AlphaBeta algorithm.
        :param state: The state to start from.
        :param depth: The maximum allowed depth for the algorithm.
        :param is_father_max: Whether this is a max node (True) or a min node (False).
        :param alpha: alpha value
        :param beta: beta value
        :return: A tuple ``(value, direction)``. At a goal state or at depth 0
            it is the utility of ``state`` with ``state.direction``. On a
            cutoff the value is ``np.inf`` (max node) or ``-np.inf`` (min
            node) rather than the best value found. Otherwise it is the value
            and direction of the best child.
        """

        self.throw_exception_if_timeout(state)
        # Leaf cases: goal reached or depth budget exhausted.
        if self.goal and self.goal(state):
            return (self.utility(state, is_father_max), state.direction)
        if depth == 0:
            return (self.utility(state, is_father_max), state.direction)
        # NOTE(review): succ receives the negated flag while the branches
        # below treat is_father_max as this node's own type - confirm
        # against the succ implementation.
        children = self.succ(state, not is_father_max)
        if is_father_max:
            # Placeholder state carrying the worst value for a max node.
            # max() relies on State's own ordering, presumably by .value -
            # verify against the State class.
            currMax = State(None, None, None, None, None, None, None, None,
                            None, None, None)
            currMax.value = -np.inf
            for c in children:
                v = self.search(c, depth - 1, not is_father_max, alpha, beta)
                c.value = v[0]
                currMax = max(currMax, c)
                alpha = max(currMax.value, alpha)
                # Beta cutoff: stop expanding and report +inf upward.
                if currMax.value >= beta:
                    return np.inf, currMax.direction
            # self.restore_father(is_father_max, state, children)
            return currMax.value, currMax.direction
        else:
            # Placeholder state carrying the worst value for a min node.
            currMin = State(None, None, None, None, None, None, None, None,
                            None, None, None)
            currMin.value = np.inf
            for c in children:
                v = self.search(c, depth - 1, not is_father_max, alpha, beta)
                c.value = v[0]
                currMin = min(currMin, c)
                beta = min(currMin.value, beta)
                # Alpha cutoff: stop expanding and report -inf upward.
                if currMin.value <= alpha:
                    return -np.inf, currMin.direction
            # self.restore_father(is_father_max, state, children)

            return (currMin.value, currMin.direction)
示例#25
0
def skip_test_submit_dispute_on_deposit(alice, bob, charlie, erc20_plasma_ct, multisig_predicate):
    """Check that a claim on a multisig deposit is removed once disputed with a spend.

    Alice deposits 100 under the multisig predicate with alice and bob as
    recipients, the coin is then committed as spent to charlie, and a claim on
    the original deposit is submitted. Bob disputes that claim with the spend,
    after which the coin's claim queue must be empty.

    NOTE(review): the ``skip_`` prefix presumably keeps the test runner from
    collecting this function -- confirm before renaming.
    """
    def queued_claims():
        # Look the queue up afresh on every call so each assertion sees the
        # contract's current state.
        return len(erc20_plasma_ct.claim_queues[spend_to_charlie.coin_id])

    # Deposit with alice and bob as the multisig recipients.
    joint_deposit = erc20_plasma_ct.deposit_ERC20(
        alice.address,
        100,
        multisig_predicate,
        {'recipient': [alice.address, bob.address]},
    )

    # Spend of the deposited coin to charlie, added to the first commitment.
    spend_to_charlie = State(
        joint_deposit.coin_id,
        0,
        multisig_predicate,
        {'recipient': [charlie.address]},
    )
    erc20_plasma_ct.add_commitment([spend_to_charlie])

    # Witness for the transition, built from alice's and bob's addresses.
    spend_witness = MultiSigTransitionWitness([alice.address, bob.address], 0)

    # Claim the already-spent deposit and make sure the claim is queued.
    stale_claim = erc20_plasma_ct.submit_claim(joint_deposit)
    assert queued_claims() == 1

    # Bob disputes the claim with the spend; the claim must be gone afterwards.
    erc20_plasma_ct.dispute_claim(bob.address, stale_claim, spend_witness, spend_to_charlie)
    assert queued_claims() == 0
示例#26
0
def test_redeem_challenged_claim(alice, mallory, operator, erc20_plasma_ct,
                                 ownership_predicate):
    """Redeem a claim after the challenge raised against it has been removed.

    Flow: mallory deposits 100, the deposited range is committed to alice,
    alice claims it, and mallory claims her old deposit and uses that claim to
    challenge alice's. The deposit claim is revoked, the challenge is removed,
    and once the block number reaches the claim's redeemable block alice
    redeems and her token balance is checked.
    """
    plasma = erc20_plasma_ct

    # Mallory's deposit.
    mallory_deposit = plasma.deposit(
        mallory.address, 100, ownership_predicate, {'owner': mallory.address})

    # Commitment that hands the deposited range over to alice.
    alice_owned = State(ownership_predicate, {'owner': alice.address})
    transfer_to_alice = Commitment(
        alice_owned, mallory_deposit.start, mallory_deposit.end, 0)
    plasma.commitment_chain.commit_block(
        operator.address, {plasma.address: [transfer_to_alice]})

    # Alice wants to withdraw, so she claims the committed funds.
    alice_claim_id = plasma.claim_commitment(
        transfer_to_alice, 'merkle proof', alice.address)

    # Mallory claims her old deposit and challenges alice's claim with it.
    mallory_claim_id = plasma.claim_deposit(mallory_deposit.end)
    challenge_id = plasma.challenge_claim(mallory_claim_id, alice_claim_id)

    # Mallory's claim is revoked using the commitment that moved the funds on.
    revocation_witness = OwnershipRevocationWitness(
        transfer_to_alice, mallory.address, 'merkle proof')
    plasma.revoke_claim(10, mallory_claim_id, revocation_witness)

    # With the challenging claim revoked, the challenge itself can be dropped.
    plasma.remove_challenge(challenge_id)

    # Advance the block number to the point where alice's claim is redeemable.
    plasma.eth.block_number = plasma.claims[alice_claim_id].eth_block_redeemable

    # Redeem, then verify the payout.
    plasma.redeem_claim(alice_claim_id,
                        plasma.claims[alice_claim_id].commitment.end)
    # Expected 1100 -- presumably a 1000 starting balance plus the 100
    # redeemed; confirm against the fixtures.
    assert plasma.erc20_contract.balanceOf(alice.address) == 1100
示例#27
0
    def __init__(self,
                 root,
                 num_threads=1,
                 download=False,
                 load=True,
                 splits=(1, ),
                 batch_size=1,
                 mode='train',
                 shuffle=True,
                 preload_to_gpu=False,
                 **options):
        """Record the configuration and optionally download and load the data.

        :param root: Data directory; ``~`` is expanded and the path is made
            absolute before it is stored.
        :param num_threads: Stored as-is; must be non-negative (asserted).
        :param download: Only when this is exactly ``True`` and
            ``check_exists(root)`` does not return ``True`` is ``download``
            called.
        :param load: Only when this is exactly ``True`` is ``load`` called,
            after ``_data`` has been reset to an empty list.
        :param splits: Stored as-is.
        :param batch_size: Stored and mirrored into ``options``.
        :param mode: Stored and mirrored into ``options``.
        :param shuffle: Stored and mirrored into ``options``.
        :param preload_to_gpu: Stored and mirrored into ``options``.
        :param options: Extra keyword options, kept on ``self.options``.
        """
        # A TypeError while obtaining the State (or reading its use_cuda flag)
        # is treated as "no state available": fall back to None / no CUDA.
        try:
            global_state = State()
            cuda_enabled = global_state.use_cuda
        except TypeError:
            global_state, cuda_enabled = None, False
        self.state = global_state
        self.is_cuda = cuda_enabled

        expanded_root = os.path.expanduser(root)
        self.root = os.path.abspath(expanded_root)
        assert num_threads >= 0
        self.num_threads = num_threads
        self.splits = splits
        self.batch_size = batch_size
        self.mode = mode
        self.shuffle = shuffle
        self.preload_to_gpu = preload_to_gpu

        # Mirror the explicit settings into the free-form options dict.
        self.options = options
        self.options.update({
            'batch_size': batch_size,
            'mode': mode,
            'shuffle': shuffle,
            'preload_to_gpu': preload_to_gpu,
        })

        # Identity checks against True are deliberate: truthy non-bool values
        # do not trigger a download or a load.
        if download is True:
            if self.check_exists(self.root) is not True:
                self.download(self.root)

        self._data = []
        if load is True:
            self.load()
示例#28
0
    def next_move(self, state) -> Optional[dict]:
        """Choose the next action from the classifier's prediction.

        The board is padded, a square window around the player's position is
        cut out and rotated, and the flattened window is fed to ``self.clf``.

        :param state: Mapping whose items are passed as keyword arguments to
            ``State``.
        :return: ``{"move": <action>}``, or ``None`` when the player is not
            alive.
        :raises SystemExit: With code 0 once more than 200 finished rounds
            have been counted.
        """
        state = State(**state)

        if state.game_over:
            print(f'round {self.count} ended')
            print(f"Game Over ... win's: {state.wins} | losses: {state.losses}")
            self.count += 1

            if self.count > 200:
                # Raise SystemExit directly instead of calling the
                # site-provided exit() helper, which is meant for the
                # interactive interpreter and may be absent (e.g. python -S).
                raise SystemExit(0)

        if not state.alive:
            return None

        pad = self.pad
        # The removed alias np.int (deprecated in NumPy 1.20, gone in 1.24)
        # was just the builtin int, so dtype=int is the same dtype.
        board = np.asarray(state.board, dtype=int)
        # Surround the board with a border of 9s, `pad` cells wide, so the
        # window below stays inside the array for positions on the board.
        board = np.pad(board, pad, 'constant', constant_values=9)

        # Position in padded coordinates, plus the bounds of the
        # (2 * pad + 1)-wide window centred on it.
        x = state.position[0] + pad
        xn = x - pad
        xp = x + pad + 1

        y = state.position[1] + pad
        yn = y - pad
        yp = y + pad + 1

        # The cell at the player's position supplies the number of quarter
        # turns applied to the window.
        direction = board[x, y]
        l_board = np.rot90(board[xn:xp, yn:yp], direction)
        f_board = l_board.flatten()
        # Collapse every positive cell to 9, the same value as the padding.
        f_board = np.where(f_board > 0, 9, f_board)

        pred = self.clf.predict([f_board])

        choice = ACTIONS[pred[0]]

        return {"move": choice}
示例#29
0
 def get_state(self):
     """Return a new State built from the five stored ``state_*`` attributes.

     The values are passed positionally in the order action, reward, screen,
     terminal, pob.
     """
     snapshot = State(
         self.state_action,
         self.state_reward,
         self.state_screen,
         self.state_terminal,
         self.state_pob,
     )
     return snapshot
示例#30
0
from cases.wave_equation.dirichlet.derivative.derivative import WaveEquationDerivative
from utils import State

# Driver script: builds the run parameters, the grid and the initial state,
# then launches a visual run (without a reference solution) through run_utils.
# NOTE(review): np, starting_conditions, run_utils and Explicit are used below
# but are not imported in the visible part of this file -- confirm they are
# imported elsewhere.

# Passed below as the only time-derivative input; presumably the wave speed --
# TODO confirm against WaveEquationDerivative.
c = 2.0
num_grid_points = 1000
# The time step shrinks with both the grid resolution and c.
dt = 1 / (16 * num_grid_points * c)

# Run parameters handed to run_utils; this script itself only reads
# 'domain_size' from the dict.
params = {
    'num_grid_points': num_grid_points,
    'domain_size': 1.0,
    'dt': dt,
    'sampling_rate': 100
}

time_derivative_input = [c]

# Disabled alternative input, kept for reference:
# case_sol_input = [c, [(1, 1.0), (2, 2.0)]]
# Sample num_grid_points + 1 evenly spaced points on [0, domain_size], drop the
# right endpoint, and stack two identical copies of the result as rows.
axes = np.tile(
    np.linspace(0, params['domain_size'], num_grid_points + 1)[:-1],
    (2, 1))  # setup the axes
# Presumably two state variables, "u" and "v", both on axis "x" -- TODO confirm
# the argument meanings against utils.State.
state = State(2, num_grid_points, axes, [("x", "u"), ("x", "v")])

state_vars = state.get_state_vars()
# The first argument is the midpoint of the domain; the meaning of 50 is not
# visible here (presumably a width/sharpness parameter -- TODO confirm).
starting_cond = starting_conditions.GaussianBump(params['domain_size'] * 0.5,
                                                 50)

# Overwrite entry 0 of the state variables with the start condition evaluated
# on the first axis row.
# NOTE(review): state_vars is not used again, so this only has an effect if
# get_state_vars() returns a reference into `state` rather than a copy.
state_vars[0] = starting_cond.get_start_condition(axes[0])
# Disabled alternative initial condition (one sine period over the domain):
#state_vars[0] = np.sin(axes[0] * 2 * np.pi / params['domain_size'])

run_utils.run_visual_without_solution(params, Explicit, WaveEquationDerivative,
                                      time_derivative_input, state)
示例#31
0
    assert (action_to_integer(Action(0, 0)) == 4)
    assert (action_to_integer(Action(1, 0)) == 5)

    # Check integer_to_action reverses it.
    assert (integer_to_action(action_to_integer(Action(1,
                                                       -1))) == Action(1, -1))

    # velocity_to_integer should work similarly.
    assert (velocity_to_integer((0, 0)) == 0)
    assert (velocity_to_integer((1, 0)) == 1)
    assert (velocity_to_integer((0, 1)) == 6)
    assert (velocity_to_integer((3, 3)) == 21)

    # Build a Q function and check we can update it.
    q_f = QFunction((70, 70), 6)
    q_f.set(State((15, 20), (2, 2)), Action(1, 1), 27)
    assert (q_f.get(State((15, 20), (2, 2)), Action(1, 1)) == 27)

    # Check the Q function can track visit counts too.
    q_f.increment_count(State((15, 20), (2, 2)), Action(1, 1), 1)
    q_f.increment_count(State((15, 20), (2, 2)), Action(1, 1), 26)
    assert (q_f.get_count(State((15, 20), (2, 2)), Action(1, 1)) == 27)

    # A maximising policy should now choose the action we assigned the value of
    # 27 whenever we're in that state.
    policy = build_max_policy(q_f)
    assert (policy.get_action(State((15, 20), (2, 2))) == Action(1, 1))

    # Epsilon-greedy policy with epsilon zero should follow the wrapped policy.
    # In this case, it takes the action we assigned the value of 27 above.
    e_greedy = EpsilonGreedyPolicy(0.0, policy, 9)