def test_commitments(alice, bob, operator, erc20_plasma_ct, ownership_predicate):
    """Commit two ownership transfers in one plasma block and check inclusion.

    Two deposits of 100 are made from alice's address (one recorded for alice,
    one for bob). Each deposited range is then re-assigned to the other party
    through a commitment, both commitments are placed in a single block, and
    the commitment chain is asked to validate each of them.
    """
    contract = erc20_plasma_ct

    # Deposit some funds.
    deposit_for_alice = contract.deposit(
        alice.address, 100, ownership_predicate, {'recipient': alice.address})
    deposit_for_bob = contract.deposit(
        alice.address, 100, ownership_predicate, {'recipient': bob.address})

    # New state updates which we plan to commit.
    owned_by_bob = State(ownership_predicate, {'recipient': bob.address})
    owned_by_alice = State(ownership_predicate, {'recipient': alice.address})

    # Commitment objects covering exactly the deposited ranges.
    transfers = [
        Commitment(owned_by_bob, deposit_for_alice.start, deposit_for_alice.end, 0),
        Commitment(owned_by_alice, deposit_for_bob.start, deposit_for_bob.end, 0),
    ]

    # Add the commitments as one block (a fresh list is handed to the chain).
    contract.commitment_chain.commit_block(
        operator.address, {contract.address: list(transfers)})

    # Assert inclusion of our commitments.
    for transfer in transfers:
        assert contract.commitment_chain.validate_commitment(
            transfer, contract.address, None)
def _worker_init_fn(worker_id):
    """Seed the PRNGs of a data-loading worker.

    Args:
        worker_id: Index of the worker; added to the worker's seed before
            it is handed to ``seed_prng``.
    """
    # This value is determined by the main process RNG and the worker id.
    base_seed = torch.utils.data.get_worker_info().seed
    seed_prng(
        base_seed + worker_id,
        use_cuda=State().use_cuda,
        deterministic=State().deterministic,
    )
def step(self, state, action):
    """Apply ``action`` to ``state`` and return ``[reward, next_state]``.

    * ``Action.hit``: the player draws one card. If the new player sum falls
      outside ``[self.loss, self.win]`` the reward is -1 and the resulting
      state is terminal; otherwise the reward is 0.
    * ``Action.stick``: the dealer draws until reaching ``self.dealer_stick``.
      The resulting state is always terminal. The reward is 1 if the dealer's
      sum falls outside ``[self.loss, self.win]``; otherwise it is -1, 0 or 1
      depending on whether the dealer's sum is above, equal to or below the
      player's sum.
    """
    if action == Action.hit:
        drawn = self.draw()
        new_total = self.add_card(state.player_sum, drawn)
        state_result = State(state.dealer_card, new_total)
        player_bust = (state_result.player_sum > self.win
                       or state_result.player_sum < self.loss)
        if player_bust:
            reward = -1
            state_result.terminal = True
        else:
            reward = 0
    elif action == Action.stick:
        state_result = State(state.dealer_card, state.player_sum)
        dealer_total = state.dealer_card
        while dealer_total < self.dealer_stick:
            drawn = self.draw()
            dealer_total = self.add_card(dealer_total, drawn)
        # The result state carries the dealer's final sum in `dealer_card`.
        state_result.dealer_card = dealer_total
        # Sticking always ends the episode, bust or not.
        state_result.terminal = True
        if dealer_total > self.win or dealer_total < self.loss:
            return [1, state_result]
        if dealer_total > state.player_sum:
            reward = -1
        elif dealer_total == state.player_sum:
            reward = 0
        else:
            reward = 1
    return [reward, state_result]
def test_revoke_claim_on_deposit(alice, bob, operator, erc20_plasma_ct, ownership_predicate):
    """A claim on a deposit can be revoked using a later spend of that deposit.

    Alice deposits 100, the range is committed to bob, a claim is then filed
    on the deposit, and the claim is revoked with a witness built from the
    alice-to-bob commitment.
    """
    contract = erc20_plasma_ct

    # Deposit.
    alice_deposit = contract.deposit(
        alice.address, 100, ownership_predicate, {'owner': alice.address})

    # Commitment that hands the deposited range to bob.
    alice_to_bob = Commitment(
        State(ownership_predicate, {'owner': bob.address}),
        alice_deposit.start, alice_deposit.end, 0)

    # Add the commitment.
    contract.commitment_chain.commit_block(
        operator.address, {contract.address: [alice_to_bob]})
    spend_witness = OwnershipRevocationWitness(
        alice_to_bob, alice.address, 'merkle proof')

    # Try submitting a claim on the deposit and check it was recorded.
    deposit_claim_id = contract.claim_deposit(100)
    assert len(contract.claims) == 1

    # Now the claim is revoked with the spend inside the revocation witness.
    contract.revoke_claim(10, deposit_claim_id, spend_witness)

    # Check the claim was revoked.
    assert contract.claims[deposit_claim_id].is_revoked
def approximation_to_Q(self):
    """Return a dense Q table computed from the current linear weights.

    The table has shape ``(env.dealer_values, env.player_values,
    env.action_values)``. For every index of ``self.V`` the entries at action
    slots 0 and 1 are the dot products of ``self.weights`` with the feature
    vectors of ``Action.hit`` and ``Action.stick`` respectively.
    """
    env = self.env
    q_table = np.zeros((env.dealer_values, env.player_values, env.action_values))
    for (dealer_idx, player_idx), _ in np.ndenumerate(self.V):
        # States are constructed with 1-based values; table indices are 0-based.
        cell_state = State(dealer_idx + 1, player_idx + 1)
        for slot, act in enumerate((Action.hit, Action.stick)):
            features = self.get_feature_vector(cell_state, act)
            q_table[dealer_idx, player_idx, slot] = np.dot(features, self.weights)
    return q_table
def preform_move(self, state: State, dest_location, IsMyTurn) -> State:
    """Return the state reached after the current mover steps to ``dest_location``.

    The input state is not modified: the board and fruits are copied first.

    Args:
        state: State to move from.
        dest_location: ``(row, col)`` the mover steps onto.
        IsMyTurn: True when this player moves, False when the rival moves.

    Returns:
        A new ``State`` built from the updated board, the penalty, both
        scores, the copied fruits and the turn counter.
    """
    board = state.board.copy()
    my_location = state.my_location
    rival_location = state.rival_location
    my_score = state.my_score
    rival_score = state.rival_score
    turn = state.turn
    penalty = state.fine_score
    fruits = state.fruits.copy()

    dest_row, dest_col = dest_location[0], dest_location[1]
    if IsMyTurn:
        # Fix: the original evaluated `state.turn + 1` and discarded the
        # result, so the successor state never advanced the turn counter.
        turn += 1
        board[my_location[0]][my_location[1]] = -1  # vacated cell gets -1
        # Cell values above 2 are added to the mover's score.
        if board[dest_row][dest_col] > 2:
            my_score += board[dest_row][dest_col]
        board[dest_row][dest_col] = 1
        my_location = dest_location
    else:
        board[rival_location[0]][rival_location[1]] = -1
        rival_location = dest_location
        if board[dest_row][dest_col] > 2:
            rival_score += board[dest_row][dest_col]
        board[dest_row][dest_col] = 2

    # A player left without a legal move pays the penalty.
    if not self.can_I_move(board, my_location):
        my_score -= penalty
    if not self.can_I_move(board, rival_location):
        rival_score -= penalty

    return State(board, penalty, my_score, rival_score, fruits, turn)
def test_submit_claim_on_commitment(alice, bob, operator, erc20_plasma_ct, ownership_predicate):
    """Bob claims a committed transfer and redeems it once it is redeemable."""
    contract = erc20_plasma_ct

    # Deposit.
    alice_deposit = contract.deposit(
        alice.address, 100, ownership_predicate, {'owner': alice.address})

    # Commitment that hands the deposited range to bob.
    alice_to_bob = Commitment(
        State(ownership_predicate, {'owner': bob.address}),
        alice_deposit.start, alice_deposit.end, 0)

    # Add the commit.
    contract.commitment_chain.commit_block(
        operator.address, {contract.address: [alice_to_bob]})

    # Try submitting the claim and check it was recorded.
    claim_id = contract.claim_commitment(alice_to_bob, 'merkle proof', bob.address)
    assert len(contract.claims) == 1

    # Now move the eth block to the block at which the claim is redeemable.
    contract.eth.block_number = contract.claims[claim_id].eth_block_redeemable

    # Finally try withdrawing the money!
    contract.redeem_claim(claim_id, alice_to_bob.end)

    # 1100 comes from bob having been sent 100 & already having 1000.
    bob_balance = contract.erc20_contract.balanceOf(bob.address)
    assert bob_balance == 1100
def skip_test_invalid_tx_exit_queue_resolution(alice, bob, mallory, erc20_plasma_ct, multisig_predicate, erc20_ct):
    """An invalid claim must not resolve ahead of an earlier valid claim.

    Alice and bob deposit into a multisig, mallory commits an invalid state
    and claims it, alice claims the deposit, and after the dispute period
    only the deposit claim may be resolved.
    """
    contract = erc20_plasma_ct
    multisig_owners = [alice.address, bob.address]

    # Deposit and commit to an invalid state.
    joint_deposit = contract.deposit_ERC20(
        alice.address, 100, multisig_predicate,
        {'recipient': [alice.address, bob.address]})
    mallory_state = State(
        joint_deposit.coin_id, 0, multisig_predicate,
        {'recipient': [mallory.address]})
    # Add the invalid tx to the first commitment.
    contract.add_commitment([mallory_state])

    # Submit a claim for the invalid state.
    invalid_claim = contract.submit_claim(mallory_state, 0)
    # Alice notices the invalid claim, and submits her own claim. Note that
    # it is based on her deposit, which is before the tx.
    valid_claim = contract.submit_claim(joint_deposit)

    # Wait for the dispute period to end.
    contract.eth.block_number += multisig_predicate.dispute_duration

    # Mallory attempts and fails to withdraw because there's another claim
    # with priority.
    try:
        contract.resolve_claim(mallory.address, invalid_claim)
    except Exception:
        throws = True
    else:
        throws = False
    assert throws

    # Now alice and bob agree to send the money to a new on-chain multisig.
    contract.resolve_claim(
        alice.address, valid_claim,
        (multisig_owners, 'on chain multisig address'))

    # Check that the balances have updated.
    assert erc20_ct.balanceOf('on chain multisig address') == 100
    assert erc20_ct.balanceOf(contract.address) == 0
def main():
    """Run ``winning_probability`` for every hand in ``hands5``.

    Each hand is evaluated from a fresh ``State`` whose deck is expanded from
    a mask with the 19 low bits set; progress is logged after every hand.
    """
    logging.info("Starting.")
    full_deck_mask = (1 << 19) - 1
    with Storage(state_dirs) as storage:
        for done, hand in enumerate(hands5, start=1):
            deck = expand_deck(hand, full_deck_mask)
            winning_probability(State(0, hand, deck), storage)
            logging.info("%d/%d hands processed." % (done, len(hands5)))
def linear_sarsa(self, iters, lambda_, compare_to_monctecarlo = False):
    """Run SARSA(lambda) with linear function approximation.

    Args:
        iters: Number of episodes to run.
        lambda_: Factor applied to the eligibility trace after every step.
        compare_to_monctecarlo: When true, a reference Q table is first
            learned by a separate agent with Monte-Carlo control; the mean
            squared error against it is recorded after every episode and
            plotted at the end.

    Side effects:
        Updates ``self.N`` and ``self.weights`` while learning, then refreshes
        ``self.Q`` and ``self.V`` from the final weights.
    """
    if compare_to_monctecarlo:
        reference_agent = Agent(Environment())
        reference_agent.monte_carlo_control(1000000)
        reference_q = reference_agent.Q
    mse_history = []

    learning_rate = 0.01
    for _ in range(iters):
        trace = np.zeros(self.number_of_features)

        # Initialise state and action.
        state = self.env.get_initial_state()
        action = self.epsilon_greedy_linear_constant(state)

        while not state.terminal:
            # Update number of visits.
            self.N[state.dealer_card - 1, state.player_sum - 1, Action.get_value(action)] += 1

            reward, next_state = self.env.step(state, action)
            next_action = self.epsilon_greedy_linear_constant(next_state)

            # A terminal successor contributes nothing beyond the reward.
            target = reward
            if not next_state.terminal:
                target = reward + self.estimate_Q(next_state, next_action)
            td_error = target - self.estimate_Q(state, action)

            trace = trace + self.get_feature_vector(state, action)
            self.weights += learning_rate * td_error * trace
            trace = lambda_ * trace

            state, action = next_state, next_action

        if compare_to_monctecarlo:
            mse_history.append(compute_mse(self.approximation_to_Q(), reference_q))

    if compare_to_monctecarlo:
        plt.plot(range(0, iters), mse_history, 'r-')
        plt.xlabel("episodes")
        plt.ylabel("MSE")
        plt.show()

    # Refresh the tabular Q and V from the learned weights.
    for (dealer_idx, player_idx), _ in np.ndenumerate(self.V):
        s = State(dealer_idx + 1, player_idx + 1)
        self.Q[dealer_idx, player_idx, 0] = np.dot(self.get_feature_vector(s, Action.hit), self.weights)
        self.Q[dealer_idx, player_idx, 1] = np.dot(self.get_feature_vector(s, Action.stick), self.weights)
        self.V[dealer_idx, player_idx] = max(
            self.estimate_Q(s, Action.hit), self.estimate_Q(s, Action.stick))
def move_coordinates():
    """Apply the human move from the query string and answer with the board FEN.

    Reads ``from``, ``to`` and ``promotion`` from the request arguments,
    pushes the move onto the stored board, persists it, lets the computer
    reply, and returns the stored board's FEN with status 200. If the game is
    already over, the response body is ``"game over"``.
    """
    fen = Entry.query.first().board
    s = State()
    s.board = chess.Board(fen)

    if s.board.is_game_over():
        print("GAME IS OVER")
        return app.response_class(response="game over", status=200)

    source = int(request.args.get('from', default=''))
    target = int(request.args.get('to', default=''))
    promotion = request.args.get('promotion', default='') == 'true'
    promotion_piece = chess.QUEEN if promotion else None

    move = s.board.san(chess.Move(source, target, promotion=promotion_piece))
    # MONTE: the Monte-Carlo agent is fed the move object, not its SAN text.
    move_uci = chess.Move(source, target, promotion=promotion_piece)

    if move is not None and move != "":
        print("human moves", move)
        try:
            s.board.push_san(move)
            Entry.query.update(dict(board=s.board.fen()))
            db.session.commit()
            if use_mc:
                # MONTE: Note monte won't work on heroku bc it stores state.
                ai_mc.push_move(move_uci)
            computer_move()
        except Exception:
            traceback.print_exc()

    # Re-read the stored board so the response matches what was persisted.
    fen = Entry.query.first().board
    s.board = chess.Board(fen)
    response = app.response_class(response=s.board.fen(), status=200)
    print(s.board)
    return response
def computer_move():
    """Choose the computer's move, play it on the stored board and persist it.

    With ``use_mc`` unset, a move is sampled from the minimax results using
    their second entries (normalised to sum to 1) as probabilities. With
    ``use_mc`` set, the Monte-Carlo agent selects the move.
    """
    fen = Entry.query.first().board
    s = State()
    s.board = chess.Board(fen)

    if use_mc:
        # MONTE: monte carlo agent
        aimove_mc, _value, _improved_policy = ai_mc.select_move(MC_SEARCH_ITER)
        s.board.push(chess.Move.from_uci(aimove_mc.a))
    else:
        # MINIMAX
        scored_moves = ai.minimax(s.board)
        weights = [entry[1] for entry in scored_moves]
        candidates = [entry[0] for entry in scored_moves]
        weights = weights / np.sum(weights)
        reply = np.random.choice(candidates, p=weights)
        s.board.push(reply)

    Entry.query.update(dict(board=s.board.fen()))
    db.session.commit()
def build_max_policy(q_function):
    """Build a policy that takes ``q_function``'s max action in every state.

    Every combination of position ``(x, y)`` within ``q_function.state_size``
    and velocity ``(v_x, v_y)`` within ``q_function.velocity_size`` is visited.
    """
    state_size = q_function.state_size
    velocity_size = q_function.velocity_size
    policy = Policy(state_size, velocity_size)

    # One flat product visits the states in the same order as nested loops.
    every_state = itertools.product(
        range(state_size[0]), range(state_size[1]),
        range(velocity_size), range(velocity_size))
    for x, y, v_x, v_y in every_state:
        state = State((x, y), (v_x, v_y))
        policy.update(state, q_function.get_max_action(state))
    return policy
def search(self, state, depth, max_player):
    """Run the MiniMax algorithm from ``state``.

    :param state: The state to start from.
    :param depth: The maximum allowed depth for the algorithm.
    :param max_player: Whether this is a max node (True) or a min node (False).
    :return: A tuple ``(value, direction)``: the minimax value and the
        ``direction`` attribute of the state that achieved it.
    """
    self.throw_exception_if_timeout(state)

    # Leaves (goal states or exhausted depth) are scored directly.
    reached_goal = self.goal and self.goal(state, max_player)
    if reached_goal or depth == 0:
        return (self.utility(state, max_player), state.direction)

    successors = self.succ(state, max_player)

    # A sentinel state holding the worst possible value for this node type;
    # max()/min() then compare it against the children.
    best = State(None, None, None, None, None, None)
    if max_player:
        best.value = -np.inf
        pick = max
    else:
        best.value = np.inf
        pick = min

    for child in successors:
        child.value = self.search(child, depth - 1, not max_player)[0]
        best = pick(best, child)
    return (best.value, best.direction)
def step(self, state, action):
    """Play one action and return ``[reward, resulting_state]``.

    Hitting draws a card for the player: leaving ``[self.loss, self.win]``
    gives reward -1 and a terminal state, otherwise reward 0. Sticking lets
    the dealer draw until ``self.dealer_stick`` is reached and always yields
    a terminal state: reward 1 when the dealer leaves ``[self.loss,
    self.win]``, else -1 / 0 / 1 for a dealer sum above / equal to / below
    the player's sum.
    """
    if action == Action.hit:
        total_after_hit = self.add_card(state.player_sum, self.draw())
        state_result = State(state.dealer_card, total_after_hit)
        if (state_result.player_sum > self.win
                or state_result.player_sum < self.loss):
            state_result.terminal = True
            reward = -1
        else:
            reward = 0
    elif action == Action.stick:
        state_result = State(state.dealer_card, state.player_sum)

        # Dealer's turn: keep drawing below the stick threshold.
        dealer_sum = state.dealer_card
        while dealer_sum < self.dealer_stick:
            dealer_sum = self.add_card(dealer_sum, self.draw())
        state_result.dealer_card = dealer_sum
        state_result.terminal = True

        dealer_bust = dealer_sum > self.win or dealer_sum < self.loss
        if dealer_bust:
            return [1, state_result]

        if dealer_sum > state.player_sum:
            reward = -1
        elif dealer_sum == state.player_sum:
            reward = 0
        else:
            reward = 1
    return [reward, state_result]
def make_move(self, time_limit, players_score):
    """Make move with this Player.

    Runs MiniMax with increasing depth while the time budget allows, then
    marks the move on ``self.board`` and increments ``self.turn``.

    input:
        - time_limit: float, time limit for a single turn.
        - players_score: indexable pair; index 0 is passed to State as this
          player's score and index 1 as the rival's.
    output:
        - direction: tuple, specifying the Player's movement, chosen from self.directions
    """
    start_time = time.time()
    minimax_ret = 0
    iteration_time = 0
    depth = 1
    state = State(self.board, self.penalty_score, players_score[0], players_score[1], self.cur_fruits, self.turn)
    succ = self.get_legal_moves
    utility = self.calc_score
    preform_move = self.preform_move
    # If our lead exceeds the penalty it is worth ending the game.
    if players_score[0] - players_score[1] > self.penalty_score:
        # We want to get the fine, end the game and win.
        # NOTE(review): the source's indentation was lost; this layout (a loop
        # that only recomputes the first legal move until time_limit + 8 has
        # elapsed) is reconstructed from the comment above -- confirm.
        while time.time() - start_time < time_limit + 8:
            minimax_ret = self.get_legal_moves(state.board, state.my_location)[0]
        minimax_ret = (0, self.calc_direction(state.my_location, minimax_ret))
        new_pos = (state.my_location[0] + minimax_ret[1][0], state.my_location[1] + minimax_ret[1][1])
        self.board[state.my_location[0]][state.my_location[1]] = -1
        self.board[new_pos[0]][new_pos[1]] = 1
        self.turn += 1
        return minimax_ret[1]
    # TODO: check if correct upperbound
    # Iterative deepening: start another iteration only while four times the
    # last iteration's duration still fits in the remaining time
    # (total time = iter_time + 3*iter_time, the assumed upper bound).
    while 4 * iteration_time < time_limit - (time.time() - start_time) and time.time() - start_time < time_limit:
        # NOTE(review): get_legal_moves / calc_direction are called here as
        # bare names but as self.* methods above -- confirm that module-level
        # versions exist.
        moves = get_legal_moves(state.board, state.my_location)
        minimax_ret = [1, 2]
        # A single legal move needs no search.
        if len(moves) == 1:
            minimax_ret[0] = None
            minimax_ret[1] = calc_direction(state.my_location, moves[0])
            break
        start_iteration = time.time()
        minimax_ret = MiniMax(succ=succ,utility=utility, perform_move= preform_move).search(state=state, depth=depth, maximizing_player=True)
        iteration_time = time.time() - start_iteration
        depth += 1
    # NOTE(review): if the loop above never runs, minimax_ret is still 0 and
    # the indexing below fails.
    new_pos = (state.my_location[0] + minimax_ret[1][0],
               state.my_location[1] + minimax_ret[1][1])
    # The vacated cell gets -1 and the new cell gets 1.
    self.board[state.my_location[0]][state.my_location[1]] = -1
    self.board[new_pos[0]][new_pos[1]] = 1
    self.turn += 1
    return minimax_ret[1]
def make_move(self, time_limit, players_score):
    """Make move with this Player.

    Runs a fixed depth-4 AlphaBeta search from the current position, marks
    the move on ``self.board`` and increments ``self.turn``.

    input:
        - time_limit: float, time limit for a single turn (not consulted by
          this fixed-depth implementation).
        - players_score: indexable pair; index 0 is passed to State as this
          player's score and index 1 as the rival's.
    output:
        - direction: tuple, specifying the Player's movement, chosen from self.directions
    """
    # The original built this identical State twice and kept an unused
    # start_time; one construction is enough.
    state = State(self.board, self.penalty_score, players_score[0],
                  players_score[1], self.cur_fruits, self.turn)
    searcher = AlphaBeta(succ=self.sorted_moves, utility=self.calc_score,
                         perform_move=self.preform_move)
    minimax_ret = searcher.search(state=state, depth=4, maximizing_player=True)
    direction = minimax_ret[1]

    old_row, old_col = state.my_location[0], state.my_location[1]
    new_pos = (old_row + direction[0], old_col + direction[1])
    # The vacated cell gets -1 and the new cell gets 1.
    self.board[old_row][old_col] = -1
    self.board[new_pos[0]][new_pos[1]] = 1
    self.turn += 1
    return direction
def skip_test_submit_claim_on_transaction(alice, bob, charlie, erc20_plasma_ct, multisig_predicate):
    """Submitting a claim on a committed transaction records one claim queue."""
    contract = erc20_plasma_ct

    # Deposit and send a tx.
    joint_deposit = contract.deposit_ERC20(
        alice.address, 100, multisig_predicate,
        {'recipient': [alice.address, bob.address]})
    spend_to_charlie = State(
        joint_deposit.coin_id, 0, multisig_predicate,
        {'recipient': [charlie.address]})
    # Add the tx to the first commitment.
    contract.add_commitment([spend_to_charlie])

    # Try submitting the claim.
    contract.submit_claim(spend_to_charlie, 0)

    # Check the claim was recorded.
    queue_count = len(contract.claim_queues)
    assert queue_count == 1
def test_challenge_claim_with_invalid_state(alice, mallory, operator, erc20_plasma_ct, ownership_predicate):
    """A challenged claim on an invalid state cannot be redeemed.

    Alice deposits, mallory commits and claims an invalid transfer of that
    range, alice claims her deposit and challenges mallory's claim with it.
    Mallory's redemption must then fail while alice's succeeds.
    """
    # Deposit and commit to invalid state.
    commit0_alice_deposit = erc20_plasma_ct.deposit(
        alice.address, 100, ownership_predicate, {'owner': alice.address})
    # Check that alice's balance was reduced.
    assert erc20_plasma_ct.erc20_contract.balanceOf(alice.address) == 900

    # Uh oh! Mallory creates an invalid state & commits it!!!
    state_mallory_ownership = State(ownership_predicate, {'owner': mallory.address})
    invalid_commit1_alice_to_mallory = Commitment(
        state_mallory_ownership,
        commit0_alice_deposit.start,
        commit0_alice_deposit.end,
        0)
    # Add the commitment.
    erc20_plasma_ct.commitment_chain.commit_block(
        operator.address,
        {erc20_plasma_ct.address: [invalid_commit1_alice_to_mallory]})

    # Submit a claim for the invalid state.
    invalid_commitment_claim_id = erc20_plasma_ct.claim_commitment(
        invalid_commit1_alice_to_mallory, 'merkle proof', mallory.address)
    # Oh no! Alice notices bad behavior and attempts withdrawal of deposit state.
    deposit_claim_id = erc20_plasma_ct.claim_deposit(commit0_alice_deposit.end)
    # Alice isn't letting that other claim go through. She challenges it with
    # her deposit!
    challenge = erc20_plasma_ct.challenge_claim(
        deposit_claim_id, invalid_commitment_claim_id)
    # Verify that the challenge was recorded.
    assert challenge is not None and len(erc20_plasma_ct.challenges) == 1

    # Fast forward in time until the eth block allows the claim to be redeemable.
    erc20_plasma_ct.eth.block_number = erc20_plasma_ct.claims[
        invalid_commitment_claim_id].eth_block_redeemable

    # Mallory attempts and fails to withdraw because there's another claim
    # with priority.
    # Fix: redeem by claim id, as every other redeem_claim call does. The
    # original passed mallory.address, so the call could fail merely because
    # that is not a claim id rather than because of the open challenge.
    try:
        erc20_plasma_ct.redeem_claim(
            invalid_commitment_claim_id, invalid_commit1_alice_to_mallory.end)
    except Exception:
        throws = True
    else:
        throws = False
    assert throws

    # Now instead alice withdraws.
    erc20_plasma_ct.redeem_claim(
        deposit_claim_id,
        erc20_plasma_ct.claims[deposit_claim_id].commitment.end)
    # Check that alice was sent her money!
    assert erc20_plasma_ct.erc20_contract.balanceOf(alice.address) == 1000
def next_move(self, state) -> Optional[dict]:
    """Record one training sample per action and return a random move.

    Args:
        state: Mapping of keyword arguments for ``State``.

    Returns:
        ``{"move": <action>}`` with a randomly chosen action, or ``None``
        when the player is no longer alive.

    Side effects:
        Appends one ``SaveState`` per entry of ``ACTIONS`` to ``self.data``
        and prints the local board view.
    """
    state = State(**state)
    if state.last_alive:
        print("I won!! :)")
    if not state.alive:
        print("I'm dead :(")
        return None

    # Fix: the `np.int` alias was removed from NumPy; the builtin `int` is
    # the dtype it stood for.
    board = np.asarray(state.board, dtype=int)
    board = np.pad(board, 1, 'constant', constant_values=9)
    size = board.shape[0]

    def padded_index(coord):
        # Shift by the one-cell border; negative coordinates count from the
        # end. `size` is the row count and is used for both axes.
        if coord < 0:
            return size - (abs(coord) + 1)
        return coord + 1

    x = padded_index(state.position[0])
    y = padded_index(state.position[1])
    # NOTE(review): the window reaches 2 cells each way but the border is
    # only 1 cell wide, so positions on the edge produce a clipped or empty
    # slice -- confirm whether the pad width should be 2.
    window = board[x - 2:x + 3, y - 2:y + 3]
    direction = board[x, y]
    # Rotate by the value stored in the player's own cell.
    l_board = np.rot90(window, direction)
    print(str(l_board).replace('0', '-'))

    for choice in ACTIONS:
        move = ACTIONSCALC[ACTIONS.index(choice)]
        new_pos = l_board[2 + move[0], 2 + move[1]]
        # The last field is True when the target cell's value is not positive.
        self.data.append(SaveState(l_board, choice, not (new_pos > 0)))

    choice = random.choice(ACTIONS)
    return {"move": choice}
def step(self, action):
    """Apply ``action`` to the velocity, move, and return the ``TimeStep``.

    * Crossing the goal returns a terminal step with ``GOAL_REWARD`` at the
      position snapped to the goal; the stored position is left unchanged.
    * Leaving the track resets the environment and returns ``STEP_REWARD``.
    * Otherwise the position is updated and ``STEP_REWARD`` is returned.
    """
    candidate = (
        self._clamp_velocity_to_range(self._velocity[0] + action.x),
        self._clamp_velocity_to_range(self._velocity[1] + action.y))

    # Don't allow the velocity to be reduced to zero.
    if any(component > 0 for component in candidate):
        self._velocity = candidate

    target = self._compute_move(self._position, self._velocity)

    if self._track.crosses_goal(self._position, target):
        target = self._track.snap_to_goal(self._position, target)
        return TimeStep(State(target, self._velocity),
                        constants.GOAL_REWARD,
                        terminal = True)

    if self._track.out_of_range(target):
        self.reset()
    else:
        self._position = target
    return TimeStep(self._get_state(), constants.STEP_REWARD)
def get_single_trace(env, agent, trace_idx, agent_traces, states_dict, args):
    """Run one episode and record it using the Trace and State classes.

    Args:
        env: Environment providing ``reset``, ``step`` and ``render``.
        agent: Agent providing ``act`` and ``get_state_action_values``.
        trace_idx: Index of this trace; first component of every state id.
        agent_traces: List the finished trace is appended to.
        states_dict: Dict filled with one ``State`` per step, keyed by
            ``(trace_idx, step_index)``.
        args: Unused here; kept for interface compatibility.
    """
    trace = Trace()
    curr_obs = env.reset()
    done = False
    while not done:
        a = agent.act(curr_obs)
        obs, r, done, infos = env.step(a)

        # Generate State
        state_img = env.render(mode='rgb_array')
        state_q_values = agent.get_state_action_values(obs)
        features = NotImplemented  # TODO implement here
        state_id = (trace_idx, trace.length)
        states_dict[state_id] = State(state_id, obs, state_q_values, features, state_img)

        # Add step and state to trace
        trace.update(obs, r, done, infos, a, state_id)

        # Fix: advance the observation. The original never updated curr_obs,
        # so the agent kept acting on the initial reset observation.
        curr_obs = obs
    agent_traces.append(trace)
def deposit(self, depositor, deposit_amount, predicate, parameters):
    """Transfer tokens into the contract and record the deposit.

    Args:
        depositor: Address the tokens are transferred from.
        deposit_amount: Number of tokens; must be positive.
        predicate: Predicate of the deposit's initial state.
        parameters: Parameters of the deposit's initial state.

    Returns:
        The ``Commitment`` covering ``[total_deposits, total_deposits +
        deposit_amount)`` as measured before this deposit.
    """
    assert deposit_amount > 0

    # Make the transfer.
    self.erc20_contract.transferFrom(depositor, self.address, deposit_amount)

    # The deposit refers to the plasma block preceding it.
    preceding_block = len(self.commitment_chain.blocks) - 1

    # The deposit occupies the next free range.
    range_start = self.total_deposits
    range_end = range_start + deposit_amount

    # Initial state and the deposit record wrapping it.
    record = Commitment(State(predicate, parameters), range_start, range_end,
                        preceding_block)

    # Claimable ranges are keyed by their end position.
    self.claimable_ranges[range_end] = record
    self.total_deposits += deposit_amount
    return record
def search(self, state, depth, is_father_max, alpha=ALPHA_VALUE_INIT, beta=BETA_VALUE_INIT):
    """Run the AlphaBeta algorithm from ``state``.

    :param state: The state to start from.
    :param depth: The maximum allowed depth for the algorithm.
    :param is_father_max: Whether this is a max node (True) or a min node (False).
    :param alpha: alpha value
    :param beta: beta value
    :return: A tuple ``(value, direction)``. On a cutoff the value is
        ``np.inf`` (max node) or ``-np.inf`` (min node).
    """
    self.throw_exception_if_timeout(state)

    # Leaves (goal states or exhausted depth) are scored directly.
    reached_goal = self.goal and self.goal(state)
    if reached_goal or depth == 0:
        return (self.utility(state, is_father_max), state.direction)

    successors = self.succ(state, not is_father_max)

    if is_father_max:
        # Sentinel state holding the worst value for a max node.
        best = State(None, None, None, None, None, None, None, None, None, None, None)
        best.value = -np.inf
        for child in successors:
            child.value = self.search(child, depth - 1, not is_father_max, alpha, beta)[0]
            best = max(best, child)
            alpha = max(best.value, alpha)
            if best.value >= beta:
                # Cutoff: the min ancestor will never allow this branch.
                return np.inf, best.direction
        return best.value, best.direction

    # Min node: sentinel state holding the worst value for a min node.
    worst = State(None, None, None, None, None, None, None, None, None, None, None)
    worst.value = np.inf
    for child in successors:
        child.value = self.search(child, depth - 1, not is_father_max, alpha, beta)[0]
        worst = min(worst, child)
        beta = min(worst.value, beta)
        if worst.value <= alpha:
            # Cutoff: the max ancestor will never allow this branch.
            return -np.inf, worst.direction
    return (worst.value, worst.direction)
def skip_test_submit_dispute_on_deposit(alice, bob, charlie, erc20_plasma_ct, multisig_predicate):
    """A claim on a spent deposit is removed when disputed with the spend."""
    contract = erc20_plasma_ct

    # Deposit and send a tx.
    joint_deposit = contract.deposit_ERC20(
        alice.address, 100, multisig_predicate,
        {'recipient': [alice.address, bob.address]})
    spend_to_charlie = State(
        joint_deposit.coin_id, 0, multisig_predicate,
        {'recipient': [charlie.address]})
    # Add the tx to the first commitment.
    contract.add_commitment([spend_to_charlie])

    # Create a witness based on this commitment.
    spend_witness = MultiSigTransitionWitness([alice.address, bob.address], 0)

    # Try submitting a claim on the deposit.
    deposit_claim = contract.submit_claim(joint_deposit)
    # Check the claim was recorded.
    assert len(contract.claim_queues[spend_to_charlie.coin_id]) == 1

    # Now bob disputes the claim with the spend.
    contract.dispute_claim(
        bob.address, deposit_claim, spend_witness, spend_to_charlie)
    # Check the claim was deleted.
    assert len(contract.claim_queues[spend_to_charlie.coin_id]) == 0
def test_redeem_challenged_claim(alice, mallory, operator, erc20_plasma_ct, ownership_predicate):
    """A claim can be redeemed once the challenge against it is removed.

    Mallory deposits and transfers to alice; alice claims the transfer.
    Mallory then claims the spent deposit and challenges alice's claim with
    it. The deposit claim is revoked, the challenge removed, and alice
    redeems.
    """
    contract = erc20_plasma_ct

    # Deposit.
    mallory_deposit = contract.deposit(
        mallory.address, 100, ownership_predicate, {'owner': mallory.address})

    # Create a new state & commitment for alice ownership.
    mallory_to_alice = Commitment(
        State(ownership_predicate, {'owner': alice.address}),
        mallory_deposit.start, mallory_deposit.end, 0)
    # Add the commit.
    contract.commitment_chain.commit_block(
        operator.address, {contract.address: [mallory_to_alice]})

    # Now alice wants to withdraw, so submit a new claim on the funds.
    claim_id = contract.claim_commitment(
        mallory_to_alice, 'merkle proof', alice.address)

    # Uh oh! Mallory decides to withdraw and challenge the claim.
    revoked_claim_id = contract.claim_deposit(mallory_deposit.end)
    challenge_id = contract.challenge_claim(revoked_claim_id, claim_id)

    # This revoked claim is then swiftly canceled by alice.
    spend_witness = OwnershipRevocationWitness(
        mallory_to_alice, mallory.address, 'merkle proof')
    contract.revoke_claim(10, revoked_claim_id, spend_witness)
    # Remove the challenge for the revoked claim.
    contract.remove_challenge(challenge_id)

    # Move the eth block number to the block at which the claim is redeemable.
    contract.eth.block_number = contract.claims[claim_id].eth_block_redeemable

    # Now alice can withdraw!
    contract.redeem_claim(claim_id, contract.claims[claim_id].commitment.end)

    # Check that alice was sent her money!
    alice_balance = contract.erc20_contract.balanceOf(alice.address)
    assert alice_balance == 1100
def __init__(self, root, num_threads=1, download=False, load=True, splits=(1, ), batch_size=1, mode='train', shuffle=True, preload_to_gpu=False, **options):
    """Store the dataset configuration and optionally download/load the data.

    Args:
        root: Dataset directory; user-expanded and made absolute.
        num_threads: Non-negative number of threads.
        download: If exactly ``True`` and ``check_exists(root)`` is not
            ``True``, ``download(root)`` is called.
        load: If exactly ``True``, ``load()`` is called at the end.
        splits, batch_size, mode, shuffle, preload_to_gpu: Stored as-is.
        **options: Extra options; ``batch_size``, ``mode``, ``shuffle`` and
            ``preload_to_gpu`` are merged into them.
    """
    # Constructing State may raise TypeError; fall back to a CPU-only setup.
    try:
        self.state = State()
        self.is_cuda = self.state.use_cuda
    except TypeError:
        self.state = None
        self.is_cuda = False

    expanded_root = os.path.expanduser(root)
    self.root = os.path.abspath(expanded_root)

    assert (num_threads >= 0)
    self.num_threads = num_threads
    self.splits = splits
    self.batch_size = batch_size
    self.mode = mode
    self.shuffle = shuffle
    self.preload_to_gpu = preload_to_gpu

    self.options = options
    self.options.update(
        batch_size=batch_size,
        mode=mode,
        shuffle=shuffle,
        preload_to_gpu=preload_to_gpu,
    )

    if download is True:
        if self.check_exists(self.root) is not True:
            self.download(self.root)

    self._data = []
    if load is True:
        self.load()
def next_move(self, state) -> Optional[dict]:
    """Predict the next move from the local board view around the player.

    Args:
        state: Mapping of keyword arguments for ``State``.

    Returns:
        ``{"move": <action>}`` chosen by ``self.clf``, or ``None`` when the
        player is not alive.

    Side effects:
        When the game is over, prints a summary and increments
        ``self.count``; the process exits once the count exceeds 200.
    """
    state = State(**state)
    if state.game_over:
        print(f'round {self.count} ended')
        print(f"Game Over ... win's: {state.wins} | losses: {state.losses}")
        self.count += 1
        if self.count > 200:
            exit(0)
    if not state.alive:
        return None

    pad = self.pad
    # Fix: the `np.int` alias was removed from NumPy; the builtin `int` is
    # the dtype it stood for.
    board = np.asarray(state.board, dtype=int)
    board = np.pad(board, pad, 'constant', constant_values=9)

    # Player position in padded coordinates.
    x = state.position[0] + pad
    y = state.position[1] + pad
    # Square window of `pad` cells in every direction around the player.
    window = board[x - pad:x + pad + 1, y - pad:y + pad + 1]

    # Rotate by the value stored in the player's own cell.
    direction = board[x, y]
    l_board = np.rot90(window, direction)

    # Flatten and map every positive cell to 9 before prediction.
    f_board = l_board.flatten()
    f_board = np.where(f_board > 0, 9, f_board)

    pred = self.clf.predict([f_board])
    return {"move": ACTIONS[pred[0]]}
def get_state(self):
    """Return a ``State`` built from the currently stored state fields.

    The fields are passed positionally in this order: action, reward,
    screen, terminal, pob.
    """
    fields = (
        self.state_action,
        self.state_reward,
        self.state_screen,
        self.state_terminal,
        self.state_pob,
    )
    return State(*fields)
from cases.wave_equation.dirichlet.derivative.derivative import WaveEquationDerivative
from utils import State

# Run configuration for a wave-equation case driven by an explicit integrator.
# `c` is the only entry of `time_derivative_input` -- presumably the wave
# speed; confirm against WaveEquationDerivative.
c = 2.0
num_grid_points = 1000
# The time step shrinks with both the grid resolution and c.
dt = 1 / (16 * num_grid_points * c)
params = {
    'num_grid_points': num_grid_points,
    'domain_size': 1.0,
    'dt': dt,
    'sampling_rate': 100
}
time_derivative_input = [c]
# case_sol_input = [c, [(1, 1.0), (2, 2.0)]]

# Set up the axes: the same grid of num_grid_points samples over
# [0, domain_size), end point excluded, repeated on two rows.
axes = np.tile(
    np.linspace(0, params['domain_size'], num_grid_points + 1)[:-1],
    (2, 1))
# Two state variables labelled "u" and "v", each paired with the label "x".
state = State(2, num_grid_points, axes, [("x", "u"), ("x", "v")])
state_vars = state.get_state_vars()
# The first state variable starts from a GaussianBump built with the domain
# midpoint and 50; the second is left as State provides it.
starting_cond = starting_conditions.GaussianBump(params['domain_size'] * 0.5, 50)
state_vars[0] = starting_cond.get_start_condition(axes[0])
#state_vars[0] = np.sin(axes[0] * 2 * np.pi / params['domain_size'])
run_utils.run_visual_without_solution(params, Explicit, WaveEquationDerivative, time_derivative_input, state)
# Spot-check the action <-> integer encoding.
assert (action_to_integer(Action(0, 0)) == 4)
assert (action_to_integer(Action(1, 0)) == 5)
# Check integer_to_action reverses it.
assert (integer_to_action(action_to_integer(Action(1, -1))) == Action(1, -1))
# velocity_to_integer should work similarly.
assert (velocity_to_integer((0, 0)) == 0)
assert (velocity_to_integer((1, 0)) == 1)
assert (velocity_to_integer((0, 1)) == 6)
assert (velocity_to_integer((3, 3)) == 21)
# Build a Q function and check we can update it.
q_f = QFunction((70, 70), 6)
q_f.set(State((15, 20), (2, 2)), Action(1, 1), 27)
assert (q_f.get(State((15, 20), (2, 2)), Action(1, 1)) == 27)
# Check the Q function can track visit counts too: the two increments
# (1 and 26) must add up to 27.
q_f.increment_count(State((15, 20), (2, 2)), Action(1, 1), 1)
q_f.increment_count(State((15, 20), (2, 2)), Action(1, 1), 26)
assert (q_f.get_count(State((15, 20), (2, 2)), Action(1, 1)) == 27)
# A maximising policy should now choose the action we assigned the value of
# 27 whenever we're in that state.
policy = build_max_policy(q_f)
assert (policy.get_action(State((15, 20), (2, 2))) == Action(1, 1))
# Epsilon-greedy policy with epsilon zero should follow the wrapped policy.
# In this case, it takes the action we assigned the value of 27 above.
e_greedy = EpsilonGreedyPolicy(0.0, policy, 9)