def __init__(self, team, champ_ids=None, num_positions=5, draft=None):
    # TODO (Devin): This should make sure that num_champions >= num_positions
    # Resolve champ_ids and draft at call time to avoid mutable default
    # arguments (a shared Draft instance would otherwise be reused across
    # every DraftState constructed with the default).
    if champ_ids is None:
        champ_ids = get_champion_ids()
    if draft is None:
        draft = Draft('default')

    self.num_champions = len(champ_ids)
    self.num_positions = num_positions
    self.num_actions = (self.num_positions + 1) * self.num_champions
    self.state_index_to_champ_id = {i: k for i, k in enumerate(champ_ids)}
    self.champ_id_to_state_index = {k: i for i, k in enumerate(champ_ids)}
    self.state = np.zeros((self.num_champions, self.num_positions + 2), dtype=bool)
    self.picks = []
    self.bans = []
    self.selected_pos = []
    self.team = team
    self.draft_structure = draft

    # Get phase information from draft
    self.BAN_PHASE_LENGTHS = self.draft_structure.PHASE_LENGTHS[DraftState.BAN_PHASE]
    self.PICK_PHASE_LENGTHS = self.draft_structure.PHASE_LENGTHS[DraftState.PICK_PHASE]

    # The dicts pos_to_pos_index and pos_index_to_pos contain the mapping
    # from position labels to indices into the state matrix and vice versa.
    self.positions = [i - 1 for i in range(num_positions + 2)]
    self.pos_indices = [1, 0]
    self.pos_indices.extend(range(2, num_positions + 2))
    self.pos_to_pos_index = dict(zip(self.positions, self.pos_indices))
    self.pos_index_to_pos = dict(zip(self.pos_indices, self.positions))
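# For reference, a standalone sketch of the position -> state-column mapping
# built above, reproduced for the default num_positions = 5. This is an
# illustration only, not part of the class.
num_positions = 5
positions = [i - 1 for i in range(num_positions + 2)]      # [-1, 0, 1, 2, 3, 4, 5]
pos_indices = [1, 0] + list(range(2, num_positions + 2))   # [1, 0, 2, 3, 4, 5, 6]
print(dict(zip(positions, pos_indices)))
# {-1: 1, 0: 0, 1: 2, 2: 3, 3: 4, 4: 5, 5: 6}
# Position -1 (a ban) maps to state column 1, position 0 (a masked enemy
# pick) maps to column 0, and positions 1..5 map to columns 2..6.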
def dueling_networks(path_to_model):
    valid_champ_ids = cinfo.get_champion_ids()
    # Two states are maintained: one corresponding to the perception of the
    # draft according to each of the teams.
    blue_state = DraftState(DraftState.BLUE_TEAM, valid_champ_ids)
    red_state = DraftState(DraftState.RED_TEAM, valid_champ_ids)
    draft = {0: blue_state, 1: red_state}

    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(
            "{path}.ckpt.meta".format(path=path_to_model))
        saver.restore(sess, "{path}.ckpt".format(path=path_to_model))
        online_out = tf.get_default_graph().get_tensor_by_name(
            "online/outputs:0")
        online_pred = tf.get_default_graph().get_tensor_by_name(
            "online/prediction:0")
        online_input = tf.get_default_graph().get_tensor_by_name(
            "online/inputs:0")
        online_secondary_input = tf.get_default_graph().get_tensor_by_name(
            "online/secondary_inputs:0")

        submission_count = 0
        while (blue_state.evaluate() != DraftState.DRAFT_COMPLETE
               and red_state.evaluate() != DraftState.DRAFT_COMPLETE):
            active_team = get_active_team(submission_count)
            inactive_team = 0 if active_team else 1
            print("active {}".format(active_team))
            state = draft[active_team]
            pred_act = sess.run(online_pred, feed_dict={
                online_input: [state.format_state()],
                online_secondary_input: [state.format_secondary_inputs()]
            })
            cid, pos = state.format_action(pred_act[0])
            print("cid={} pos={}".format(cid, pos))
            # Update active state
            state.update(cid, pos)
            # Update inactive state, remembering to mask non-bans submitted
            # by the opponent
            inactive_pos = pos if pos == -1 else 0
            draft[inactive_team].update(cid, inactive_pos)
            submission_count += 1
    return draft
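# Usage sketch for dueling_networks. The checkpoint prefix "models/swainbot"
# is a hypothetical example path; substitute whatever prefix your training
# run saved its .ckpt files under.
final_draft = dueling_networks("models/swainbot")
final_draft[0].display()  # completed draft from the blue-side perspective
final_draft[1].display()  # completed draft from the red-side perspective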
def process_match(match, team, augment_data=True):
    """
    process_match takes an input match and breaks each incremental pick and
    ban in the draft into experiences (aka "memories").

    Args:
        match (dict): match dictionary with pick and ban data for a single game.
        team (DraftState.BLUE_TEAM or DraftState.RED_TEAM): The team perspective
            used to process the match. The selected team has the positions for
            each pick explicitly included with the experience, while the
            opposing team has the assigned positions for its champion picks
            masked.
        augment_data (bool, optional): flag controlling the randomized ordering
            of submissions that do not affect the draft as a whole.
    Returns:
        experiences (list(tuple)): list of experience tuples. Each experience
        is of the form (s, a, r, s') where:
            - s and s' are DraftState states before and after a single action
            - a is the (stateIndex, position) tuple of the champion to be
              banned or picked. position = 0 for submissions by the opposing team
            - r is the integer reward obtained from submitting the action a

    process_match() can take the vantage point of either side of the draft to
    parse for memories. This means we can ultimately sample from both winning
    drafts (positive reinforcement) and losing drafts (negative reinforcement)
    when training.
    """
    experiences = []
    valid_champ_ids = get_champion_ids()

    # This section controls data augmentation of the match. Certain submissions
    # in the draft are made consecutively by the same team during the same phase
    # (ie team1 pick0 -> team1 pick1). Although these submissions were produced
    # in a particular order, from a draft perspective there is no difference
    # between submissions of the form
    #   team1 pick0 -> team1 pick1   vs   team1 pick1 -> team1 pick0
    # provided that the two submissions are from the same phase (both bans or
    # both picks). Therefore it is possible to augment the order in which these
    # submissions are processed. Note that we can also augment the banning
    # phase if desired. Although those submissions technically fall outside the
    # conditions listed above, in practice bans made in the same phase are
    # interchangeable in order.

    # Build queue of actions from match reference (augmenting if desired)
    augments_list = [
        ("blue", "bans", slice(0, 3)),   # Blue bans 0,1,2 are augmentable
        ("blue", "bans", slice(3, 5)),   # Blue bans 3,4 are augmentable
        ("red", "bans", slice(0, 3)),
        ("red", "bans", slice(3, 5)),
        ("blue", "picks", slice(1, 3)),  # Blue picks 1,2 are augmentable
        ("blue", "picks", slice(3, 5)),  # Blue picks 3,4 are augmentable
        ("red", "picks", slice(0, 2))    # Red picks 0,1 are augmentable
    ]
    if augment_data:
        augmented_match = deepcopy(match)  # Deepcopy match to avoid side effects
        for (k1, k2, aug_range) in augments_list:
            count = len(augmented_match[k1][k2][aug_range])
            augmented_match[k1][k2][aug_range] = random.sample(
                augmented_match[k1][k2][aug_range], count)
        action_queue = build_action_queue(augmented_match)
    else:
        action_queue = build_action_queue(match)

    # Set up draft state
    draft = DraftState(team, valid_champ_ids)

    finish_memory = False
    while action_queue:
        # Get next pick from deque
        submission = action_queue.popleft()
        (submitting_team, pick, position) = submission

        # There are two conditions under which we want to finalize a memory:
        # 1. Non-designated team has finished submitting picks for this phase
        #    (ie next submission belongs to the designated team)
        # 2. Draft is complete (no further picks in the draft)
        if submitting_team == team:
            if finish_memory:
                # This is case 1: store the open memory
                r = get_reward(draft, match, a, a)
                s_next = deepcopy(draft)
                memory = (s, a, r, s_next)
                experiences.append(memory)
                finish_memory = False
            # Memory starts when the upcoming pick belongs to the designated team
            s = deepcopy(draft)
            # Store action = (champIndex, pos)
            a = (pick, position)
            finish_memory = True
        else:
            # Mask positions for pick submissions belonging to the
            # non-designated team
            if position != -1:
                position = 0

        draft.update(pick, position)

    # Once the queue is empty, store the last memory. This is case 2 above.
    # There is always an outstanding memory at the completion of the draft.
    # RED_TEAM always gets the last pick. Therefore:
    #   if team == BLUE_TEAM -> there is an outstanding memory from the last
    #                           RED_TEAM submission
    #   if team == RED_TEAM  -> the memory is open from just before our last
    #                           submission
    if draft.evaluate() == DraftState.DRAFT_COMPLETE:
        assert finish_memory == True
        r = get_reward(draft, match, a, a)
        s_next = deepcopy(draft)
        memory = (s, a, r, s_next)
        experiences.append(memory)
    else:
        print("{} vs {}".format(match["blue_team"], match["red_team"]))
        draft.display()
        print("Error code {}".format(draft.evaluate()))
        print("Number of experiences {}".format(len(experiences)))
        for experience in experiences:
            _, a, _, _ = experience
            print(a)
        print("")  # raise

    return experiences
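# Standalone sketch of the in-phase reordering performed above: sampling a
# slice against its own length yields a random permutation of that slice,
# leaving the rest of the sequence untouched. The ban ids below are
# placeholder values, not real champion ids.
import random
from copy import deepcopy

toy_match = {"blue": {"bans": [1, 2, 3, 4, 5], "picks": []}}
augmented = deepcopy(toy_match)           # deepcopy to avoid side effects
aug_range = slice(0, 3)                   # first ban phase: bans 0,1,2
segment = augmented["blue"]["bans"][aug_range]
augmented["blue"]["bans"][aug_range] = random.sample(segment, len(segment))
print(augmented["blue"]["bans"])          # e.g. [3, 1, 2, 4, 5]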
from draftstate import DraftState
import champion_info as cinfo
import match_processing as mp
from models import qNetwork, softmax
from trainer import DDQNTrainer, SoftmaxTrainer
from models.inference_model import QNetInferenceModel, SoftmaxInferenceModel

import json
import tensorflow as tf

print("")
print("********************************")
print("** Beginning Swain Bot Run! **")
print("********************************")

valid_champ_ids = cinfo.get_champion_ids()
print("Number of valid championIds: {}".format(len(valid_champ_ids)))

# Store training match data in a json file (for reuse later)
reuse_matches = True
val_count = 40
save_match_pool = False
validation_ids = []
training_ids = []
if reuse_matches:
    print("Using match data in match_pool.txt.")
    with open('match_pool.txt', 'r') as infile:
        data = json.load(infile)
    validation_ids = data["validation_ids"]
    training_ids = data["training_ids"]
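# Sketch of a minimal match_pool.txt that satisfies the loader above. The two
# keys are the ones the script reads; the id values are placeholders, and a
# real pool file may carry additional fields.
pool = {
    "validation_ids": [101, 102, 103],  # placeholder match ids
    "training_ids": [201, 202, 203],    # placeholder match ids
}
with open("match_pool.txt", "w") as outfile:
    json.dump(pool, outfile)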
def self_train(sess, explore_prob, n_experiences=1):
    """
    Runs the model currently held in TF Session sess through one self-training
    loop, collecting negative memories whenever the model submits an illegal
    action that prevents it from completing a draft.

    Args:
        sess (tf.Session()): TF Session used to run the model.
        explore_prob (float): Probability that each pick explores state space
            by submitting a random action.
        n_experiences (int): Number of experiences desired.
    Returns:
        experiences [(s,a,r,s')]: list of experience tuples from illegal
            submissions made by either side of the draft. Empty if the network
            completes every attempted draft without an illegal action.
    """
    MAX_DRAFT_ITERATIONS = 100  # Maximum number of drafts to iterate through
    assert n_experiences > 0, "Number of experiences must be positive"
    valid_champ_ids = cinfo.get_champion_ids()
    match = {"winner": None}  # Blank match for rewards processing
    # Two states are maintained: one corresponding to the perception of the
    # draft according to each of the teams.
    blue_state = DraftState(DraftState.BLUE_TEAM, valid_champ_ids)
    red_state = DraftState(DraftState.RED_TEAM, valid_champ_ids)
    # Draft dictionary holds states for each perspective
    draft = {0: blue_state, 1: red_state}

    online_pred = tf.get_default_graph().get_tensor_by_name(
        "online/prediction:0")
    online_input = tf.get_default_graph().get_tensor_by_name("online/inputs:0")
    online_secondary_input = tf.get_default_graph().get_tensor_by_name(
        "online/secondary_inputs:0")

    experiences = []
    draft_count = 0  # Counts every attempted draft, legal or not
    while len(experiences) < n_experiences:
        if draft_count > MAX_DRAFT_ITERATIONS:
            break
        blue_state.reset()
        red_state.reset()
        submission_count = 0
        while (blue_state.evaluate() != DraftState.DRAFT_COMPLETE
               and red_state.evaluate() != DraftState.DRAFT_COMPLETE):
            active_team = get_active_team(submission_count)
            inactive_team = 0 if active_team else 1
            state = draft[active_team]
            start = deepcopy(state)
            if random.random() < explore_prob:
                # Explore state space by submitting a random action
                pred_act = [random.randint(0, state.num_actions - 1)]
            else:
                pred_act = sess.run(online_pred, feed_dict={
                    online_input: [state.format_state()],
                    online_secondary_input: [state.format_secondary_inputs()]
                })
            action = state.format_action(pred_act[0])
            if state.is_submission_legal(*action):
                # Update active state
                state.update(*action)
                # Update inactive state, remembering to mask non-bans
                # submitted by the opponent
                (cid, pos) = action
                inactive_pos = pos if pos == -1 else 0
                draft[inactive_team].update(cid, inactive_pos)
                submission_count += 1
            else:
                # Record the illegal submission as a negative memory and
                # abandon this draft
                bad_state = deepcopy(state)
                bad_state.update(*action)
                experiences.append(
                    (start, action, get_reward(bad_state, match, action, None),
                     bad_state))
                break
        draft_count += 1
    return experiences
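# Usage sketch for self_train, following the same checkpoint-restore pattern
# used in dueling_networks. The prefix "models/swainbot" is a hypothetical
# example path.
with tf.Session() as sess:
    saver = tf.train.import_meta_graph("models/swainbot.ckpt.meta")
    saver.restore(sess, "models/swainbot.ckpt")
    # Explore 10% of the time; ask for up to 5 negative memories.
    negative_memories = self_train(sess, explore_prob=0.1, n_experiences=5)
    print("Collected {} negative memories".format(len(negative_memories)))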