def activite(jeu):
    """Run the turn loop of a game until it ends, then save the game.

    While the current player's draw pile is truthy and the top of the pile is
    not flagged as unbalanced, a turn is played: a plank is selected, removed
    from the draw pile, re-created from its length and margin, stacked with
    the chosen offset, and the turn passes to the next player.  Otherwise the
    current player is announced as the winner and a rematch is offered.

    :param jeu: game state; indexed with the "pile" key in this function.
    """
    setName(jeu)
    continuer = True
    desequilibre = False
    while continuer:
        color = colorPlayer(jeu)
        joueur = indiceJoueur(jeu)
        pioche = piocheJoueur(jeu)
        # Play a turn only if the draw pile is not empty and the stack is
        # not unbalanced.
        if pioche and not desequilibre:
            selection = selectionnePlanchette(jeu)
            longueur = getlongueur(selection)
            marge = getMarge(selection)
            Pioche.retire(piocheJoueur(jeu), selection)
            # The selection is replaced by a plank built from its dimensions.
            selection = Planchette.cree(longueur, marge)
            decalage = choisisDecalage(jeu, selection)
            Pile.empileEtCalcule(jeu["pile"], selection, decalage, color)
            # The balance flag is read from the element now on top.
            desequilibre = Pile.sommet(pile(jeu))["desequilibre"]
            passeJoueurSuivant(jeu)
            selection = ""
            sauvegarde(jeu)
        else:
            # The player whose turn it is when the loop condition fails is
            # reported as the winner (the turn was already passed on above).
            print("{} a gagné ".format(getName(jeu, joueur)))
            message = "{} a gagné ".format(getName(jeu, joueur))
            continuer = False
            askRejouer(jeu, message)
        # NOTE(review): the original indentation was lost; the nesting of the
        # two calls below (inside the loop / after the loop) is reconstructed
        # and should be confirmed against the original file.
        majVues(jeu)
    sauvegarde(jeu, True)
def return_parenthes(expression):
    """Collect the elements of *expression* accepted by ``estexpression``.

    Every accepted element is pushed, in order, on a ``Pile`` created with
    argument 9; the pile's ``toString()`` result is returned.
    """
    retenus = Pile(9)
    for element in expression:
        if not estexpression(element):
            continue
        retenus.empiler(element)
    return retenus.toString()
def pile_parenthes(expression):
    """Check bracket pairing of *expression* with a ``Pile``.

    For each ``[`` or ``(`` the matching closing bracket is pushed.  Then the
    value returned by ``sommet()`` is compared with the current element and
    pushed back when they differ.  The result is ``pileVide()`` of the pile
    after the whole expression was processed.

    NOTE(review): this only makes sense if ``sommet()`` removes the element it
    returns -- confirm against the ``Pile`` implementation.
    """
    fermantes = {'[': ']', '(': ')'}
    attendues = Pile(9)
    for caractere in expression:
        if caractere in fermantes:
            attendues.empiler(fermantes[caractere])
        dessus = attendues.sommet()
        if caractere != dessus:
            # Not the expected closing bracket: put the value back.
            attendues.empiler(dessus)
    return attendues.pileVide()
def main():
    """Run the falling-shape game loop forever.

    Each tick (0.3 s) the board is cleared, the current shape is moved down
    and shown, or -- when it cannot move -- merged into the pile, full lines
    are cleared and a new random shape is created.  The pile is then drawn
    and the board refreshed.
    """
    width, height = 15, 40
    board = GameBoard("Game area", width, height)
    pile = Pile(width, height)

    def spawn():
        # New shape near the top centre, wired to the board and the pile.
        piece = generateRandomShape(int(width / 2), height - 2)
        piece.registerGameBoard(board)
        piece.registerPile(pile)
        return piece

    shape = None
    while True:
        board.clear()
        if shape is None:
            shape = spawn()
        elif shape.moveDown():
            shape.show()
        else:
            pile.merge(shape)
            pile.clearFullLines()
            shape = spawn()
        pile.show(board)
        board.update()
        time.sleep(0.3)
def getAction(self, state):
    """Pick the action that plays the most cards, breaking ties by distance.

    Among the legal actions with the largest number of real cards, the ones
    whose announced card number is closest (per ``Pile.getCardDistance``) to
    the agent's closest card are kept, and one of them is chosen at random.
    When no card has been announced yet, a random legal action is returned.

    :param state: current game state.
    :return: the chosen action, or ``None`` when there is no legal action
        (``getLegalActions`` returned ``None`` or an empty list).
    """
    legalActs = self.getLegalActions(state)
    if not legalActs:
        # Terminal state: previously this crashed inside random.choice.
        return None
    if state.getSaidLastCard() is None:
        return random.choice(legalActs)

    closestCard = self.findClosestCard(state)
    maxSize = max(len(act[0]) for act in legalActs)
    biggestActs = [act for act in legalActs if len(act[0]) == maxSize]

    # act[1][1] is the announced card number of the action.
    distances = [
        Pile.getCardDistance(closestCard, act[1][1], NUM_CARD_TYPES)
        for act in biggestActs
    ]
    # Taking the true minimum guarantees a non-empty candidate list, whereas
    # the former fixed starting bound could leave it empty.
    minD = min(distances)
    bestActs = [act for act, dist in zip(biggestActs, distances) if dist == minD]
    return random.choice(bestActs)
def cree():
    """Build a fresh game state as a dictionary.

    Keys: "fenetre" (1000x600 window), "pile", "0" and "1" (the two players),
    "courant" (set to 0) and "name" (two empty strings).
    """
    jeu = {}
    jeu["fenetre"] = Fenetre.cree(1000, 600)
    jeu["pile"] = Pile.cree()
    for indice in (0, 1):
        jeu[str(indice)] = Joueur.cree(indice)
    jeu["courant"] = 0
    jeu["name"] = ["", ""]
    return jeu
def findClosestCard(self,state): ### fix min prob! currHand = state.hands[self.index] lastCardNum = state.getSaidLastCard()[1] cardFreqs = currHand.getAllCardFreqs() dList = [(num+1 ,Pile.getCardDistance(num+1 , lastCardNum, NUM_CARD_TYPES)) for num in range(len(cardFreqs)) if cardFreqs[num]>0 ] print "findClosestCard: " ,dList minCard = dList[0] for pair in dList: if pair[1] < minCard[1]: minCard = pair return minCard[0]
def getSuggestion(lastCard,index):
    """Return the direction (-1, 0 or 1) to go from *lastCard* to *index*.

    0 is returned when ``Pile.getCardDistance`` between the two is 0.
    Otherwise the sign tells whether the plain difference between the two
    numbers equals that distance (direct way) or not (the other way round).
    """
    dist = Pile.getCardDistance(lastCard, index, NUM_CARD_TYPES)
    if dist == 0:
        return 0
    if lastCard > index:
        return -1 if lastCard - index == dist else 1
    return 1 if index - lastCard == dist else -1
def centerMass(hand,lastCard=None):
    """Return ``(card, var)``: the circular mean card of *hand* and its spread.

    Each card type ``i`` (0-based) is placed on the unit circle at angle
    ``2*pi*i/NUM_CARD_TYPES`` and weighted by its frequency in the hand.  The
    angle of the weighted sum is converted back to a card number, and ``var``
    is the frequency-weighted mean of the squared ``Pile.getCardDistance``
    between every card number (1..NUM_CARD_TYPES) and that card.

    An empty hand yields a spread of 0 instead of raising
    ``ZeroDivisionError`` (callers evaluate the hand *after* a move, so it
    can legitimately be empty on a winning move).

    :param hand: object exposing ``getAllCardFreqs()`` and ``getSize()``.
    :param lastCard: unused; kept for backward compatibility.
    :return: tuple ``(card, var)``.
    """
    handFreqs = hand.getAllCardFreqs()
    const = 2 * pi / NUM_CARD_TYPES

    sumX = 0
    sumY = 0
    for i in range(0, NUM_CARD_TYPES):
        sumX = sumX + handFreqs[i] * cos(const * i)
        sumY = sumY + handFreqs[i] * sin(const * i)

    angle = atan2(sumY, sumX)
    if angle < 0:
        # Map the angle from (-pi, pi] to [0, 2*pi).
        angle = 2 * pi + angle

    # +1 converts the 0-based sector index to a card number.
    card = int(round(angle / const) + 1)
    card = card % len(handFreqs)

    handSize = hand.getSize()
    if handSize == 0:
        return card, 0

    var = 0
    for i in range(1, NUM_CARD_TYPES + 1):
        prob = float(handFreqs[i - 1]) / handSize
        var += pow(Pile.getCardDistance(i, card, NUM_CARD_TYPES), 2) * prob
    return card, var
def choisisDecalage(jeu, planchetteAPoser):
    """Ask the current player for the offset of the plank to place.

    The player is prompted for an integer.  Two bounds are computed from the
    length and margin of the plank on top of the pile and from the length of
    the plank to place; the prompt is repeated for as long as the entered
    value lies strictly between those bounds.  Cancelling any prompt
    (``None``) aborts and returns ``None``.

    :param jeu: the game.
    :param planchetteAPoser: the plank to place.
    :return: ``None`` if the player cancelled, otherwise the entered offset.
    """

    def demande():
        # The prompt names whoever is the current player at this moment.
        return Dialogue.saisisEntier(
            "{joueur} | Précisez le décalage".format(
                joueur=Joueur.nom(joueurCourant(jeu))))

    saisie = demande()
    if saisie is None:
        return None

    # Plank currently on top of the pile.
    dessus = Empilement.planchette(Pile.sommet(pile(jeu)))
    demi_dessus = Planchette.longueur(dessus) / 2
    marge_dessus = Planchette.marge(dessus)
    demi_nouvelle = Planchette.longueur(planchetteAPoser) / 2

    borne_basse = (demi_dessus - marge_dessus) - demi_nouvelle
    borne_haute = (marge_dessus - demi_dessus) + demi_nouvelle

    while borne_basse < saisie < borne_haute:
        saisie = demande()
        if saisie is None:
            return None
    return saisie
def getFeatures(self, state , action):
    """Compute the feature vector of playing *action* in *state*.

    The action is applied to a deep copy of the state, and the features are
    derived from the resulting hand of the acting agent:

    - "numberOfCardsPlayed": first element of the announced move;
    - "distanceFromCM": closeness of the announced card to the hand's
      centre of mass, as a fraction of NUM_CARD_TYPES;
    - "var": 10 minus the spread returned by ``centerMass``;
    - "win": 100 when the hand is empty and the move was truthful, else 0;
    - "goodLie" / "badLie": set when the real and announced moves differ,
      scaled by the size of the played pile; both are forced to -2 when the
      move makes the agent the winner.

    :param state: game state before the action.
    :param action: tuple ``(real, said, agentIndex)``.
    :return: ``util.Counter`` of feature values.
    """
    feats = util.Counter()
    winValue = 100
    real, said, agentIndex = action

    # Evaluate the action on a copy so the caller's state is untouched.
    tempState = state.deepCopy()
    tempState.doAction(action)
    lastCardNum = tempState.getSaidLastCard()[1]
    oldHandList = state.hands[agentIndex].getAsList()
    newHand = tempState.hands[agentIndex]

    feats["numberOfCardsPlayed"] = said[0]

    cm, var = centerMass(newHand)
    distance = Pile.getCardDistance(lastCardNum, cm, NUM_CARD_TYPES)
    # float() forces true division: with Python 2 integer operands the
    # quotient was truncated to 0 or 1, losing the feature's resolution.
    feats["distanceFromCM"] = float(NUM_CARD_TYPES - distance + 1) / NUM_CARD_TYPES
    feats["var"] = 10 - var  # curve fit matlab?

    if newHand.getSize() == 0 and real == said:
        # This is the winning move.
        feats["win"] = winValue
    else:
        feats["win"] = 0

    # Takes the size of the played pile into account.
    factor = cos((pi / 2) * tempState.pile.getSize() / TOTAL_CARDS)
    if real != said:
        if said in oldHandList:
            # The announced cards were actually in hand: a "good" lie.
            feats["goodLie"] = pow(factor, 2)
        else:
            feats["badLie"] = pow(factor, 2)

    if tempState.isWinner(agentIndex):
        # Discourage lying if this is the winning action.
        feats["goodLie"] = -2
        feats["badLie"] = -2

    # Idea: use the probability table from the defense player to estimate
    # how good a lie is.
    return feats
from Pile import *

# Manual exercise of the Pile class: push seven values on a pile created
# with argument 5, printing the pile after each of the first six pushes,
# then display, read the top, pop and purge it (twice), and pop once more.
mapile1 = Pile(5)
print(mapile1)
for _valeur in (10, 20, 11, 0, 19, 18):
    mapile1.empiler(_valeur)
    print(mapile1)
mapile1.empiler(15)
mapile1.afficher()
print("voila le sommet", mapile1.sommet())
mapile1.depiler()
print(mapile1)
for _ in range(2):
    mapile1.purger()
    print(mapile1)
    if _ == 0:
        print("voila le sommet", mapile1.sommet())
mapile1.depiler()
def activite(jeu, debutPartie=True):
    """Run a game following the activity diagram of the Step 5 notice.

    Turns are played while the current player still has planks, the pile is
    balanced and nobody cancelled a dialog.  Each turn is also recorded with
    ``saveReplay`` (player index, then "length,margin", then the offset).
    When the loop ends, the outcome is announced, the game may be pickled to
    ``save.txt`` and the player is asked whether to start again.

    :param jeu: the game.
    :param debutPartie: True when the first plank is still to be placed; it
        is then stacked with offset 0 without asking the player.
    """
    nombrePlanchettes = Pioche.nombrePlanchettes(
        Joueur.pioche(joueurCourant(jeu)))
    desequilibre = False
    partieFinie = False

    while nombrePlanchettes != 0 and desequilibre == False and partieFinie != True:
        VueJeu.affichageMessage(Fenetre.toile(fenetre(jeu)),
                                "Tour : " + Joueur.nom(joueurCourant(jeu)),
                                120, 50, 24)
        planchette = selectionnePlanchette(jeu)
        if planchette == None:
            partieFinie = True
        else:
            #### REPLAY ####
            # Always the player index first ...
            saveReplay(indiceJoueur(jeu))
            # ... then the plank in the predefined "length,margin" format.
            saveReplay(
                str(Planchette.longueur(planchette)) + "," +
                str(Planchette.marge(planchette)))

            #### CURRENT GAME ####
            pioche = Joueur.pioche(joueurCourant(jeu))
            # Remove the plank from the draw pile.
            Pioche.retire(pioche, Planchette.numero(planchette))

            if debutPartie:
                # First plank of the game: the offset is forced to 0.
                Pile.empileEtCalcule(pile(jeu), planchette, 0)
                saveReplay(0)
                majVues(jeu)
                passeJoueurSuivant(jeu)
                debutPartie = False
            else:
                passeJoueurSuivant(jeu)
                decalage = choisisDecalage(jeu, planchette)
                saveReplay(decalage)
                if decalage == None:
                    partieFinie = True
                else:
                    Pile.empileEtCalcule(pile(jeu), planchette, decalage)
                    for empilement in pile(jeu):
                        if Empilement.desequilibre(empilement):
                            desequilibre = True
                # NOTE(review): the original indentation was lost; the
                # nesting of the two statements below is reconstructed.
                nombrePlanchettes = Pioche.nombrePlanchettes(
                    Joueur.pioche(joueurCourant(jeu)))
                majVues(jeu)

    if desequilibre:
        # The pile is unbalanced: the current player wins.
        Dialogue.afficheMessage(
            "{joueur} gagne !".format(joueur=Joueur.nom(joueurCourant(jeu))))
    else:
        if Pioche.nombrePlanchettes(Joueur.pioche(joueurCourant(jeu))) == 0:
            Dialogue.afficheMessage("Egalité de la partie")
        else:
            save = Dialogue.yesNoMessage(
                "La partie est terminée ! \nVoulez-vous sauvegarder ?")
            # SAVE
            if save:
                sauvegarde_jeu = (pile(jeu), joueurs(jeu), {
                    'indiceJoueur': indiceJoueur(jeu)
                })
                # The context manager closes (and flushes) the save file;
                # the handle was previously left open.
                with open("save.txt", "wb") as fichier:
                    pickle.dump(sauvegarde_jeu, fichier)

    # Play again?
    rejouer = Dialogue.yesNoMessage("Voulez-vous recommencer une partie ?")
    # The window is destroyed in every case.
    Fenetre.quitte(fenetre(jeu))
    if rejouer:
        # Re-create the data and start a new game.
        joue(cree())
def cree():
    """Build a fresh game as a 4-tuple.

    Elements, in order: the 1000x600 window, an empty pile, the list of the
    two players (numbered 1 and 2) and a dict holding 'indiceJoueur' = 0.
    """
    fenetre_jeu = Fenetre.cree(1000, 600)
    pile_jeu = Pile.cree()
    participants = [Joueur.cree(numero) for numero in (1, 2)]
    etat = {'indiceJoueur': 0}
    return (fenetre_jeu, pile_jeu, participants, etat)
def replay(jeu, iteration=0):
    """Display one turn of a recorded game and schedule the next one.

    The file ``replay.txt`` is read and split on ";".  Fields are grouped by
    three: player index, plank as "length,margin", offset.  The group that
    starts at *iteration* is replayed (plank removed from the player's draw
    pile, stacked, views refreshed) and the function re-schedules itself
    through the Tk ``after`` method with a 1000 ms delay and
    ``iteration + 3``.  When no complete group is left, the end-of-game
    messages are shown.  If the file cannot be opened, an error is printed
    and displayed.

    ``after`` is used instead of ``time.sleep`` because sleeping prevented
    Tkinter from drawing until the very end.

    :param jeu: tuple returned by Jeu.cree()
    :param iteration: index of the first field of the turn to replay.
    :return: nothing.
    """
    try:
        fichier = open("replay.txt")
    except IOError:
        print("Fichier de replay introuvable.")
        VueJeu.affichageMessage(Fenetre.toile(fenetre(jeu)),
                                "Fichier introuvable.\nRééssayez")
        return

    with fichier:
        champs = fichier.read().split(";")

    tour = champs[iteration:iteration + 3]
    if len(tour) != 3:
        # No complete turn left: reset the drawing to erase the previous
        # text, then show the final messages.
        Fenetre.effaceGraphiques(fenetre(jeu))
        majVues(jeu)
        VueJeu.affichageMessage(Fenetre.toile(fenetre(jeu)),
                                "PARTIE TERMINEE")
        perdant = int(champs[-4])
        VueJeu.affichageMessage(
            Fenetre.toile(fenetre(jeu)),
            Joueur.nom(joueurs(jeu)[perdant]) + " perd !", 500, 300)
        return

    indice = int(tour[0])
    # Draw pile of the player who played this turn.
    pioche_du_tour = Joueur.pioche(joueurs(jeu)[indice])
    dimensions = tour[1].split(",")
    planchette = Planchette.cree(int(dimensions[0]), int(dimensions[1]))
    decalage = int(tour[2])

    Pioche.retire(pioche_du_tour, Planchette.numero(planchette))
    Pile.empileEtCalcule(pile(jeu), planchette, decalage)
    majVues(jeu)

    # Documentation: http://tkinter.fdex.eu/doc/uwm.html#after
    Fenetre.tk(fenetre(jeu)).after(1000, replay, jeu, iteration + 3)

    # Show who is playing.  This comes after the scheduling call, otherwise
    # the message is erased before it can be seen.
    VueJeu.affichageMessage(
        Fenetre.toile(fenetre(jeu)),
        "Tour : " + Joueur.nom(joueurs(jeu)[indice]), 120, 50, 24)
def NPI(calcul):
    """Evaluate the whitespace-separated postfix expression *calcul*.

    Tokens rejected by ``estOperation`` are pushed on a ``Pile``.  For an
    operation, two values are read with ``sommet()`` (second operand first),
    combined with ``calculer`` and the result is pushed.  The pile is
    displayed after every step.  The final ``sommet()`` value is returned.
    """
    operandes = Pile(5)
    for jeton in calcul.split():
        if estOperation(jeton):
            droite = operandes.sommet()
            operandes.afficher()
            gauche = operandes.sommet()
            operandes.afficher()
            operandes.empiler(calculer(gauche, droite, jeton))
        else:
            operandes.empiler(jeton)
        operandes.afficher()
    return operandes.sommet()
def extractLargestGroup(self,hand,lastCard):
    """Suggest a direction from *lastCard* toward the best group of cards.

    *hand* is read as ``hand[card - 1]`` and its entries are compared with 0.
    Positions are walked with ``getNext`` and runs of consecutive non-zero
    entries ("groups") are accumulated.  A group is merged with the previous
    one when ``Pile.getCardDistance`` between the previous group's end and
    the current group's start is 2.  The group with the largest size is kept
    (on equal size, the one whose nearer end is closer to *lastCard*).

    :param hand: sequence of per-card counts.
    :param lastCard: card number the search is relative to.
    :return: 2 when *hand* contains no 0 entry; otherwise the value of
        ``getSuggestion(lastCard, index)`` for the nearer end of the kept
        group, or 0 if no group was kept.
    """
    if not (0 in hand):
        return 2
    j=0 # Iteration counter
    i=lastCard-1 #start searching for group before current card
    if i==0:
        i=NUM_CARD_TYPES-1
    lastGroupIndex=[0,0]
    currGroupIndex= [0,0]
    currGroupSize=0
    lastGroupSize=0
    maxGroupSize=0
    distMaxGroup=NUM_CARD_TYPES
    currSuggestion=0
    # j bounds the total number of steps so the scan always terminates.
    while j<=NUM_CARD_TYPES+2:
        while hand[i-1]==0 and j<=NUM_CARD_TYPES+2: #find the first card which is in the hand
            i=getNext(i)
            j=j+1
        if j>NUM_CARD_TYPES+2:
            break
        # Remember the previous group before starting a new one.
        lastGroupIndex= currGroupIndex[:]
        lastGroupSize=currGroupSize
        currGroupSize=0
        currGroupIndex[0]=i
        # Extend the group while the following position is also held.
        while hand[getNext(i)-1]>0 and j<=NUM_CARD_TYPES+2:
            currGroupSize=currGroupSize+hand[i-1]
            i=getNext(i)
            j=j+1
        if j<=NUM_CARD_TYPES+2:
            # Count the last position of the group, which the loop skipped.
            currGroupSize=currGroupSize+hand[i-1]
            currGroupIndex[1]=i
        else:
            currGroupIndex[1]=getPrev(i)
        size=currGroupSize
        # A lastGroupIndex[0] of 0 means there is no previous group yet.
        if (not (lastGroupIndex[0]==0)) and Pile.getCardDistance( lastGroupIndex[1] , currGroupIndex[0], NUM_CARD_TYPES)==2:
            startIndex=lastGroupIndex[0]
            size=size+lastGroupSize
        else:
            startIndex=currGroupIndex[0]
        groupIndexes=[startIndex, currGroupIndex[1]]
        if size>=maxGroupSize:
            dist=Pile.getCardDistance(lastCard,groupIndexes[0],NUM_CARD_TYPES)
            dist2=Pile.getCardDistance(lastCard,groupIndexes[1],NUM_CARD_TYPES)
            # Keep the end of the group that is nearer to lastCard.
            if dist <= dist2:
                index=groupIndexes[0]
            else:
                index=groupIndexes[1]
                dist=dist2
            if size>maxGroupSize or distMaxGroup>dist:
                distMaxGroup=dist
                maxGroupSize=size
                currSuggestion=getSuggestion(lastCard,index)
        i=getNext(i)
        j=j+1
    return currSuggestion

# Earlier implementation, kept commented out for reference:
#def extractLargestGroup(self,hand,lastCard):
#    j=0 # Iteration counter
#    i=lastCard-1 #start searching for group before current card
#    if i==0:
#        i=NUM_CARD_TYPES-1
#    currGroupSize=0
#    lastGroupSize=0
#    maxGroupSize=0
#    distMaxGroup=0
#    while j<NUM_CARD_TYPES+2:
#        while hand[i]==0 and j<NUM_CARD_TYPES+2: #find the first card which is in the hand
#            i=i+1
#            if i==NUM_CARD_TYPES:
#                i=0
#            j=j+1
#        if j==NUM_CARD_TYPES+2:
#            break
#
#        lastGroupSize=currGroupSize
#        currGroupSize=0
#        dist= abs(i-lastCard)
#        while hand[i]>0 and j<NUM_CARD_TYPES:
#            currGroupSize=currGroupSize+hand[i]
#            i=i+1
#            if (i>=NUM_CARD_TYPES):
#                i=1
#            j=j+1
#        distLast=abs(i-lastCard)
#        if dist>distLast:
#            dist=distLast
#        if lastGroupSize+currGroupSize>maxGroupSize:
#            maxGroupSize=lastGroupSize+currGroupSize
#            distMaxGroup=dist
#        elif lastGroupSize+currGroupSize==maxGroupSize and dist<distMaxGroup:
#            distMaxGroup=dist
#    return distMaxGroup, maxGroupSize
def extractLargestGroup(hand,lastCard):
    """Suggest a direction from *lastCard* toward the best group of cards.

    *hand* is read as ``hand[card - 1]`` and its entries are compared with 0.
    Positions are walked with ``getNext`` and runs of consecutive non-zero
    entries ("groups") are accumulated.  A group is merged with the previous
    one when ``Pile.getCardDistance`` between the previous group's end and
    the current group's start is 2.  The group with the largest size is kept
    (on equal size, the one whose nearer end is closer to *lastCard*).

    :param hand: sequence of per-card counts.
    :param lastCard: card number the search is relative to.
    :return: 2 when *hand* contains no 0 entry; otherwise the value of
        ``getSuggestion(lastCard, index)`` for the nearer end of the kept
        group, or 0 if no group was kept.
    """
    if not (0 in hand):
        return 2
    j=0 # Iteration counter
    i=lastCard-1 #start searching for group before current card
    if i==0:
        i=NUM_CARD_TYPES-1
    lastGroupIndex=[0,0]
    currGroupIndex= [0,0]
    currGroupSize=0
    lastGroupSize=0
    maxGroupSize=0
    distMaxGroup=NUM_CARD_TYPES
    currSuggestion=0
    # j bounds the total number of steps so the scan always terminates.
    while j<=NUM_CARD_TYPES+2:
        while hand[i-1]==0 and j<=NUM_CARD_TYPES+2: #find the first card which is in the hand
            i=getNext(i)
            j=j+1
        if j>NUM_CARD_TYPES+2:
            break
        # Remember the previous group before starting a new one.
        lastGroupIndex= currGroupIndex[:]
        lastGroupSize=currGroupSize
        currGroupSize=0
        currGroupIndex[0]=i
        # Extend the group while the following position is also held.
        while hand[getNext(i)-1]>0 and j<=NUM_CARD_TYPES+2:
            currGroupSize=currGroupSize+hand[i-1]
            i=getNext(i)
            j=j+1
        if j<=NUM_CARD_TYPES+2:
            # Count the last position of the group, which the loop skipped.
            currGroupSize=currGroupSize+hand[i-1]
            currGroupIndex[1]=i
        else:
            currGroupIndex[1]=getPrev(i)
        size=currGroupSize
        # A lastGroupIndex[0] of 0 means there is no previous group yet.
        if (not (lastGroupIndex[0]==0)) and Pile.getCardDistance( lastGroupIndex[1] , currGroupIndex[0], NUM_CARD_TYPES)==2:
            startIndex=lastGroupIndex[0]
            size=size+lastGroupSize
        else:
            startIndex=currGroupIndex[0]
        groupIndexes=[startIndex, currGroupIndex[1]]
        if size>=maxGroupSize:
            dist=Pile.getCardDistance(lastCard,groupIndexes[0],NUM_CARD_TYPES)
            dist2=Pile.getCardDistance(lastCard,groupIndexes[1],NUM_CARD_TYPES)
            # Keep the end of the group that is nearer to lastCard.
            if dist <= dist2:
                index=groupIndexes[0]
            else:
                index=groupIndexes[1]
                dist=dist2
            if size>maxGroupSize or distMaxGroup>dist:
                distMaxGroup=dist
                maxGroupSize=size
                currSuggestion=getSuggestion(lastCard,index)
        i=getNext(i)
        j=j+1
    return currSuggestion
def calcInitProb(self,numCards,amountOfCard,sizeHand,othersHave,index): for k in range(0,NUM_CARD_TYPES): if (self.numPlayers==2): has=floor((numCards-othersHave[k])/amountOfCard) self.rivalPiles[index][amountOfCard].addCards([k for z in range(0,has) continue if amountOfCard<=othersHave[k]: prop=choose(othersHave[k],amountOfCard) prop=prop*choose(numCards-othersHave[k],sizeHand-amountOfCard) prop=prop/choose(numCards,sizeHand) else: prop=0 self.rivalPiles[index][amountOfCard].addCard(k,True,prop) #choose of a over b def choose(self, a,b): sum=1 if a<b: return 0 if a==b: return sum for m in range(b+1,a+1): sum=sum*m return m """" The starategy is to let every rival player gain the possibility of having the cards the players doesnt have """ def updateDefense(self,action): numCards,said,agentNum= action if (agentNum!=self.index): self.rivalPiles[agentNum].removeCards(said,numCards, True, self.lieFactor[agentNum]) def updateDefGetPile(self,pile,agentNum): if agentNum==self.index: #else: def __str__(self): printstr = "BluffbotAgent, #"+str(self.index)+"\n" return printstr # def caughtLying(self): # self.numWasCaughtLying += 1 def getNumWasCaughtLying(self): return self.numWasCaughtLying def getIndex(self): return self.index """ Orders the cards according to our priority of getting rid of them (worst cards are first). 
Cards are ordered according to the direction we want to go relative to current last card.0 means we prefer to go down from the current last card, meaning we want to get rid of our high cards, 1 vice versa POSSIBLE OPTIMIZATION: don't only order by rank but also by size num suits available (2 tens shouldn't be lied with before 9,8 for example) """ def orderWorstCards(self,state, prefDirection): self.worstCards = [] lastCardNum = state.getSaidLastCard()[1] pivot = (lastCardNum+ (NUM_CARD_TYPES/2))% NUM_CARD_TYPES +1 print "pivot is ", pivot legalNums = self.getLegalCardNums(lastCardNum) print"legal nums are",legalNums lieNumbers = deque([num+1 for num in range(NUM_CARD_TYPES) if (num+1) not in legalNums]) for i in range(len(lieNumbers)-1): if (abs(lieNumbers[i+1]-lieNumbers[i])%NUM_CARD_TYPES) != 1: break midCard = lieNumbers[i] print "midCard , i are ",midCard,i print "[orderWorstCards] lie numbers are", lieNumbers if prefDirection == 0: lieNumbers = deque(reversed(lieNumbers)) legalNums = reversed(legalNums) while lieNumbers[0] != pivot: lieNumbers.rotate(1) print "[orderWorstCards] rotated lie numbers are", lieNumbers newMidIndex = list(lieNumbers).index(midCard) lowerHalf = list(lieNumbers)[:newMidIndex+prefDirection] # correction to index if we prefer up direction upperHalf = reversed(list(lieNumbers)[newMidIndex+prefDirection:]) orderedLieNums = lowerHalf+list(upperHalf)+list(legalNums) print "[orderWorstCards] orderedLieNums are", orderedLieNums self.canLie = False for cardNum in orderedLieNums: print" card Num is ",cardNum cardsList = state.hands[self.index].getAllOfCardsNum(cardNum) if cardsList != None: self.worstCards += cardsList if cardNum not in legalNums: # we do have a card self.canLie = True print "[orderWorstCards] worstCardList is", self.worstCards """ Returns False if agent thinks target has lied, True if agent thinks he told the truth """ def playDefense(self,state, offensePlayerIndex): return False def getLegalActions(self, state): legalActions 
= [] self.orderWorstCards(state , 0) currHand = state.hands[self.index] #terminal state, no legal actions if currHand.getSize() == 0: return None # add beginning state!!! lastCardNum = state.getSaidLastCard()[1] legalNums = self.getLegalCardNums(lastCardNum) for i in range(min(currHand.getSize(),CARDS_PER_TURN) ): # so we don't attempt to play more than we have playSize = i+1 for num in legalNums: # find moves for each legal num- if last card is 6, legal nums are 5 6 7 realBL = [] saidBL = [] realGL = [] saidGL = [] realT = [] saidT = [] cards = currHand.getChoice(num , playSize) if cards != None: for cardIndex in range(len(cards)): realT.append(cards[cardIndex]) saidT.append(cards[cardIndex]) if self.canLie: # True if we have cards that aren't in the legal cards group, meaning we can lie with them >:) realGL.append(self.worstCards[cardIndex]) saidGL.append(cards[cardIndex]) legalActions.append( (realT , saidT ,self.index ) ) if self.canLie: legalActions.append((realGL , (len(realGL),num) , self.index ) ) else: for cardIndex in range(playSize): realBL.append(self.worstCards[cardIndex]) saidBL.append((1,num)) legalActions.append((realBL , (len(realBL),num) , self.index)) ################# return legalActions def getValue(self, state): """ Returns max_action Q(state,action) where the max is over legal actions. Note that if there are no legal actions, which is the case at the terminal state, you should return a value of 0.0. """ "terminal state" if not self.getLegalActions(state): return 0.0 poliVal = self.getPoliVal(state) return poliVal[1] # the value def getPolicy(self, state): """ Compute the best action to take in a state. Note that if there are no legal actions, which is the case at the terminal state, you should return None. 
""" if not self.getLegalActions(state): return None poliVal = self.getPoliVal(state) return poliVal[0] # the action def getPoliVal(self,state): maxVal=-float('inf') actionsList=[] for nextAction in self.getLegalActions(state): if maxVal<self.getQValue(state,nextAction): maxVal=self.getQValue(state,nextAction) actionsList=[nextAction] if abs(maxVal - self.getQValue(state,nextAction)) < 0.01: actionsList.append(nextAction) return random.choice(actionsList), maxVal def getAction(self, state): """ Compute the action to take in the current state. With probability self.epsilon, we should take a random action and take the best policy action otherwise. Note that if there are no legal actions, which is the case at the terminal state, you should choose None as the action. """ # Pick Action legalActions = self.getLegalActions(state) if util.flipCoin(self.epsilon): return random.choice(legalActions) return self.getPolicy(state) def getQValue(self, state, action): """ Should return Q(state,action) = w * featureVector where * is the dotProduct operator """ featureDict = self.featExtractor.getFeatures(state,action) sigma = 0 for feat in featureDict.keys(): sigma += featureDict[feat]*self.wDict[feat] return sigma def update(self, state, action, nextState, reward): """ Should update your weights based on transition """ maxNextQval = 0.0 oldQ = self.getQValue(state, action) ### finding max Q value on succesors if not not self.getLegalActions(nextState): nextActionQvals= [self.getQValue(nextState, nextAction) for nextAction in self.getLegalActions(nextState)] maxNextQval=max(nextActionQvals) correction = (reward+self.discount*maxNextQval-oldQ) featureDict = self.featExtractor.getFeatures(state,action) for feat in featureDict.keys(): self.wDict[feat] += self.alpha*correction*featureDict[feat] class NaivebotAgent(game.Agent): def __init__(self,index,numPlayers , naiveFactor = 0.5,numTraining=0): self.index = index self.naiveFactor = naiveFactor self.hands = [ Pile.Pile(NUM_SUITS , 
NUM_CARD_TYPES) for i in range(numPlayers)] # to also hold estimates of opponents hands self.numTraining = numTraining self.initAgent() def initAgent(self): self.numWasCaughtLying = 0 self.canLie = False def getNumWasCaughtLying(self): return self.numWasCaughtLying # def getIndex(self): # return self.index def __str__(self): printstr = "NaivebotAgent, #"+str(self.index)+"\n" return printstr def findClosestCard(self,state): ### fix min prob! currHand = state.hands[self.index] lastCardNum = state.getSaidLastCard()[1] cardFreqs = currHand.getAllCardFreqs() dList = [(num+1 ,Pile.getCardDistance(num+1 , lastCardNum, NUM_CARD_TYPES)) for num in range(len(cardFreqs)) if cardFreqs[num]>0 ] print "findClosestCard: " ,dList minCard = dList[0] for pair in dList: if pair[1] < minCard[1]: minCard = pair return minCard[0] def getAction(self, state): """ Naivebot first tries to play most cards he can (truthfully if possible), tiebreaker between same length actions is according to which action gets him closer to his closest card """ bestActs = [] legalActs = self.getLegalActions(state) if state.getSaidLastCard() == None: return random.choice(legalActs) closestCard = self.findClosestCard(state) minD = NUM_CARD_TYPES actSizeList = [len(act[0]) for act in legalActs] maxSize = max(actSizeList) biggestActs = [act for act in legalActs if len(act[0]) == maxSize] for act in biggestActs: # print "in GetAction, action is", act # print "act[1][0] is ",act[1][0] newD = Pile.getCardDistance(closestCard, act[1][1],NUM_CARD_TYPES) #distance between said card and closest card # print "dist is ",newD if newD == minD: bestActs.append(act) if newD < minD: bestActs = [] minD = newD bestActs.append(act) return random.choice(bestActs) def playDefense(self,state, offensePlayerIndex): if state.isWinner(offensePlayerIndex): return True if util.flipCoin(self.naiveFactor): return True else: return False def getLegalActions(self, state): legalTrueActions = [] legalFalseActions = [] currHand = 
state.hands[self.index] handList = state.hands[self.index].getAsList() #terminal state, no legal actions if currHand.getSize() == 0: return None # add beginning state!!! if state.getSaidLastCard() == None: for card in handList: legalTrueActions.append(([card] , (1 , card[1]) , self.index)) return legalTrueActions lastCardNum = state.getSaidLastCard()[1] legalNums = self.getLegalCardNums(lastCardNum) for i in range(min(currHand.getSize(),CARDS_PER_TURN) ): # so we don't attempt to play more than we have playSize = i+1 for num in legalNums: # find true moves trueMove = [] badLieMove =[] real =[] said=[] cards = currHand.getChoice(num , playSize) if cards != None: for cardIndex in range(len(cards)): real.append(cards[cardIndex]) said.append(cards[cardIndex]) trueMove.append((cards[cardIndex] , cards[cardIndex], self.index)) legalTrueActions.append( (real , (len(real) , num) , self.index) ) else: tmp = handList[:] for cardIndex in range(playSize): randomCard = random.choice(tmp) tmp.remove(randomCard) real.append(randomCard) said.append((1,num)) badLieMove.append( (randomCard, (1,num),self.index) ) legalFalseActions.append( (real,( len(real) ,num ) , self.index )) ################# if legalTrueActions != [] : #############EREZ: try not legalTrueActions return legalTrueActions else: return legalFalseActions class HumanAgent(game.Agent): def __init__(self,index,numPlayers ): self.numPlayers = numPlayers self.index = index self.initAgent() def initAgent(self): self.numWasCaughtLying = 0 def convertStrToTuple(self , str): trimStr = str.replace(' ' ,'') trimStr = trimStr.strip('()') trimStrSplit = trimStr.split(',') print trimStrSplit return (int(trimStr[0]),int(trimStr[1])) """ Gets move from human input. 
No validity checking yet """ def getAction(self, state): realMove = [] hand = state.hands[self.getIndex()] print "Your hand is \n"+str(hand) print "Last cards are "+ str(state.getSaidLastCard()) print "Enter card numbers you wish to play" inputStr = raw_input("Enter card number you wish to play and amount (in the format amount,cardNum , 'f' when finished") while(inputStr != 'f'): inputSplit = inputStr.split(',') amount = int(inputSplit[0]) cardNum = int(inputSplit[1]) realChoice = hand.getChoice(cardNum , amount) if realChoice != None: realMove +=(realChoice) inputStr = raw_input("Enter card number you wish to play and amount (in the format amount,cardNum , 'f' when finished") inputStr = raw_input("And what will you say these cards are?") saidCards = (len(realChoice), int(inputStr)) action = ( realMove , saidCards,self.getIndex() ) print action return action def playDefense(self,state, offensePlayerIndex): print "What will you play? Your hand is \n"+str(state.hands[self.getIndex()]) print "Last cards are "+ str(state.getSaidLastCard())+", played by opponent "+str((self.getIndex()-1)%self.numPlayers) rawDefense = raw_input("Play defense: enter t if you think the move is true, f if you think opponent lied") if rawDefense == 't': return False # playDefense returns false if we think move is true if rawDefense == 'f': return True return True def caughtLying(self): self.numWasCaughtLying += 1 def getNumWasCaughtLying(self): return self.numWasCaughtLying def getIndex(self): return self.index