我只是按照基本结构实现了一个简单的极小极大(minimax)函数:
def player(board):
    """Pick the best move for whichever side is to play on ``board``.

    "X" is treated as the maximizing side and any other result of
    ``turn(board)`` as the minimizing side.

    Returns:
        ``None`` if ``Terminal`` reports a winner or if the search finds no
        move to play (e.g. a full board); otherwise the chosen action index
        plus one.
        NOTE(review): the ``+ 1`` presumably converts to a 1-based position
        for the caller -- confirm against the call site.
    """
    if Terminal(board) is not False:
        return None

    if turn(board) == "X":
        _, move = max_value(board)
    else:
        _, move = min_value(board)

    # The search returns ``None`` as the move when there is nothing to play;
    # adding 1 to it would raise a TypeError.
    if move is None:
        return None
    return move + 1
def max_value(board):
    """Minimax step for the maximizing side.

    Returns:
        A ``(value, move)`` tuple: the best value reachable from ``board``
        and the action that achieves it. ``move`` is ``None`` when the
        position has a winner or no moves are left.
    """
    if Terminal(board) is not False:
        return Utility(board), None

    actions = Actions(board)
    if not actions:
        # Full board without a winner is a draw. Without this guard the
        # -1000 sentinel below would be returned as the position's value.
        return 0, None

    best_value = -1000  # sentinel below every real utility
    best_move = None
    for action in actions:
        value, _ = min_value(Result(board, action))
        if value > best_value:
            best_value = value
            best_move = action
            if best_value == 1:
                # 1 is the highest utility, so no other move can beat it.
                return best_value, best_move
    return best_value, best_move
def min_value(board):
    """Minimax step for the minimizing side.

    Returns:
        A ``(value, move)`` tuple: the lowest value reachable from ``board``
        and the action that achieves it. ``move`` is ``None`` when the
        position has a winner or no moves are left.
    """
    if Terminal(board) is not False:
        return Utility(board), None

    actions = Actions(board)
    if not actions:
        # Full board without a winner is a draw. Without this guard the
        # +1000 sentinel below would be returned as the position's value.
        return 0, None

    best_value = 1000  # sentinel above every real utility
    best_move = None
    for action in actions:
        value, _ = max_value(Result(board, action))
        if value < best_value:
            best_value = value
            best_move = action
            if best_value == -1:
                # -1 is the lowest utility, so no other move can beat it.
                return best_value, best_move
    return best_value, best_move
程序首先调用 player,它返回最佳走法。其他函数如下:
def Actions(board: list) -> list:
    """Return the indices of all empty cells of ``board``, in board order.

    A cell counts as empty when it holds ``None``.
    """
    return [index for index, cell in enumerate(board) if cell is None]
def Result(board, action):
    """Return a copy of ``board`` with the current side's mark at ``action``.

    The mark is whatever ``turn(board)`` returns. The assignment is made on
    the copy, so the ``board`` passed in is left untouched.
    """
    successor = board.copy()
    successor[action] = turn(board)
    return successor
def Terminal(board):
    """Return the winning mark on ``board``, or ``False`` if nobody has won.

    ``board`` is indexed 0-8 in row-major order; empty cells hold ``None``.
    A full board without a winner also yields ``False``, so callers must
    detect draws separately (for example by checking for remaining moves).
    """
    winning_lines = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),  # rows
        (0, 3, 6), (1, 4, 7), (2, 5, 8),  # columns
        (0, 4, 8), (2, 4, 6),             # diagonals
    )
    for a, b, c in winning_lines:
        if board[a] is not None and board[a] == board[b] == board[c]:
            return board[a]
    return False
def Utility(board):
    """Score ``board`` from X's point of view.

    Returns:
        ``1`` if ``Terminal`` reports "X" as the winner, ``-1`` if it
        reports "O", and ``0`` if it reports no winner.

    Raises:
        ValueError: if ``Terminal`` returns anything other than ``False``,
            "X" or "O".
    """
    result = Terminal(board)
    if result is False:
        return 0
    if result == "X":
        return 1
    if result == "O":
        return -1
    # Raising a plain string is itself a TypeError in Python 3, so raise a
    # real exception and carry the diagnostic data in its message.
    raise ValueError(
        f"Utility conditions were not met. Result: {result!r}; Board: {board!r}"
    )
我分别检查了这些函数的输出,它们看起来都符合逻辑。但 player 函数只是按照井字棋棋盘的顺序依次返回走法,如果某个位置已被占用,它就跳到下一个位置,其行为相当于:`if board[move] != None: board[move + 1] = "O"`
1条答案
按热度按时间icomxhvb1#
我经常看到这种情况,问题出在你的效用函数上。你的返回值需要依赖于玩家的回合。现在如果"X"赢了,即使是"O"回合,你也会返回1。
我还认为你需要返回更高的值,以便以后在得分中包含深度(总是找到通往胜利的最短路径)。因此,将1和-1改为10和-10。
伪码: