Demo notebook : SelfPlay with ONNX models#
Setup#
[ ]:
import importlib.util

# Set to False to install the released package instead of the main-branch checkout on Colab.
DEV = True


def is_module_available(name):
    """Return True if the module *name* can be located, False otherwise.

    For a dotted name, ``importlib.util.find_spec`` first imports the parent
    package and raises ``ModuleNotFoundError`` when that parent is missing
    (e.g. no ``google`` package on a local machine), so that case is mapped
    to ``False`` here instead of propagating.
    """
    try:
        return importlib.util.find_spec(name) is not None
    except ModuleNotFoundError:
        return False


# Execution mode used by the install cell: "colab", "colab-dev" or "local".
if is_module_available("google.colab"):
    MODE = "colab-dev" if DEV else "colab"
else:
    MODE = "local"
[ ]:
if MODE == "colab":
%pip install -q lczerolens
elif MODE == "colab-dev":
!rm -r lczerolens
!git clone https://github.com/Xmaster6y/lczerolens -b main
%pip install -q ./lczerolens
Download ONNX Models#
[ ]:
# Download the two Maia ONNX files from Google Drive (by file id) and save them
# in the current working directory under the names given with -O.
!gdown 1yw2jzcdrGk3ao-nbVl42VUmCUF-6spfO -O maia1100.onnx
!gdown 1BFTmWwhILhAF4PJ2F3L15Fi5LPomCGFl -O maia1900.onnx
Downloading...
From: https://drive.google.com/uc?id=1yw2jzcdrGk3ao-nbVl42VUmCUF-6spfO
To: /mnt/d/Documents/0Polytech Sorbonne/MAIN/MAIN3/StageLip6/lczerolens/docs/source/notebooks/features/maia1100.onnx
100%|██████████████████████████████████████| 3.48M/3.48M [00:00<00:00, 38.5MB/s]
Downloading...
From: https://drive.google.com/uc?id=1BFTmWwhILhAF4PJ2F3L15Fi5LPomCGFl
To: /mnt/d/Documents/0Polytech Sorbonne/MAIN/MAIN3/StageLip6/lczerolens/docs/source/notebooks/features/maia1900.onnx
100%|██████████████████████████████████████| 3.48M/3.48M [00:00<00:00, 33.7MB/s]
Import Libraries#
[ ]:
# ruff: noqa: E402
import importlib
import lczerolens.sampling as sampling
import lczerolens.search as search
import numpy as np
# Re-execute the already-imported library modules before the names below are
# imported from them (presumably so edits to a dev checkout are picked up
# without restarting the kernel - confirm intent).
for module in (sampling, search):
    importlib.reload(module)
import IPython.display
from lczerolens import LczeroBoard, LczeroModel
from lczerolens.sampling import MCTSSampler, ModelSampler, SelfPlay
from lczerolens.search import MaterialHeuristic, MCTS, Node
1. SelfPlay (MCTS + NN) Maia1100 VS Maia1900#
Load & Setup Model Samplers#
[ ]:
# Load the two networks from the ONNX files downloaded above.
maia_1100_model = LczeroModel.from_path("maia1100.onnx")
maia_1900_model = LczeroModel.from_path("maia1900.onnx")
# White: MCTS sampler built on the Maia-1100 model, 100 simulations per move.
# Black: sampler that uses the Maia-1900 model directly (no search).
# NOTE(review): use_argmax=True presumably selects the top-scoring move
# deterministically instead of sampling - confirm against the library.
white_sampler = MCTSSampler(model=maia_1100_model, num_simulations=100, use_argmax=True)
black_sampler = ModelSampler(model=maia_1900_model, use_argmax=True)
Initialize SelfPlay#
[ ]:
# Pair the two samplers: white_sampler is passed as `white`, black_sampler as `black`.
self_play = SelfPlay(white=white_sampler, black=black_sampler)
# Collected (log, to_play) pairs, filled by report_fn.
logs = []


def report_fn(log, to_play):
    """Append one ``(log, to_play)`` pair to the module-level ``logs`` list.

    Passed as ``report_fn`` to ``self_play.play``. ``logs`` is looked up at
    call time, so rebinding ``logs = []`` in a later cell redirects the
    records to the new list.

    Args:
        log: Value reported by the caller; stored unchanged.
        to_play: Second value reported by the caller; stored unchanged
            (presumably the side to move - confirm against the library).
    """
    logs.append((log, to_play))
Play Game#
[ ]:
# Start from a default-constructed board and play with max_moves=10,
# recording each report through report_fn.
board = LczeroBoard()
game_moves, final_board = self_play.play(board=board, max_moves=10, report_fn=report_fn)
Display Results#
[ ]:
# Show the moves played and the result string of the final board.
# NOTE(review): the recorded output shows "*" with exactly 10 moves listed,
# presumably because the game was cut off at max_moves before it finished.
print("Game moves:", game_moves)
print("Game result:", final_board.result())
Game moves: [Move.from_uci('e2e4'), Move.from_uci('c7c5'), Move.from_uci('g1f3'), Move.from_uci('d7d6'), Move.from_uci('f1c4'), Move.from_uci('e7e6'), Move.from_uci('b1c3'), Move.from_uci('g8f6'), Move.from_uci('d2d3'), Move.from_uci('f8e7')]
Game result: *
2. Test heuristic : Material advantage#
[ ]:
# Position to analyse, as a FEN string; the "b" field means Black is to move.
fen = "6nr/pp3p1p/k1p5/8/1QN5/2P1P3/4KPqP/8 b - - 5 26"
# Number of iterations passed to mcts.search_ below.
iterations = 100
# Constructor arguments for MCTS below.
c_puct = 1.0
n_parallel_rollouts = 1
[ ]:
# Build the search inputs: the board for `fen`, the material heuristic,
# the MCTS object configured with the parameters above, and a parentless
# root node wrapping the board.
board = LczeroBoard(fen)
heuristic = MaterialHeuristic()
mcts = MCTS(c_puct=c_puct, n_parallel_rollouts=n_parallel_rollouts)
root = Node(board, parent=None)
[ ]:
# Run the search from `root` with the material heuristic; the return value is
# not used, so results are read back from `root` (presumably updated in place).
mcts.search_(root, heuristic=heuristic, iterations=iterations)
# Pick the index with the largest entry in root.visits and use it to index
# root.legal_moves - assumes the two are index-aligned, TODO confirm.
best_move_idx = np.argmax(root.visits)
best_move = root.legal_moves[best_move_idx].uci()
print("Best move :", best_move)
Best move : g8e7
[ ]:
# Render the search tree under `root` (max_depth=30) and show it inline as HTML.
graph = MCTS.render_tree(root, max_depth=30)
# Call display through the imported IPython.display module rather than the
# `display` builtin, which only exists when IPython has injected it.
IPython.display.display(IPython.display.HTML(graph))
3. SelfPlay: choose two samplers or heuristics to play against each other#
Load & Setup Model Samplers#
[ ]:
# Choose one sampler per side by leaving exactly one assignment uncommented in
# each group below.
# NOTE(review): RandomSampler and DummyHeuristic are not imported by the import
# cell of this notebook (it imports MCTSSampler, ModelSampler, SelfPlay,
# MaterialHeuristic, MCTS and Node); import them before enabling those options -
# presumably from lczerolens.sampling / lczerolens.search, verify.
# White options:
# white_sampler = RandomSampler()
# white_sampler = MCTSSampler(model=None, _heuristic=MaterialHeuristic(), use_argmax=False)
# white_sampler = MCTSSampler(model=None, _heuristic=DummyHeuristic(), use_argmax=False)
# white_sampler = ModelSampler(model=maia_1100_model, use_argmax=False)
white_sampler = ModelSampler(model=maia_1900_model, use_argmax=False)
# Black options:
# black_sampler = RandomSampler()
# black_sampler = MCTSSampler(model=None, _heuristic=DummyHeuristic(), use_argmax=False)
# black_sampler = MCTSSampler(model=None, _heuristic=MaterialHeuristic(), use_argmax=False)
# black_sampler = ModelSampler(model=maia_1100_model, use_argmax=False)
black_sampler = ModelSampler(model=maia_1900_model, use_argmax=False)
Initialize SelfPlay#
[ ]:
# Rebuild the match with the samplers chosen for this section.
self_play = SelfPlay(white=white_sampler, black=black_sampler)
# Start a fresh log list; report_fn (defined in section 1) appends to whatever
# list the global name `logs` currently refers to.
logs = []
Play Game#
[ ]:
# Start from a default-constructed board and play with max_moves=100,
# recording each report through report_fn.
board = LczeroBoard()
game_moves, final_board = self_play.play(board=board, max_moves=100, report_fn=report_fn)
Display Results#
[ ]:
# Show the moves played and the result string of the final board.
print("Game moves:", game_moves)
print("Game result:", final_board.result())
Game moves: [Move.from_uci('e2e4'), Move.from_uci('c7c5'), Move.from_uci('b1c3'), Move.from_uci('b8c6'), Move.from_uci('g2g3'), Move.from_uci('e7e6'), Move.from_uci('f1g2'), Move.from_uci('d7d5'), Move.from_uci('e4d5'), Move.from_uci('e6d5'), Move.from_uci('g2d5'), Move.from_uci('g8f6'), Move.from_uci('d5g2'), Move.from_uci('f8d6'), Move.from_uci('d1e2'), Move.from_uci('d6e7'), Move.from_uci('d2d3'), Move.from_uci('a7a6'), Move.from_uci('c1g5'), Move.from_uci('c6d4'), Move.from_uci('e2d2'), Move.from_uci('d8b6'), Move.from_uci('g5f6'), Move.from_uci('b6b2'), Move.from_uci('a1c1'), Move.from_uci('e7f6'), Move.from_uci('g1e2'), Move.from_uci('c8g4'), Move.from_uci('f2f3'), Move.from_uci('d4f3'), Move.from_uci('g2f3'), Move.from_uci('g4f3'), Move.from_uci('h1f1'), Move.from_uci('f3e2'), Move.from_uci('c3e2'), Move.from_uci('f6c3'), Move.from_uci('f1f7'), Move.from_uci('c3d2'), Move.from_uci('e1d2'), Move.from_uci('b2f6'), Move.from_uci('f7b7'), Move.from_uci('f6c6'), Move.from_uci('c1b1'), Move.from_uci('a8d8'), Move.from_uci('b7g7'), Move.from_uci('e8f8'), Move.from_uci('g7g4'), Move.from_uci('h8g8'), Move.from_uci('g4f4'), Move.from_uci('f8g7'), Move.from_uci('b1f1'), Move.from_uci('g7h8'), Move.from_uci('f4f7'), Move.from_uci('c5c4'), Move.from_uci('f7f2'), Move.from_uci('c4d3'), Move.from_uci('c2d3'), Move.from_uci('c6a4'), Move.from_uci('e2c3'), Move.from_uci('a4a3'), Move.from_uci('f1c1'), Move.from_uci('d8d3'), Move.from_uci('d2d3'), Move.from_uci('a3c1'), Move.from_uci('a2a3'), Move.from_uci('g8d8'), Move.from_uci('d3c4'), Move.from_uci('c1a3'), Move.from_uci('f2f6'), Move.from_uci('h8g7'), Move.from_uci('c3e4'), Move.from_uci('a3d3'), Move.from_uci('c4b4'), Move.from_uci('d3e4'), Move.from_uci('b4a5'), Move.from_uci('d8d5'), Move.from_uci('a5a6'), Move.from_uci('e4e6'), Move.from_uci('a6b7'), Move.from_uci('e6f6'), Move.from_uci('b7a7'), Move.from_uci('d5a5'), Move.from_uci('a7b8'), Move.from_uci('f6d4'), Move.from_uci('b8c7'), Move.from_uci('a5a7'), 
Move.from_uci('c7b8'), Move.from_uci('d4b6'), Move.from_uci('b8c8'), Move.from_uci('b6c7')]
Game result: 0-1
Results (1 round) :#
Random * Random
Random 0-1 Maia1900
Random 0-1 Dummy
Random * Material
Random 0-1 Maia1100
Dummy * Material
Dummy 0-1 Material
Dummy * Maia1100
Dummy 0-1 Maia1900
Material * Material
Material * Maia1100
Material * Maia1900
Maia1100 1-0 Maia1100
Maia1100 * Maia1900
Maia1900 1-0 Maia1900