Demo notebook : SelfPlay with ONNX models#

Setup#

[ ]:
import importlib.util

DEV = True

if importlib.util.find_spec("google.colab") is not None:
    MODE = "colab-dev" if DEV else "colab"
else:
    MODE = "local"
[ ]:
if MODE == "colab":
    %pip install -q lczerolens
elif MODE == "colab-dev":
    !rm -r lczerolens
    !git clone https://github.com/Xmaster6y/lczerolens -b main
    %pip install -q ./lczerolens

Download ONNX Models#

[ ]:
# Fetch the two ONNX networks by Google Drive file id and save them in the
# current working directory as maia1100.onnx and maia1900.onnx.
# NOTE(review): requires the `gdown` command to be available in the environment.
!gdown 1yw2jzcdrGk3ao-nbVl42VUmCUF-6spfO -O maia1100.onnx
!gdown 1BFTmWwhILhAF4PJ2F3L15Fi5LPomCGFl -O maia1900.onnx
Downloading...
From: https://drive.google.com/uc?id=1yw2jzcdrGk3ao-nbVl42VUmCUF-6spfO
To: /mnt/d/Documents/0Polytech Sorbonne/MAIN/MAIN3/StageLip6/lczerolens/docs/source/notebooks/features/maia1100.onnx
100%|██████████████████████████████████████| 3.48M/3.48M [00:00<00:00, 38.5MB/s]
Downloading...
From: https://drive.google.com/uc?id=1BFTmWwhILhAF4PJ2F3L15Fi5LPomCGFl
To: /mnt/d/Documents/0Polytech Sorbonne/MAIN/MAIN3/StageLip6/lczerolens/docs/source/notebooks/features/maia1900.onnx
100%|██████████████████████████████████████| 3.48M/3.48M [00:00<00:00, 33.7MB/s]

Import Libraries#

[ ]:
# ruff: noqa: E402
# (E402 is silenced because imports follow the reload loop below.)
import importlib
import lczerolens.sampling as sampling
import lczerolens.search as search
import numpy as np

# Reload both submodules before the from-imports below, so those names are
# bound to the freshly executed module code — presumably to pick up local
# source edits without restarting the kernel (TODO confirm).
for module in (sampling, search):
    importlib.reload(module)

import IPython.display
from lczerolens import LczeroBoard, LczeroModel
from lczerolens.sampling import MCTSSampler, ModelSampler, SelfPlay
from lczerolens.search import MaterialHeuristic, MCTS, Node

1. SelfPlay (MCTS + NN) Maia1100 VS Maia1900#

Load & Setup Model Samplers#

[ ]:
# Load both networks from the ONNX files downloaded earlier in the notebook.
maia_1100_model = LczeroModel.from_path("maia1100.onnx")
maia_1900_model = LczeroModel.from_path("maia1900.onnx")
# White: MCTS-based sampler built on the Maia-1100 model, with num_simulations=100.
# use_argmax=True presumably selects the top-scoring move instead of sampling — TODO confirm.
white_sampler = MCTSSampler(model=maia_1100_model, num_simulations=100, use_argmax=True)
# Black: sampler that uses the Maia-1900 model directly (no MCTS wrapper).
black_sampler = ModelSampler(model=maia_1900_model, use_argmax=True)

Initialize SelfPlay#

[ ]:
# Accumulates the (log, to_play) pairs handed to report_fn.
logs = []


def report_fn(log, to_play):
    """Store one report as a ``(log, to_play)`` tuple in the module-level ``logs`` list."""
    entry = (log, to_play)
    logs.append(entry)


# Pair the two configured samplers: one plays White, the other Black.
self_play = SelfPlay(white=white_sampler, black=black_sampler)

Play Game#

[ ]:
# Default-constructed board (presumably the standard starting position — the
# recorded game below opens with e2e4).
board = LczeroBoard()

# play() returns the list of moves and the final board. max_moves=10 presumably
# caps the game length: the recorded output lists 10 moves and an undecided "*".
# report_fn is passed as the reporting callback; it appends to `logs`.
game_moves, final_board = self_play.play(board=board, max_moves=10, report_fn=report_fn)

Display Results#

[ ]:
# Show the moves returned by play() and the result string reported by the final board.
print("Game moves:", game_moves)
print("Game result:", final_board.result())
Game moves: [Move.from_uci('e2e4'), Move.from_uci('c7c5'), Move.from_uci('g1f3'), Move.from_uci('d7d6'), Move.from_uci('f1c4'), Move.from_uci('e7e6'), Move.from_uci('b1c3'), Move.from_uci('g8f6'), Move.from_uci('d2d3'), Move.from_uci('f8e7')]
Game result: *

2. Test heuristic : Material advantage#

[ ]:
# Position to search from, in FEN; the side-to-move field is "b".
fen: str = "6nr/pp3p1p/k1p5/8/1QN5/2P1P3/4KPqP/8 b - - 5 26"
# Passed as `iterations` to mcts.search_ in a later cell.
iterations: int = 100
# Forwarded to the MCTS constructor (presumably the PUCT exploration constant — TODO confirm).
c_puct: float = 1.0
# Forwarded to the MCTS constructor.
n_parallel_rollouts: int = 1
[ ]:
# Build the search objects for the configured position:
# the board from the FEN string, the material heuristic, an MCTS instance
# configured with the constants defined above, and a parentless Node wrapping
# the board, used as the search root.
board = LczeroBoard(fen)
heuristic = MaterialHeuristic()
mcts = MCTS(c_puct=c_puct, n_parallel_rollouts=n_parallel_rollouts)
root = Node(board, parent=None)
[ ]:
# Run the search from `root`; the return value is not used, and the results are
# read back from `root` afterwards.
mcts.search_(root, heuristic=heuristic, iterations=iterations)
# Index of the largest entry in root.visits. np.argmax returns the first
# maximum, so ties resolve to the earliest index.
# Assumes root.visits is aligned with root.legal_moves — TODO confirm.
best_move_idx = np.argmax(root.visits)
best_move = root.legal_moves[best_move_idx].uci()
print("Best move :", best_move)
Best move : g8e7
[ ]:
# Render the searched tree (max_depth=30) and show the returned markup as HTML
# cell output. The display function is referenced through its module rather
# than through the bare name that IPython injects.
graph = MCTS.render_tree(root, max_depth=30)
IPython.display.display(IPython.display.HTML(graph))
%3 127344134067536 Root N=100 127344134067920 g8e7 N=24 V=-1.000 127344134067536->127344134067920 127344134151568 g8h6 N=24 V=-1.000 127344134067536->127344134151568 127344133821328 g8f6 N=23 V=-1.000 127344134067536->127344133821328 127344071944528 g2g7 N=22 V=-1.000 127344134067536->127344071944528 127344134836624 g2g6 N=7 V=-1.000 127344134067536->127344134836624 127344071965904 c4d6 N=1 V=1.000 127344134067920->127344071965904 127344071966480 c4b6 N=1 V=1.000 127344134067920->127344071966480 127344071960656 c4e5 N=1 V=1.000 127344134067920->127344071960656 127344071954768 c4a5 N=1 V=1.000 127344134067920->127344071954768 127344071957008 c4a3 N=1 V=1.000 127344134067920->127344071957008 127344071901328 c4d2 N=1 V=1.000 127344134067920->127344071901328 127344071903120 c4b2 N=1 V=1.000 127344134067920->127344071903120 127344071911760 b4e7 N=1 V=1.000 127344134067920->127344071911760 127344071903504 b4b7 N=1 V=1.000 127344134067920->127344071903504 127344071907152 b4d6 N=1 V=1.000 127344134067920->127344071907152 127344071917520 b4b6 N=1 V=1.000 127344134067920->127344071917520 127344071908816 b4c5 N=1 V=1.000 127344134067920->127344071908816 127344133897616 b4b5 N=1 V=1.000 127344134067920->127344133897616 127344133884176 b4a5 N=10 V=-1.000 127344134067920->127344133884176 127344133889808 c4d6 N=1 V=1.000 127344134151568->127344133889808 127344133896976 c4b6 N=1 V=1.000 127344134151568->127344133896976 127344133887696 c4e5 N=1 V=1.000 127344134151568->127344133887696 127344133893072 c4a5 N=1 V=1.000 127344134151568->127344133893072 127344133891408 c4a3 N=1 V=1.000 127344134151568->127344133891408 127344134210128 c4d2 N=1 V=1.000 127344134151568->127344134210128 127344134216464 c4b2 N=1 V=1.000 127344134151568->127344134216464 127344134223696 b4f8 N=1 V=1.000 127344134151568->127344134223696 127344134216592 b4e7 N=1 V=1.000 127344134151568->127344134216592 127344134217296 b4b7 N=1 V=1.000 127344134151568->127344134217296 127344134222800 b4d6 N=1 V=1.000 
127344134151568->127344134222800 127344134218256 b4b6 N=1 V=1.000 127344134151568->127344134218256 127344133817616 b4c5 N=1 V=1.000 127344134151568->127344133817616 127344133831504 b4b5 N=1 V=1.000 127344134151568->127344133831504 127344133828560 b4a5 N=9 V=-1.000 127344134151568->127344133828560 127344133817168 c4d6 N=1 V=1.000 127344133821328->127344133817168 127344133832656 c4b6 N=1 V=1.000 127344133821328->127344133832656 127344133830480 c4e5 N=1 V=1.000 127344133821328->127344133830480 127344134315024 c4a5 N=1 V=1.000 127344133821328->127344134315024 127344134310928 c4a3 N=1 V=1.000 127344133821328->127344134310928 127344134313552 c4d2 N=1 V=1.000 127344133821328->127344134313552 127344134314576 c4b2 N=1 V=1.000 127344133821328->127344134314576 127344134317584 b4f8 N=1 V=1.000 127344133821328->127344134317584 127344134310480 b4e7 N=1 V=1.000 127344133821328->127344134310480 127344071938256 b4b7 N=1 V=1.000 127344133821328->127344071938256 127344071935312 b4d6 N=1 V=1.000 127344133821328->127344071935312 127344071944080 b4b6 N=1 V=1.000 127344133821328->127344071944080 127344071945424 b4c5 N=1 V=1.000 127344133821328->127344071945424 127344071940816 b4b5 N=1 V=1.000 127344133821328->127344071940816 127344071943568 b4a5 N=8 V=-1.000 127344133821328->127344071943568 127344071938640 c4d6 N=1 V=1.000 127344071944528->127344071938640 127344139712848 c4b6 N=1 V=1.000 127344071944528->127344139712848 127344133778768 c4e5 N=1 V=1.000 127344071944528->127344133778768 127344133776976 c4a5 N=1 V=1.000 127344071944528->127344133776976 127344133776912 c4a3 N=1 V=1.000 127344071944528->127344133776912 127344133780624 c4d2 N=1 V=1.000 127344071944528->127344133780624 127344133775376 c4b2 N=1 V=1.000 127344071944528->127344133775376 127344133773584 b4f8 N=1 V=1.000 127344071944528->127344133773584 127344133767504 b4e7 N=1 V=1.000 127344071944528->127344133767504 127344135070928 b4b7 N=1 V=1.000 127344071944528->127344135070928 127344134781712 b4d6 N=1 V=1.000 
127344071944528->127344134781712 127344134838928 b4b6 N=1 V=1.000 127344071944528->127344134838928 127344134840784 b4c5 N=1 V=1.000 127344071944528->127344134840784 127344134841104 b4b5 N=1 V=1.000 127344071944528->127344134841104 127344134843088 b4a5 N=7 V=-1.000 127344071944528->127344134843088 127344136400912 c4d6 N=1 V=1.000 127344134836624->127344136400912 127344136399824 c4b6 N=1 V=1.000 127344134836624->127344136399824 127344136397008 c4e5 N=1 V=1.000 127344134836624->127344136397008 127344136392528 c4a5 N=1 V=1.000 127344134836624->127344136392528 127344136390672 c4a3 N=1 V=1.000 127344134836624->127344136390672 127344133756880 c4d2 N=1 V=1.000 127344134836624->127344133756880

3. SelfPlay : Choose two samplers or heuristics to play against each other#

Load & Setup Model Samplers#

[ ]:
# Pick one sampler per side by uncommenting exactly one line in each group.
# NOTE(review): RandomSampler and DummyHeuristic are not imported in the import
# cell shown in this notebook; import them before uncommenting those options
# (presumably from lczerolens.sampling and lczerolens.search — TODO confirm).
# use_argmax=False is passed everywhere here, unlike section 1 (presumably so
# that moves are sampled rather than picked greedily — TODO confirm).

# --- White ---
# white_sampler = RandomSampler()
# white_sampler = MCTSSampler(model=None, _heuristic=MaterialHeuristic(), use_argmax=False)
# white_sampler = MCTSSampler(model=None, _heuristic=DummyHeuristic(), use_argmax=False)
# white_sampler = ModelSampler(model=maia_1100_model, use_argmax=False)
white_sampler = ModelSampler(model=maia_1900_model, use_argmax=False)

# --- Black ---
# black_sampler = RandomSampler()
# black_sampler = MCTSSampler(model=None, _heuristic=DummyHeuristic(), use_argmax=False)
# black_sampler = MCTSSampler(model=None, _heuristic=MaterialHeuristic(), use_argmax=False)
# black_sampler = ModelSampler(model=maia_1100_model, use_argmax=False)
black_sampler = ModelSampler(model=maia_1900_model, use_argmax=False)

Initialize SelfPlay#

[ ]:
# Rebuild the self-play driver with the samplers chosen for this section.
self_play = SelfPlay(white=white_sampler, black=black_sampler)
# Rebind `logs` to a fresh list; report_fn appends to the module-level `logs`,
# so reports from this section no longer mix with those of section 1.
logs = []

Play Game#

[ ]:
# Fresh default-constructed board for the new game.
board = LczeroBoard()

# Same call as in section 1, but with max_moves=100; the recorded game below
# finished before that limit with result "0-1".
game_moves, final_board = self_play.play(board=board, max_moves=100, report_fn=report_fn)

Display Results#

[ ]:
# Show the moves returned by play() and the result string reported by the final board.
print("Game moves:", game_moves)
print("Game result:", final_board.result())
Game moves: [Move.from_uci('e2e4'), Move.from_uci('c7c5'), Move.from_uci('b1c3'), Move.from_uci('b8c6'), Move.from_uci('g2g3'), Move.from_uci('e7e6'), Move.from_uci('f1g2'), Move.from_uci('d7d5'), Move.from_uci('e4d5'), Move.from_uci('e6d5'), Move.from_uci('g2d5'), Move.from_uci('g8f6'), Move.from_uci('d5g2'), Move.from_uci('f8d6'), Move.from_uci('d1e2'), Move.from_uci('d6e7'), Move.from_uci('d2d3'), Move.from_uci('a7a6'), Move.from_uci('c1g5'), Move.from_uci('c6d4'), Move.from_uci('e2d2'), Move.from_uci('d8b6'), Move.from_uci('g5f6'), Move.from_uci('b6b2'), Move.from_uci('a1c1'), Move.from_uci('e7f6'), Move.from_uci('g1e2'), Move.from_uci('c8g4'), Move.from_uci('f2f3'), Move.from_uci('d4f3'), Move.from_uci('g2f3'), Move.from_uci('g4f3'), Move.from_uci('h1f1'), Move.from_uci('f3e2'), Move.from_uci('c3e2'), Move.from_uci('f6c3'), Move.from_uci('f1f7'), Move.from_uci('c3d2'), Move.from_uci('e1d2'), Move.from_uci('b2f6'), Move.from_uci('f7b7'), Move.from_uci('f6c6'), Move.from_uci('c1b1'), Move.from_uci('a8d8'), Move.from_uci('b7g7'), Move.from_uci('e8f8'), Move.from_uci('g7g4'), Move.from_uci('h8g8'), Move.from_uci('g4f4'), Move.from_uci('f8g7'), Move.from_uci('b1f1'), Move.from_uci('g7h8'), Move.from_uci('f4f7'), Move.from_uci('c5c4'), Move.from_uci('f7f2'), Move.from_uci('c4d3'), Move.from_uci('c2d3'), Move.from_uci('c6a4'), Move.from_uci('e2c3'), Move.from_uci('a4a3'), Move.from_uci('f1c1'), Move.from_uci('d8d3'), Move.from_uci('d2d3'), Move.from_uci('a3c1'), Move.from_uci('a2a3'), Move.from_uci('g8d8'), Move.from_uci('d3c4'), Move.from_uci('c1a3'), Move.from_uci('f2f6'), Move.from_uci('h8g7'), Move.from_uci('c3e4'), Move.from_uci('a3d3'), Move.from_uci('c4b4'), Move.from_uci('d3e4'), Move.from_uci('b4a5'), Move.from_uci('d8d5'), Move.from_uci('a5a6'), Move.from_uci('e4e6'), Move.from_uci('a6b7'), Move.from_uci('e6f6'), Move.from_uci('b7a7'), Move.from_uci('d5a5'), Move.from_uci('a7b8'), Move.from_uci('f6d4'), Move.from_uci('b8c7'), Move.from_uci('a5a7'), 
Move.from_uci('c7b8'), Move.from_uci('d4b6'), Move.from_uci('b8c8'), Move.from_uci('b6c7')]
Game result: 0-1

Results (1 round) :#

Random * Random

Random 0-1 Maia1900

Random 0-1 Dummy

Random * Material

Random 0-1 Maia1100


Dummy * Dummy

Dummy 0-1 Material

Dummy * Maia1100

Dummy 0-1 Maia1900


Material * Material

Material * Maia1100

Material * Maia1900


Maia1100 1-0 Maia1100

Maia1100 * Maia1900


Maia1900 1-0 Maia1900.