[{"data":1,"prerenderedAt":2267},["ShallowReactive",2],{"project-rl-tennis-atari-game":3},{"id":4,"title":5,"description":6,"extension":7,"favorite":8,"icon":9,"meta":10,"publishedAt":2254,"readingTime":2248,"shortDescription":2255,"slug":2256,"status":2257,"stem":2258,"tags":2259,"type":2265,"__hash__":2266},"projects\u002Fprojects\u002Frl-tennis-atari-game.md","Reinforcement Learning for Tennis Strategy Optimization","An academic project exploring the application of reinforcement learning to optimize tennis strategies. The project involves training RL agents on Atari Tennis (ALE) to evaluate strategic decision-making through competitive self-play and baseline benchmarking.","md",false,"i-ph-lightning-duotone",{"body":11},{"type":12,"value":13,"toc":2237},"minimark",[14,23,41,46,49,53,1653,1657,1998,2002,2038,2042,2052,2056,2061,2102,2106,2109,2113,2173,2177,2185,2189,2225,2229],[15,16,17,18,22],"p",{},"Comparison of Reinforcement Learning algorithms on Atari Tennis (",[19,20,21],"code",{},"ALE\u002FTennis-v5"," via Gymnasium\u002FPettingZoo).",[24,25,26],"ul",{},[27,28,29,33,34],"li",{},[30,31,32],"strong",{},"GitHub Repository:"," ",[35,36,40],"a",{"href":37,"rel":38},"https:\u002F\u002Fgithub.com\u002FArthurDanjou\u002FTennis-Atari-Game",[39],"nofollow","Tennis-Atari-Game",[42,43,45],"h2",{"id":44},"overview","Overview",[15,47,48],{},"This project implements and compares five RL agents playing Atari Tennis against the built-in AI and in head-to-head tournaments.",[42,50,52],{"id":51},"algorithms","Algorithms",[54,55,56,75],"table",{},[57,58,59],"thead",{},[60,61,62,66,69,72],"tr",{},[63,64,65],"th",{},"Agent",[63,67,68],{},"Type",[63,70,71],{},"Policy",[63,73,74],{},"Update Rule",[76,77,78,95,671,1265,1638],"tbody",{},[60,79,80,86,89,92],{},[81,82,83],"td",{},[30,84,85],{},"Random",[81,87,88],{},"Baseline",[81,90,91],{},"Uniform random",[81,93,94],{},"None",[60,96,97,102,105,108],{},[81,98,99],{},[30,100,101],{},"SARSA",[81,103,104],{},"TD(0), on-policy",[81,106,107],{},"ε-greedy",[81,109,110],{},[111,112,115,241],"span",{"className":113},[114],"katex",[111,116,119],{"className":117},[118],"katex-mathml",[120,121,123],"math",{"xmlns":122},"http:\u002F\u002Fwww.w3.org\u002F1998\u002FMath\u002FMathML",[124,125,126,236],"semantics",{},[127,128,129,138,142,148,151,154,157,161,164,166,169,179,181,192,195,201,204,207,213,215,217,219,221,223,225,227,230,232,234],"mrow",{},[130,131,132,136],"msub",{},[133,134,135],"mi",{},"W",[133,137,35],{},[139,140,141],"mo",{},"←",[130,143,144,146],{},[133,145,135],{},[133,147,35],{},[139,149,150],{},"+",[133,152,153],{},"α",[139,155,156],{},"⋅",[139,158,160],{"stretchy":159},"false","(",[133,162,163],{},"r",[139,165,150],{},[133,167,168],{},"γ",[170,171,173,176],"mover",{"accent":172},"true",[133,174,175],{},"q",[139,177,178],{},"^",[139,180,160],{"stretchy":159},[182,183,184,187],"msup",{},[133,185,186],{},"s",[139,188,191],{"mathvariant":189,"lspace":190,"rspace":190},"normal","0em","′",[139,193,194],{"separator":172},",",[182,196,197,199],{},[133,198,35],{},[139,200,191],{"mathvariant":189,"lspace":190,"rspace":190},[139,202,203],{"stretchy":159},")",[139,205,206],{},"−",[170,208,209,211],{"accent":172},[133,210,175],{},[139,212,178],{},[139,214,160],{"stretchy":159},[133,216,186],{},[139,218,194],{"separator":172},[133,220,35],{},[139,222,203],{"stretchy":159},[139,224,203],{"stretchy":159},[139,226,156],{},[133,228,229],{},"ϕ",[139,231,160],{"stretchy":159},[133,233,186],{},[139,235,203],{"stretchy":159},[237,238,240],"annotation",{"encoding":239},"application\u002Fx-tex","W_a \\leftarrow W_a + \\alpha \\cdot (r + \\gamma \\hat{q}(s', a') - \\hat{q}(s, a)) \\cdot \\phi(s)",[111,242,245,325,382,402,426,577,653],{"className":243,"ariaHidden":172},[244],"katex-html",[111,246,249,254,313,318,322],{"className":247},[248],"base",[111,250],{"className":251,"style":253},[252],"strut","height:0.8333em;vertical-align:-0.15em;",[111,255,258,263],{"className":256},[257],"mord",[111,259,135],{"className":260,"style":262},[257,261],"mathnormal","margin-right:0.1389em;",[111,264,267],{"className":265},[266],"msupsub",[111,268,272,304],{"className":269},[270,271],"vlist-t","vlist-t2",[111,273,276,299],{"className":274},[275],"vlist-r",[111,277,281],{"className":278,"style":280},[279],"vlist","height:0.1514em;",[111,282,284,289],{"style":283},"top:-2.55em;margin-left:-0.1389em;margin-right:0.05em;",[111,285],{"className":286,"style":288},[287],"pstrut","height:2.7em;",[111,290,296],{"className":291},[292,293,294,295],"sizing","reset-size6","size3","mtight",[111,297,35],{"className":298},[257,261,295],[111,300,303],{"className":301},[302],"vlist-s","​",[111,305,307],{"className":306},[275],[111,308,311],{"className":309,"style":310},[279],"height:0.15em;",[111,312],{},[111,314],{"className":315,"style":317},[316],"mspace","margin-right:0.2778em;",[111,319,141],{"className":320},[321],"mrel",[111,323],{"className":324,"style":317},[316],[111,326,328,331,371,375,379],{"className":327},[248],[111,329],{"className":330,"style":253},[252],[111,332,334,337],{"className":333},[257],[111,335,135],{"className":336,"style":262},[257,261],[111,338,340],{"className":339},[266],[111,341,343,363],{"className":342},[270,271],[111,344,346,360],{"className":345},[275],[111,347,349],{"className":348,"style":280},[279],[111,350,351,354],{"style":283},[111,352],{"className":353,"style":288},[287],[111,355,357],{"className":356},[292,293,294,295],[111,358,35],{"className":359},[257,261,295],[111,361,303],{"className":362},[302],[111,364,366],{"className":365},[275],[111,367,369],{"className":368,"style":310},[279],[111,370],{},[111,372],{"className":373,"style":374},[316],"margin-right:0.2222em;",[111,376,150],{"className":377},[378],"mbin",[111,380],{"className":381,"style":374},[316],[111,383,385,389,393,396,399],{"className":384},[248],[111,386],{"className":387,"style":388},[252],"height:0.4445em;",[111,390,153],{"className":391,"style":392},[257,261],"margin-right:0.0037em;",[111,394],{"className":395,"style":374},[316],[111,397,156],{"className":398},[378],[111,400],{"className":401,"style":374},[316],[111,403,405,409,413,417,420,423],{"className":404},[248],[111,406],{"className":407,"style":408},[252],"height:1em;vertical-align:-0.25em;",[111,410,160],{"className":411},[412],"mopen",[111,414,163],{"className":415,"style":416},[257,261],"margin-right:0.0278em;",[111,418],{"className":419,"style":374},[316],[111,421,150],{"className":422},[378],[111,424],{"className":425,"style":374},[316],[111,427,429,433,437,487,490,524,528,532,564,568,571,574],{"className":428},[248],[111,430],{"className":431,"style":432},[252],"height:1.0019em;vertical-align:-0.25em;",[111,434,168],{"className":435,"style":436},[257,261],"margin-right:0.0556em;",[111,438,441],{"className":439},[257,440],"accent",[111,442,444,478],{"className":443},[270,271],[111,445,447,475],{"className":446},[275],[111,448,451,462],{"className":449,"style":450},[279],"height:0.6944em;",[111,452,454,458],{"style":453},"top:-3em;",[111,455],{"className":456,"style":457},[287],"height:3em;",[111,459,175],{"className":460,"style":461},[257,261],"margin-right:0.0359em;",[111,463,464,467],{"style":453},[111,465],{"className":466,"style":457},[287],[111,468,472],{"className":469,"style":471},[470],"accent-body","left:-0.1667em;",[111,473,178],{"className":474},[257],[111,476,303],{"className":477},[302],[111,479,481],{"className":480},[275],[111,482,485],{"className":483,"style":484},[279],"height:0.1944em;",[111,486],{},[111,488,160],{"className":489},[412],[111,491,493,496],{"className":492},[257],[111,494,186],{"className":495},[257,261],[111,497,499],{"className":498},[266],[111,500,502],{"className":501},[270],[111,503,505],{"className":504},[275],[111,506,509],{"className":507,"style":508},[279],"height:0.7519em;",[111,510,512,515],{"style":511},"top:-3.063em;margin-right:0.05em;",[111,513],{"className":514,"style":288},[287],[111,516,518],{"className":517},[292,293,294,295],[111,519,521],{"className":520},[257,295],[111,522,191],{"className":523},[257,295],[111,525,194],{"className":526},[527],"mpunct",[111,529],{"className":530,"style":531},[316],"margin-right:0.1667em;",[111,533,535,538],{"className":534},[257],[111,536,35],{"className":537},[257,261],[111,539,541],{"className":540},[266],[111,542,544],{"className":543},[270],[111,545,547],{"className":546},[275],[111,548,550],{"className":549,"style":508},[279],[111,551,552,555],{"style":511},[111,553],{"className":554,"style":288},[287],[111,556,558],{"className":557},[292,293,294,295],[111,559,561],{"className":560},[257,295],[111,562,191],{"className":563},[257,295],[111,565,203],{"className":566},[567],"mclose",[111,569],{"className":570,"style":374},[316],[111,572,206],{"className":573},[378],[111,575],{"className":576,"style":374},[316],[111,578,580,583,625,628,631,634,637,640,644,647,650],{"className":579},[248],[111,581],{"className":582,"style":408},[252],[111,584,586],{"className":585},[257,440],[111,587,589,617],{"className":588},[270,271],[111,590,592,614],{"className":591},[275],[111,593,595,603],{"className":594,"style":450},[279],[111,596,597,600],{"style":453},[111,598],{"className":599,"style":457},[287],[111,601,175],{"className":602,"style":461},[257,261],[111,604,605,608],{"style":453},[111,606],{"className":607,"style":457},[287],[111,609,611],{"className":610,"style":471},[470],[111,612,178],{"className":613},[257],[111,615,303],{"className":616},[302],[111,618,620],{"className":619},[275],[111,621,623],{"className":622,"style":484},[279],[111,624],{},[111,626,160],{"className":627},[412],[111,629,186],{"className":630},[257,261],[111,632,194],{"className":633},[527],[111,635],{"className":636,"style":531},[316],[111,638,35],{"className":639},[257,261],[111,641,643],{"className":642},[567],"))",[111,645],{"className":646,"style":374},[316],[111,648,156],{"className":649},[378],[111,651],{"className":652,"style":374},[316],[111,654,656,659,662,665,668],{"className":655},[248],[111,657],{"className":658,"style":408},[252],[111,660,229],{"className":661},[257,261],[111,663,160],{"className":664},[412],[111,666,186],{"className":667},[257,261],[111,669,203],{"className":670},[567],[60,672,673,678,681,683],{},[81,674,675],{},[30,676,677],{},"Q-Learning",[81,679,680],{},"TD(0), off-policy",[81,682,107],{},[81,684,685],{},[111,686,688,798],{"className":687},[114],[111,689,691],{"className":690},[118],[120,692,693],{"xmlns":122},[124,694,695,795],{},[127,696,697,703,705,711,713,715,717,719,721,723,725,741,747,749,755,757,763,765,767,773,775,777,779,781,783,785,787,789,791,793],{},[130,698,699,701],{},[133,700,135],{},[133,702,35],{},[139,704,141],{},[130,706,707,709],{},[133,708,135],{},[133,710,35],{},[139,712,150],{},[133,714,153],{},[139,716,156],{},[139,718,160],{"stretchy":159},[133,720,163],{},[139,722,150],{},[133,724,168],{},[130,726,727,735],{},[127,728,729,732],{},[133,730,731],{},"max",[139,733,734],{},"⁡",[182,736,737,739],{},[133,738,35],{},[139,740,191],{"mathvariant":189,"lspace":190,"rspace":190},[170,742,743,745],{"accent":172},[133,744,175],{},[139,746,178],{},[139,748,160],{"stretchy":159},[182,750,751,753],{},[133,752,186],{},[139,754,191],{"mathvariant":189,"lspace":190,"rspace":190},[139,756,194],{"separator":172},[182,758,759,761],{},[133,760,35],{},[139,762,191],{"mathvariant":189,"lspace":190,"rspace":190},[139,764,203],{"stretchy":159},[139,766,206],{},[170,768,769,771],{"accent":172},[133,770,175],{},[139,772,178],{},[139,774,160],{"stretchy":159},[133,776,186],{},[139,778,194],{"separator":172},[133,780,35],{},[139,782,203],{"stretchy":159},[139,784,203],{"stretchy":159},[139,786,156],{},[133,788,229],{},[139,790,160],{"stretchy":159},[133,792,186],{},[139,794,203],{"stretchy":159},[237,796,797],{"encoding":239},"W_a \\leftarrow W_a + \\alpha \\cdot (r + \\gamma \\max_{a'} \\hat{q}(s', a') - \\hat{q}(s, a)) \\cdot \\phi(s)",[111,799,801,856,911,929,950,1172,1247],{"className":800,"ariaHidden":172},[244],[111,802,804,807,847,850,853],{"className":803},[248],[111,805],{"className":806,"style":253},[252],[111,808,810,813],{"className":809},[257],[111,811,135],{"className":812,"style":262},[257,261],[111,814,816],{"className":815},[266],[111,817,819,839],{"className":818},[270,271],[111,820,822,836],{"className":821},[275],[111,823,825],{"className":824,"style":280},[279],[111,826,827,830],{"style":283},[111,828],{"className":829,"style":288},[287],[111,831,833],{"className":832},[292,293,294,295],[111,834,35],{"className":835},[257,261,295],[111,837,303],{"className":838},[302],[111,840,842],{"className":841},[275],[111,843,845],{"className":844,"style":310},[279],[111,846],{},[111,848],{"className":849,"style":317},[316],[111,851,141],{"className":852},[321],[111,854],{"className":855,"style":317},[316],[111,857,859,862,902,905,908],{"className":858},[248],[111,860],{"className":861,"style":253},[252],[111,863,865,868],{"className":864},[257],[111,866,135],{"className":867,"style":262},[257,261],[111,869,871],{"className":870},[266],[111,872,874,894],{"className":873},[270,271],[111,875,877,891],{"className":876},[275],[111,878,880],{"className":879,"style":280},[279],[111,881,882,885],{"style":283},[111,883],{"className":884,"style":288},[287],[111,886,888],{"className":887},[292,293,294,295],[111,889,35],{"className":890},[257,261,295],[111,892,303],{"className":893},[302],[111,895,897],{"className":896},[275],[111,898,900],{"className":899,"style":310},[279],[111,901],{},[111,903],{"className":904,"style":374},[316],[111,906,150],{"className":907},[378],[111,909],{"className":910,"style":374},[316],[111,912,914,917,920,923,926],{"className":913},[248],[111,915],{"className":916,"style":388},[252],[111,918,153],{"className":919,"style":392},[257,261],[111,921],{"className":922,"style":374},[316],[111,924,156],{"className":925},[378],[111,927],{"className":928,"style":374},[316],[111,930,932,935,938,941,944,947],{"className":931},[248],[111,933],{"className":934,"style":408},[252],[111,936,160],{"className":937},[412],[111,939,163],{"className":940,"style":416},[257,261],[111,942],{"className":943,"style":374},[316],[111,945,150],{"className":946},[378],[111,948],{"className":949,"style":374},[316],[111,951,953,956,959,962,1042,1045,1087,1090,1122,1125,1128,1160,1163,1166,1169],{"className":952},[248],[111,954],{"className":955,"style":432},[252],[111,957,168],{"className":958,"style":436},[257,261],[111,960],{"className":961,"style":531},[316],[111,963,966,969],{"className":964},[965],"mop",[111,967,731],{"className":968},[965],[111,970,972],{"className":971},[266],[111,973,975,1034],{"className":974},[270,271],[111,976,978,1031],{"className":977},[275],[111,979,982],{"className":980,"style":981},[279],"height:0.328em;",[111,983,985,988],{"style":984},"top:-2.55em;margin-right:0.05em;",[111,986],{"className":987,"style":288},[287],[111,989,991],{"className":990},[292,293,294,295],[111,992,994],{"className":993},[257,295],[111,995,997,1000],{"className":996},[257,295],[111,998,35],{"className":999},[257,261,295],[111,1001,1003],{"className":1002},[266],[111,1004,1006],{"className":1005},[270],[111,1007,1009],{"className":1008},[275],[111,1010,1013],{"className":1011,"style":1012},[279],"height:0.6828em;",[111,1014,1016,1020],{"style":1015},"top:-2.786em;margin-right:0.0714em;",[111,1017],{"className":1018,"style":1019},[287],"height:2.5em;",[111,1021,1025],{"className":1022},[292,1023,1024,295],"reset-size3","size1",[111,1026,1028],{"className":1027},[257,295],[111,1029,191],{"className":1030},[257,295],[111,1032,303],{"className":1033},[302],[111,1035,1037],{"className":1036},[275],[111,1038,1040],{"className":1039,"style":310},[279],[111,1041],{},[111,1043],{"className":1044,"style":531},[316],[111,1046,1048],{"className":1047},[257,440],[111,1049,1051,1079],{"className":1050},[270,271],[111,1052,1054,1076],{"className":1053},[275],[111,1055,1057,1065],{"className":1056,"style":450},[279],[111,1058,1059,1062],{"style":453},[111,1060],{"className":1061,"style":457},[287],[111,1063,175],{"className":1064,"style":461},[257,261],[111,1066,1067,1070],{"style":453},[111,1068],{"className":1069,"style":457},[287],[111,1071,1073],{"className":1072,"style":471},[470],[111,1074,178],{"className":1075},[257],[111,1077,303],{"className":1078},[302],[111,1080,1082],{"className":1081},[275],[111,1083,1085],{"className":1084,"style":484},[279],[111,1086],{},[111,1088,160],{"className":1089},[412],[111,1091,1093,1096],{"className":1092},[257],[111,1094,186],{"className":1095},[257,261],[111,1097,1099],{"className":1098},[266],[111,1100,1102],{"className":1101},[270],[111,1103,1105],{"className":1104},[275],[111,1106,1108],{"className":1107,"style":508},[279],[111,1109,1110,1113],{"style":511},[111,1111],{"className":1112,"style":288},[287],[111,1114,1116],{"className":1115},[292,293,294,295],[111,1117,1119],{"className":1118},[257,295],[111,1120,191],{"className":1121},[257,295],[111,1123,194],{"className":1124},[527],[111,1126],{"className":1127,"style":531},[316],[111,1129,1131,1134],{"className":1130},[257],[111,1132,35],{"className":1133},[257,261],[111,1135,1137],{"className":1136},[266],[111,1138,1140],{"className":1139},[270],[111,1141,1143],{"className":1142},[275],[111,1144,1146],{"className":1145,"style":508},[279],[111,1147,1148,1151],{"style":511},[111,1149],{"className":1150,"style":288},[287],[111,1152,1154],{"className":1153},[292,293,294,295],[111,1155,1157],{"className":1156},[257,295],[111,1158,191],{"className":1159},[257,295],[111,1161,203],{"className":1162},[567],[111,1164],{"className":1165,"style":374},[316],[111,1167,206],{"className":1168},[378],[111,1170],{"className":1171,"style":374},[316],[111,1173,1175,1178,1220,1223,1226,1229,1232,1235,1238,1241,1244],{"className":1174},[248],[111,1176],{"className":1177,"style":408},[252],[111,1179,1181],{"className":1180},[257,440],[111,1182,1184,1212],{"className":1183},[270,271],[111,1185,1187,1209],{"className":1186},[275],[111,1188,1190,1198],{"className":1189,"style":450},[279],[111,1191,1192,1195],{"style":453},[111,1193],{"className":1194,"style":457},[287],[111,1196,175],{"className":1197,"style":461},[257,261],[111,1199,1200,1203],{"style":453},[111,1201],{"className":1202,"style":457},[287],[111,1204,1206],{"className":1205,"style":471},[470],[111,1207,178],{"className":1208},[257],[111,1210,303],{"className":1211},[302],[111,1213,1215],{"className":1214},[275],[111,1216,1218],{"className":1217,"style":484},[279],[111,1219],{},[111,1221,160],{"className":1222},[412],[111,1224,186],{"className":1225},[257,261],[111,1227,194],{"className":1228},[527],[111,1230],{"className":1231,"style":531},[316],[111,1233,35],{"className":1234},[257,261],[111,1236,643],{"className":1237},[567],[111,1239],{"className":1240,"style":374},[316],[111,1242,156],{"className":1243},[378],[111,1245],{"className":1246,"style":374},[316],[111,1248,1250,1253,1256,1259,1262],{"className":1249},[248],[111,1251],{"className":1252,"style":408},[252],[111,1254,229],{"className":1255},[257,261],[111,1257,160],{"className":1258},[412],[111,1260,186],{"className":1261},[257,261],[111,1263,203],{"className":1264},[567],[60,1266,1267,1272,1275,1277],{},[81,1268,1269],{},[30,1270,1271],{},"Monte Carlo",[81,1273,1274],{},"First-visit MC",[81,1276,107],{},[81,1278,1279],{},[111,1280,1282,1354],{"className":1281},[114],[111,1283,1285],{"className":1284},[118],[120,1286,1287],{"xmlns":122},[124,1288,1289,1351],{},[127,1290,1291,1297,1299,1305,1307,1309,1311,1313,1321,1323,1329,1331,1333,1335,1337,1339,1341,1343,1345,1347,1349],{},[130,1292,1293,1295],{},[133,1294,135],{},[133,1296,35],{},[139,1298,141],{},[130,1300,1301,1303],{},[133,1302,135],{},[133,1304,35],{},[139,1306,150],{},[133,1308,153],{},[139,1310,156],{},[139,1312,160],{"stretchy":159},[130,1314,1315,1318],{},[133,1316,1317],{},"G",[133,1319,1320],{},"t",[139,1322,206],{},[170,1324,1325,1327],{"accent":172},[133,1326,175],{},[139,1328,178],{},[139,1330,160],{"stretchy":159},[133,1332,186],{},[139,1334,194],{"separator":172},[133,1336,35],{},[139,1338,203],{"stretchy":159},[139,1340,203],{"stretchy":159},[139,1342,156],{},[133,1344,229],{},[139,1346,160],{"stretchy":159},[133,1348,186],{},[139,1350,203],{"stretchy":159},[237,1352,1353],{"encoding":239},"W_a \\leftarrow W_a + \\alpha \\cdot (G_t - \\hat{q}(s, a)) \\cdot \\phi(s)",[111,1355,1357,1412,1467,1485,1545,1620],{"className":1356,"ariaHidden":172},[244],[111,1358,1360,1363,1403,1406,1409],{"className":1359},[248],[111,1361],{"className":1362,"style":253},[252],[111,1364,1366,1369],{"className":1365},[257],[111,1367,135],{"className":1368,"style":262},[257,261],[111,1370,1372],{"className":1371},[266],[111,1373,1375,1395],{"className":1374},[270,271],[111,1376,1378,1392],{"className":1377},[275],[111,1379,1381],{"className":1380,"style":280},[279],[111,1382,1383,1386],{"style":283},[111,1384],{"className":1385,"style":288},[287],[111,1387,1389],{"className":1388},[292,293,294,295],[111,1390,35],{"className":1391},[257,261,295],[111,1393,303],{"className":1394},[302],[111,1396,1398],{"className":1397},[275],[111,1399,1401],{"className":1400,"style":310},[279],[111,1402],{},[111,1404],{"className":1405,"style":317},[316],[111,1407,141],{"className":1408},[321],[111,1410],{"className":1411,"style":317},[316],[111,1413,1415,1418,1458,1461,1464],{"className":1414},[248],[111,1416],{"className":1417,"style":253},[252],[111,1419,1421,1424],{"className":1420},[257],[111,1422,135],{"className":1423,"style":262},[257,261],[111,1425,1427],{"className":1426},[266],[111,1428,1430,1450],{"className":1429},[270,271],[111,1431,1433,1447],{"className":1432},[275],[111,1434,1436],{"className":1435,"style":280},[279],[111,1437,1438,1441],{"style":283},[111,1439],{"className":1440,"style":288},[287],[111,1442,1444],{"className":1443},[292,293,294,295],[111,1445,35],{"className":1446},[257,261,295],[111,1448,303],{"className":1449},[302],[111,1451,1453],{"className":1452},[275],[111,1454,1456],{"className":1455,"style":310},[279],[111,1457],{},[111,1459],{"className":1460,"style":374},[316],[111,1462,150],{"className":1463},[378],[111,1465],{"className":1466,"style":374},[316],[111,1468,1470,1473,1476,1479,1482],{"className":1469},[248],[111,1471],{"className":1472,"style":388},[252],[111,1474,153],{"className":1475,"style":392},[257,261],[111,1477],{"className":1478,"style":374},[316],[111,1480,156],{"className":1481},[378],[111,1483],{"className":1484,"style":374},[316],[111,1486,1488,1491,1494,1536,1539,1542],{"className":1487},[248],[111,1489],{"className":1490,"style":408},[252],[111,1492,160],{"className":1493},[412],[111,1495,1497,1500],{"className":1496},[257],[111,1498,1317],{"className":1499},[257,261],[111,1501,1503],{"className":1502},[266],[111,1504,1506,1528],{"className":1505},[270,271],[111,1507,1509,1525],{"className":1508},[275],[111,1510,1513],{"className":1511,"style":1512},[279],"height:0.2806em;",[111,1514,1516,1519],{"style":1515},"top:-2.55em;margin-left:0em;margin-right:0.05em;",[111,1517],{"className":1518,"style":288},[287],[111,1520,1522],{"className":1521},[292,293,294,295],[111,1523,1320],{"className":1524},[257,261,295],[111,1526,303],{"className":1527},[302],[111,1529,1531],{"className":1530},[275],[111,1532,1534],{"className":1533,"style":310},[279],[111,1535],{},[111,1537],{"className":1538,"style":374},[316],[111,1540,206],{"className":1541},[378],[111,1543],{"className":1544,"style":374},[316],[111,1546,1548,1551,1593,1596,1599,1602,1605,1608,1611,1614,1617],{"className":1547},[248],[111,1549],{"className":1550,"style":408},[252],[111,1552,1554],{"className":1553},[257,440],[111,1555,1557,1585],{"className":1556},[270,271],[111,1558,1560,1582],{"className":1559},[275],[111,1561,1563,1571],{"className":1562,"style":450},[279],[111,1564,1565,1568],{"style":453},[111,1566],{"className":1567,"style":457},[287],[111,1569,175],{"className":1570,"style":461},[257,261],[111,1572,1573,1576],{"style":453},[111,1574],{"className":1575,"style":457},[287],[111,1577,1579],{"className":1578,"style":471},[470],[111,1580,178],{"className":1581},[257],[111,1583,303],{"className":1584},[302],[111,1586,1588],{"className":1587},[275],[111,1589,1591],{"className":1590,"style":484},[279],[111,1592],{},[111,1594,160],{"className":1595},[412],[111,1597,186],{"className":1598},[257,261],[111,1600,194],{"className":1601},[527],[111,1603],{"className":1604,"style":531},[316],[111,1606,35],{"className":1607},[257,261],[111,1609,643],{"className":1610},[567],[111,1612],{"className":1613,"style":374},[316],[111,1615,156],{"className":1616},[378],[111,1618],{"className":1619,"style":374},[316],[111,1621,1623,1626,1629,1632,1635],{"className":1622},[248],[111,1624],{"className":1625,"style":408},[252],[111,1627,229],{"className":1628},[257,261],[111,1630,160],{"className":1631},[412],[111,1633,186],{"className":1634},[257,261],[111,1636,203],{"className":1637},[567],[60,1639,1640,1645,1648,1650],{},[81,1641,1642],{},[30,1643,1644],{},"DQN",[81,1646,1647],{},"Deep Q-Network",[81,1649,107],{},[81,1651,1652],{},"MLP (256→256) with experience replay & target network",[42,1654,1656],{"id":1655},"architecture","Architecture",[24,1658,1659,1993],{},[27,1660,1661,1664,1665,1885,1886,1992],{},[30,1662,1663],{},"Linear agents"," (SARSA, Q-Learning, Monte Carlo): ",[111,1666,1668,1723],{"className":1667},[114],[111,1669,1671],{"className":1670},[118],[120,1672,1673],{"xmlns":122},[124,1674,1675,1720],{},[127,1676,1677,1683,1685,1687,1689,1691,1694,1697,1699,1702,1712,1714,1716,1718],{},[170,1678,1679,1681],{"accent":172},[133,1680,175],{},[139,1682,178],{},[139,1684,160],{"stretchy":159},[133,1686,186],{},[139,1688,194],{"separator":172},[133,1690,35],{},[139,1692,1693],{"separator":172},";",[133,1695,135],{"mathvariant":1696},"bold",[139,1698,203],{"stretchy":159},[139,1700,1701],{},"=",[1703,1704,1705,1707,1709],"msubsup",{},[133,1706,135],{"mathvariant":1696},[133,1708,35],{},[133,1710,1711],{"mathvariant":189},"⊤",[133,1713,229],{},[139,1715,160],{"stretchy":159},[133,1717,186],{},[139,1719,203],{"stretchy":159},[237,1721,1722],{"encoding":239},"\\hat{q}(s, a; \\mathbf{W}) = \\mathbf{W}_a^\\top \\phi(s)",[111,1724,1726,1812],{"className":1725,"ariaHidden":172},[244],[111,1727,1729,1732,1774,1777,1780,1783,1786,1789,1792,1795,1800,1803,1806,1809],{"className":1728},[248],[111,1730],{"className":1731,"style":408},[252],[111,1733,1735],{"className":1734},[257,440],[111,1736,1738,1766],{"className":1737},[270,271],[111,1739,1741,1763],{"className":1740},[275],[111,1742,1744,1752],{"className":1743,"style":450},[279],[111,1745,1746,1749],{"style":453},[111,1747],{"className":1748,"style":457},[287],[111,1750,175],{"className":1751,"style":461},[257,261],[111,1753,1754,1757],{"style":453},[111,1755],{"className":1756,"style":457},[287],[111,1758,1760],{"className":1759,"style":471},[470],[111,1761,178],{"className":1762},[257],[111,1764,303],{"className":1765},[302],[111,1767,1769],{"className":1768},[275],[111,1770,1772],{"className":1771,"style":484},[279],[111,1773],{},[111,1775,160],{"className":1776},[412],[111,1778,186],{"className":1779},[257,261],[111,1781,194],{"className":1782},[527],[111,1784],{"className":1785,"style":531},[316],[111,1787,35],{"className":1788},[257,261],[111,1790,1693],{"className":1791},[527],[111,1793],{"className":1794,"style":531},[316],[111,1796,135],{"className":1797,"style":1799},[257,1798],"mathbf","margin-right:0.016em;",[111,1801,203],{"className":1802},[567],[111,1804],{"className":1805,"style":317},[316],[111,1807,1701],{"className":1808},[321],[111,1810],{"className":1811,"style":317},[316],[111,1813,1815,1819,1873,1876,1879,1882],{"className":1814},[248],[111,1816],{"className":1817,"style":1818},[252],"height:1.0991em;vertical-align:-0.25em;",[111,1820,1822,1825],{"className":1821},[257],[111,1823,135],{"className":1824,"style":1799},[257,1798],[111,1826,1828],{"className":1827},[266],[111,1829,1831,1864],{"className":1830},[270,271],[111,1832,1834,1861],{"className":1833},[275],[111,1835,1838,1850],{"className":1836,"style":1837},[279],"height:0.8491em;",[111,1839,1841,1844],{"style":1840},"top:-2.453em;margin-left:-0.016em;margin-right:0.05em;",[111,1842],{"className":1843,"style":288},[287],[111,1845,1847],{"className":1846},[292,293,294,295],[111,1848,35],{"className":1849},[257,261,295],[111,1851,1852,1855],{"style":511},[111,1853],{"className":1854,"style":288},[287],[111,1856,1858],{"className":1857},[292,293,294,295],[111,1859,1711],{"className":1860},[257,295],[111,1862,303],{"className":1863},[302],[111,1865,1867],{"className":1866},[275],[111,1868,1871],{"className":1869,"style":1870},[279],"height:0.247em;",[111,1872],{},[111,1874,229],{"className":1875},[257,261],[111,1877,160],{"className":1878},[412],[111,1880,186],{"className":1881},[257,261],[111,1883,203],{"className":1884},[567]," with ",[111,1887,1889,1922],{"className":1888},[114],[111,1890,1892],{"className":1891},[118],[120,1893,1894],{"xmlns":122},[124,1895,1896,1919],{},[127,1897,1898,1900,1902,1904,1906,1909],{},[133,1899,229],{},[139,1901,160],{"stretchy":159},[133,1903,186],{},[139,1905,203],{"stretchy":159},[139,1907,1908],{},"∈",[182,1910,1911,1915],{},[133,1912,1914],{"mathvariant":1913},"double-struck","R",[1916,1917,1918],"mn",{},"128",[237,1920,1921],{"encoding":239},"\\phi(s) \\in \\mathbb{R}^{128}",[111,1923,1925,1952],{"className":1924,"ariaHidden":172},[244],[111,1926,1928,1931,1934,1937,1940,1943,1946,1949],{"className":1927},[248],[111,1929],{"className":1930,"style":408},[252],[111,1932,229],{"className":1933},[257,261],[111,1935,160],{"className":1936},[412],[111,1938,186],{"className":1939},[257,261],[111,1941,203],{"className":1942},[567],[111,1944],{"className":1945,"style":317},[316],[111,1947,1908],{"className":1948},[321],[111,1950],{"className":1951,"style":317},[316],[111,1953,1955,1959],{"className":1954},[248],[111,1956],{"className":1957,"style":1958},[252],"height:0.8141em;",[111,1960,1962,1966],{"className":1961},[257],[111,1963,1914],{"className":1964},[257,1965],"mathbb",[111,1967,1969],{"className":1968},[266],[111,1970,1972],{"className":1971},[270],[111,1973,1975],{"className":1974},[275],[111,1976,1978],{"className":1977,"style":1958},[279],[111,1979,1980,1983],{"style":511},[111,1981],{"className":1982,"style":288},[287],[111,1984,1986],{"className":1985},[292,293,294,295],[111,1987,1989],{"className":1988},[257,295],[111,1990,1918],{"className":1991},[257,295]," (RAM observation)",[27,1994,1995,1997],{},[30,1996,1644],{},": MLP network (128 → 128 → 64 → 18) trained with Adam optimizer, Huber loss, and periodic target network sync",[42,1999,2001],{"id":2000},"environment","Environment",[24,2003,2004,2013,2019,2025],{},[27,2005,2006,2009,2010,203],{},[30,2007,2008],{},"Game",": Atari Tennis via PettingZoo (",[19,2011,2012],{},"tennis_v3",[27,2014,2015,2018],{},[30,2016,2017],{},"Observation",": RAM state (128 features)",[27,2020,2021,2024],{},[30,2022,2023],{},"Action Space",": 18 discrete actions",[27,2026,2027,2030,2031,2034,2035,203],{},[30,2028,2029],{},"Agents",": 2 players (",[19,2032,2033],{},"first_0"," and ",[19,2036,2037],{},"second_0",[42,2039,2041],{"id":2040},"project-structure","Project Structure",[2043,2044,2049],"pre",{"className":2045,"code":2047,"language":2048},[2046],"language-text",".\n├── Project_RL_DANJOU_VON-SIEMENS.ipynb    # Main notebook\n├── README.md                              # This file\n├── checkpoints\u002F                           # Saved agent weights\n│   ├── sarsa.pkl\n│   ├── q_learning.pkl\n│   ├── montecarlo.pkl\n│   └── dqn.pkl\n└── plots\u002F                                 # Training & evaluation plots\n    ├── SARSA_training_curves.png\n    ├── Q-Learning_training_curves.png\n    ├── MonteCarlo_training_curves.png\n    ├── DQN_training_curves.png\n    ├── evaluation_results.png\n    └── championship_matrix.png\n","text",[19,2050,2047],{"__ignoreMap":2051},"",[42,2053,2055],{"id":2054},"key-results","Key Results",[2057,2058,2060],"h3",{"id":2059},"win-rate-vs-random-baseline","Win Rate vs Random Baseline",[54,2062,2063,2072],{},[57,2064,2065],{},[60,2066,2067,2069],{},[63,2068,65],{},[63,2070,2071],{},"Win Rate",[76,2073,2074,2081,2088,2095],{},[60,2075,2076,2078],{},[81,2077,101],{},[81,2079,2080],{},"88.9%",[60,2082,2083,2085],{},[81,2084,677],{},[81,2086,2087],{},"41.2%",[60,2089,2090,2092],{},[81,2091,1271],{},[81,2093,2094],{},"47.1%",[60,2096,2097,2099],{},[81,2098,1644],{},[81,2100,2101],{},"6.2%",[2057,2103,2105],{"id":2104},"championship-tournament","Championship Tournament",[15,2107,2108],{},"Full round-robin tournament where each agent faces every other agent in both positions (first_0\u002Fsecond_0).",[42,2110,2112],{"id":2111},"notebook-sections","Notebook Sections",[2114,2115,2116,2122,2128,2150,2161,2167],"ol",{},[27,2117,2118,2121],{},[30,2119,2120],{},"Configuration & Checkpoints"," — Incremental training workflow with pickle serialization",[27,2123,2124,2127],{},[30,2125,2126],{},"Utility Functions"," — Observation normalization, ε-greedy policy",[27,2129,2130,2133,2134,2137,2138,2137,2141,2137,2144,2137,2147],{},[30,2131,2132],{},"Agent Definitions"," — ",[19,2135,2136],{},"RandomAgent",", ",[19,2139,2140],{},"SarsaAgent",[19,2142,2143],{},"QLearningAgent",[19,2145,2146],{},"MonteCarloAgent",[19,2148,2149],{},"DQNAgent",[27,2151,2152,2133,2155,2137,2158],{},[30,2153,2154],{},"Training Infrastructure",[19,2156,2157],{},"train_agent()",[19,2159,2160],{},"plot_training_curves()",[27,2162,2163,2166],{},[30,2164,2165],{},"Evaluation"," — Match system, random baseline, round-robin tournament",[27,2168,2169,2172],{},[30,2170,2171],{},"Results & Visualization"," — Win rate plots, matchup matrix heatmap",[42,2174,2176],{"id":2175},"known-issues","Known Issues",[24,2178,2179],{},[27,2180,2181,2184],{},[30,2182,2183],{},"Monte Carlo & DQN",": Checkpoint loading issues — saved weights may not restore properly during evaluation (training works correctly)",[42,2186,2188],{"id":2187},"dependencies","Dependencies",[24,2190,2191,2194,2202,2207,2215,2220],{},[27,2192,2193],{},"Python 3.13+",[27,2195,2196,2137,2199],{},[19,2197,2198],{},"numpy",[19,2200,2201],{},"matplotlib",[27,2203,2204],{},[19,2205,2206],{},"torch",[27,2208,2209,2137,2212],{},[19,2210,2211],{},"gymnasium",[19,2213,2214],{},"ale-py",[27,2216,2217],{},[19,2218,2219],{},"pettingzoo",[27,2221,2222],{},[19,2223,2224],{},"tqdm",[42,2226,2228],{"id":2227},"authors","Authors",[24,2230,2231,2234],{},[27,2232,2233],{},"Arthur DANJOU",[27,2235,2236],{},"Moritz VON SIEMENS",{"title":2051,"searchDepth":2238,"depth":2238,"links":2239},2,[2240,2241,2242,2243,2244,2245,2250,2251,2252,2253],{"id":44,"depth":2238,"text":45},{"id":51,"depth":2238,"text":52},{"id":1655,"depth":2238,"text":1656},{"id":2000,"depth":2238,"text":2001},{"id":2040,"depth":2238,"text":2041},{"id":2054,"depth":2238,"text":2055,"children":2246},[2247,2249],{"id":2059,"depth":2248,"text":2060},3,{"id":2104,"depth":2248,"text":2105},{"id":2111,"depth":2238,"text":2112},{"id":2175,"depth":2238,"text":2176},{"id":2187,"depth":2238,"text":2188},{"id":2227,"depth":2238,"text":2228},"2026-03-13","Reinforcement learning algorithms applied to Atari tennis matches for strategy optimization and competitive benchmarking.","rl-tennis-atari-game","Completed","projects\u002Frl-tennis-atari-game",[2260,2261,2262,2263,2264],"Reinforcement Learning","Python","Gymnasium","Atari","ALE","Academic Project","o5Rl9-DKcMMzGIKkxNRe-u1lt0A9jIbCHSNzq29vu9Q",1777982163834]