Spaces:
Running
Running
Update index.html
Browse files- index.html +47 -3
index.html
CHANGED
|
@@ -1253,10 +1253,43 @@
|
|
| 1253 |
}, duration);
|
| 1254 |
}
|
| 1255 |
|
|
|
|
| 1256 |
// Update leaderboard
|
| 1257 |
function updateLeaderboard() {
|
| 1258 |
-
//
|
| 1259 |
-
leaderboard.sort((a, b) =>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1260 |
|
| 1261 |
// Keep only top 5
|
| 1262 |
if (leaderboard.length > 5) {
|
|
@@ -1292,6 +1325,12 @@
|
|
| 1292 |
|
| 1293 |
const rewardCell = document.createElement("td");
|
| 1294 |
rewardCell.textContent = entry.reward.toFixed(1);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1295 |
row.appendChild(rewardCell);
|
| 1296 |
|
| 1297 |
leaderboardBody.appendChild(row);
|
|
@@ -1341,7 +1380,12 @@
|
|
| 1341 |
function getAction(state) {
|
| 1342 |
// In optimal mode, always choose best action
|
| 1343 |
if (isOptimalMode) {
|
| 1344 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1345 |
}
|
| 1346 |
|
| 1347 |
// Exploration (random action)
|
|
|
|
| 1253 |
}, duration);
|
| 1254 |
}
|
| 1255 |
|
| 1256 |
+
// Update leaderboard
|
| 1257 |
// Update leaderboard
|
| 1258 |
function updateLeaderboard() {
|
| 1259 |
+
// Sort non-negative rewards ahead of negative ones, then by higher reward; fewer steps break near-ties between positive rewards
|
| 1260 |
+
leaderboard.sort((a, b) => {
|
| 1261 |
+
// First, prioritize positive rewards over negative ones
|
| 1262 |
+
if (
|
| 1263 |
+
(a.reward > 0 && b.reward < 0) ||
|
| 1264 |
+
(a.reward >= 0 && b.reward < 0)
|
| 1265 |
+
) {
|
| 1266 |
+
return -1;
|
| 1267 |
+
}
|
| 1268 |
+
if (
|
| 1269 |
+
(a.reward < 0 && b.reward > 0) ||
|
| 1270 |
+
(a.reward < 0 && b.reward >= 0)
|
| 1271 |
+
) {
|
| 1272 |
+
return 1;
|
| 1273 |
+
}
|
| 1274 |
+
|
| 1275 |
+
// If both are positive, higher reward wins
|
| 1276 |
+
if (a.reward > 0 && b.reward > 0) {
|
| 1277 |
+
// If rewards are close, sort by steps
|
| 1278 |
+
if (Math.abs(a.reward - b.reward) < 1) {
|
| 1279 |
+
return a.steps - b.steps;
|
| 1280 |
+
}
|
| 1281 |
+
// Otherwise, higher reward wins
|
| 1282 |
+
return b.reward - a.reward;
|
| 1283 |
+
}
|
| 1284 |
+
|
| 1285 |
+
// If both are negative, less negative reward wins
|
| 1286 |
+
if (a.reward < 0 && b.reward < 0) {
|
| 1287 |
+
return b.reward - a.reward;
|
| 1288 |
+
}
|
| 1289 |
+
|
| 1290 |
+
// Fall-through: neither reward is negative and at least one is exactly 0 — sort by steps (fewer first)
|
| 1291 |
+
return a.steps - b.steps;
|
| 1292 |
+
});
|
| 1293 |
|
| 1294 |
// Keep only top 5
|
| 1295 |
if (leaderboard.length > 5) {
|
|
|
|
| 1325 |
|
| 1326 |
const rewardCell = document.createElement("td");
|
| 1327 |
rewardCell.textContent = entry.reward.toFixed(1);
|
| 1328 |
+
// Add color to reward based on value
|
| 1329 |
+
if (entry.reward > 0) {
|
| 1330 |
+
rewardCell.style.color = "var(--success)";
|
| 1331 |
+
} else if (entry.reward < 0) {
|
| 1332 |
+
rewardCell.style.color = "var(--danger)";
|
| 1333 |
+
}
|
| 1334 |
row.appendChild(rewardCell);
|
| 1335 |
|
| 1336 |
leaderboardBody.appendChild(row);
|
|
|
|
| 1380 |
function getAction(state) {
|
| 1381 |
// In optimal mode, always choose best action
|
| 1382 |
if (isOptimalMode) {
|
| 1383 |
+
const maxQ = Math.max(...qTable[state]);
|
| 1384 |
+
// If all values are 0, take a random action instead
|
| 1385 |
+
if (maxQ === 0 && qTable[state].every((val) => val === 0)) {
|
| 1386 |
+
return Math.floor(Math.random() * numActions);
|
| 1387 |
+
}
|
| 1388 |
+
return qTable[state].indexOf(maxQ);
|
| 1389 |
}
|
| 1390 |
|
| 1391 |
// Exploration (random action)
|