{"statement": "88 - 63 = 38", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.31874864897690713, "score_xfmt": null, "score_capint": null}
{"statement": "68 - 15 = 65", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 15, "p_dot": 0.29567258118186146, "score_xfmt": 6.042982916271665e-06, "score_capint": null}
{"statement": "4 x 47 = 188", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6898326309164986, "score_xfmt": null, "score_capint": null}
{"statement": "27 - 55 = -36", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.38413835037499666, "score_xfmt": null, "score_capint": null}
{"statement": "15 - 92 = -77", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5961145837791264, "score_xfmt": null, "score_capint": null}
{"statement": "11 + 27 = 38", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7787127832416445, "score_xfmt": null, "score_capint": null}
{"statement": "99 + 54 = 153", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 44, "p_dot": 0.7701345132663846, "score_xfmt": 0.9767701131524953, "score_capint": null}
{"statement": "47 x 60 = 2803", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 59, "p_dot": 0.710835876641795, "score_xfmt": 0.0022752220067351592, "score_capint": null}
{"statement": "100 x 74 = 7400", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.8142577549442649, "score_xfmt": null, "score_capint": null}
{"statement": "42 x 9 = 378", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 77, "p_dot": 0.6576937953941524, "score_xfmt": 0.022190540303342523, "score_capint": null}
{"statement": "47 x 13 = 603", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6937359236180782, "score_xfmt": null, "score_capint": null}
{"statement": "21 x 9 = 193", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.602187167329248, "score_xfmt": null, "score_capint": null}
{"statement": "55 x 35 = 1923", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4926671292632818, "score_xfmt": null, "score_capint": null}
{"statement": "66 + 57 = 123", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.760013094288297, "score_xfmt": null, "score_capint": null}
{"statement": "84 - 55 = 29", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.47971596277784556, "score_xfmt": null, "score_capint": null}
{"statement": "46 - 5 = 41", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6129993363283575, "score_xfmt": null, "score_capint": null}
{"statement": "86 + 20 = 111", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4950713977450505, "score_xfmt": null, "score_capint": null}
{"statement": "57 - 68 = -23", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.29890442709438503, "score_xfmt": null, "score_capint": null}
{"statement": "40 - 30 = 6", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.32555549312382936, "score_xfmt": null, "score_capint": null}
{"statement": "21 x 33 = 693", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7518362691625953, "score_xfmt": null, "score_capint": null}
{"statement": "94 + 81 = 178", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7225179801462218, "score_xfmt": null, "score_capint": null}
{"statement": "99 + 58 = 147", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.637653668585699, "score_xfmt": null, "score_capint": null}
{"statement": "20 - 87 = -67", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6397413121303543, "score_xfmt": null, "score_capint": null}
{"statement": "51 x 15 = 765", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7698363027302548, "score_xfmt": null, "score_capint": null}
{"statement": "64 + 70 = 147", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5917825645301491, "score_xfmt": null, "score_capint": null}
{"statement": "5 x 31 = 170", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 40, "p_dot": 0.644785919226706, "score_xfmt": 4.8539536726084825e-05, "score_capint": null}
{"statement": "27 - 8 = 19", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 89, "p_dot": 0.45658508059568703, "score_xfmt": 0.9989951250865594, "score_capint": null}
{"statement": "23 - 35 = -12", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.46011051442474127, "score_xfmt": null, "score_capint": null}
{"statement": "69 x 96 = 6624", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.8234027691069059, "score_xfmt": null, "score_capint": null}
{"statement": "98 x 38 = 3724", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7346249131951481, "score_xfmt": null, "score_capint": null}
{"statement": "65 - 18 = 50", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.39047551609110087, "score_xfmt": null, "score_capint": null}
{"statement": "32 - 10 = 22", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 22, "p_dot": 0.49276638962328434, "score_xfmt": 0.9984311603897601, "score_capint": null}
{"statement": "57 - 17 = 40", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 93, "p_dot": 0.6166609399369918, "score_xfmt": 0.9985702650296321, "score_capint": null}
{"statement": "97 - 51 = 44", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.569438764010556, "score_xfmt": null, "score_capint": null}
{"statement": "13 x 44 = 560", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.553091726382263, "score_xfmt": null, "score_capint": null}
{"statement": "62 + 2 = 64", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6492432896047831, "score_xfmt": null, "score_capint": null}
{"statement": "76 + 2 = 67", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.1980206990847364, "score_xfmt": null, "score_capint": null}
{"statement": "22 - 74 = -52", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 95, "p_dot": 0.596609644126147, "score_xfmt": 0.9946164188883637, "score_capint": null}
{"statement": "83 x 89 = 7387", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.741510143969208, "score_xfmt": null, "score_capint": null}
{"statement": "30 + 63 = 105", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4929644300136715, "score_xfmt": null, "score_capint": null}
{"statement": "72 x 4 = 293", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.46437289559980854, "score_xfmt": null, "score_capint": null}
{"statement": "30 x 35 = 1050", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6979220896027982, "score_xfmt": null, "score_capint": null}
{"statement": "16 - 36 = -36", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 67, "p_dot": 0.2787298238836229, "score_xfmt": 1.5020967753173146e-07, "score_capint": null}
{"statement": "83 + 88 = 171", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7980231193359941, "score_xfmt": null, "score_capint": null}
{"statement": "81 + 14 = 95", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7407072910573334, "score_xfmt": null, "score_capint": null}
{"statement": "72 x 78 = 5616", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.8140494083054364, "score_xfmt": null, "score_capint": null}
{"statement": "13 x 95 = 1235", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 68, "p_dot": 0.7612041747197509, "score_xfmt": 0.9917479873896127, "score_capint": null}
{"statement": "66 + 24 = 98", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 28, "p_dot": 0.5828054969897494, "score_xfmt": 0.001655038207286358, "score_capint": null}
{"statement": "94 x 94 = 8836", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 7, "p_dot": 0.8398678887751885, "score_xfmt": 0.9722880314497866, "score_capint": null}
{"statement": "40 - 73 = -52", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.27430538018234074, "score_xfmt": null, "score_capint": null}
{"statement": "63 x 80 = 5050", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7944582141644787, "score_xfmt": null, "score_capint": null}
{"statement": "23 + 62 = 97", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4300586705794558, "score_xfmt": null, "score_capint": null}
{"statement": "79 - 71 = -12", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.25447608856484294, "score_xfmt": null, "score_capint": null}
{"statement": "71 x 7 = 515", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6451489140745252, "score_xfmt": null, "score_capint": null}
{"statement": "35 + 73 = 122", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5859074983745813, "score_xfmt": null, "score_capint": null}
{"statement": "50 - 64 = -22", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.12141873920336366, "score_xfmt": null, "score_capint": null}
{"statement": "5 + 79 = 84", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7670192972291261, "score_xfmt": null, "score_capint": null}
{"statement": "96 + 98 = 194", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7770686720032245, "score_xfmt": null, "score_capint": null}
{"statement": "69 x 99 = 6831", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.8435821291059256, "score_xfmt": null, "score_capint": null}
{"statement": "24 + 90 = 114", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.8204036867246032, "score_xfmt": null, "score_capint": null}
{"statement": "100 - 100 = 0", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7285095297265798, "score_xfmt": null, "score_capint": null}
{"statement": "16 x 79 = 1264", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7331913316156715, "score_xfmt": null, "score_capint": null}
{"statement": "34 x 20 = 680", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6845684937434271, "score_xfmt": null, "score_capint": null}
{"statement": "45 - 38 = 7", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 26, "p_dot": 0.5392322377301753, "score_xfmt": 0.9996771233062608, "score_capint": null}
{"statement": "94 + 67 = 172", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6187005174579099, "score_xfmt": null, "score_capint": null}
{"statement": "89 x 37 = 3293", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 38, "p_dot": 0.7877064598142169, "score_xfmt": 0.1365710288235288, "score_capint": null}
{"statement": "31 x 13 = 403", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 75, "p_dot": 0.8192490706569515, "score_xfmt": 0.9829003925058597, "score_capint": null}
{"statement": "17 x 31 = 527", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7169588468968868, "score_xfmt": null, "score_capint": null}
{"statement": "1 + 14 = 15", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 70, "p_dot": 0.4105951644014567, "score_xfmt": 0.9373814756549419, "score_capint": null}
{"statement": "4 x 13 = 52", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7998973703943193, "score_xfmt": null, "score_capint": null}
{"statement": "91 x 90 = 8196", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6297122726682574, "score_xfmt": null, "score_capint": null}
{"statement": "23 x 85 = 1958", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5278546367771924, "score_xfmt": null, "score_capint": null}
{"statement": "58 - 18 = 40", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6263370631495491, "score_xfmt": null, "score_capint": null}
{"statement": "22 x 89 = 1958", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6874843540135771, "score_xfmt": null, "score_capint": null}
{"statement": "54 + 87 = 157", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6482414406491444, "score_xfmt": null, "score_capint": null}
{"statement": "0 + 10 = 1", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.19644837331725284, "score_xfmt": null, "score_capint": null}
{"statement": "91 + 40 = 131", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7373450414743274, "score_xfmt": null, "score_capint": null}
{"statement": "10 + 32 = 24", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 109, "p_dot": 0.4279803840909153, "score_xfmt": 2.9340653709985802e-06, "score_capint": null}
{"statement": "75 x 51 = 3822", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5920356947463006, "score_xfmt": null, "score_capint": null}
{"statement": "34 - 55 = -21", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.654822364449501, "score_xfmt": null, "score_capint": null}
{"statement": "4 - 74 = -75", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5066243009641767, "score_xfmt": null, "score_capint": null}
{"statement": "41 x 21 = 876", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5032356801675633, "score_xfmt": null, "score_capint": null}
{"statement": "5 - 31 = -26", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7010562164941803, "score_xfmt": null, "score_capint": null}
{"statement": "16 - 81 = -61", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6507950498489663, "score_xfmt": null, "score_capint": null}
{"statement": "84 x 60 = 5040", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 106, "p_dot": 0.8030840505380183, "score_xfmt": 0.9617461081612735, "score_capint": null}
{"statement": "4 x 16 = 45", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5706330444663763, "score_xfmt": null, "score_capint": null}
{"statement": "14 + 84 = 93", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5494356920244172, "score_xfmt": null, "score_capint": null}
{"statement": "83 + 9 = 76", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.3139925180003047, "score_xfmt": null, "score_capint": null}
{"statement": "85 - 13 = 72", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.666911254520528, "score_xfmt": null, "score_capint": null}
{"statement": "9 + 88 = 91", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5809857671847567, "score_xfmt": null, "score_capint": null}
{"statement": "64 - 67 = 3", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 1, "p_dot": 0.2378883141791448, "score_xfmt": 0.00011299592935935976, "score_capint": null}
{"statement": "12 + 89 = 119", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6418994466075674, "score_xfmt": null, "score_capint": null}
{"statement": "84 x 79 = 6616", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7788634803146124, "score_xfmt": null, "score_capint": null}
{"statement": "84 - 24 = 60", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.594240581151098, "score_xfmt": null, "score_capint": null}
{"statement": "73 - 36 = 21", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.48027015407569706, "score_xfmt": null, "score_capint": null}
{"statement": "48 + 61 = 109", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 78, "p_dot": 0.7414243232924491, "score_xfmt": 0.9840978202616458, "score_capint": null}
{"statement": "91 + 14 = 105", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 17, "p_dot": 0.796682030428201, "score_xfmt": 0.9977518786000195, "score_capint": null}
{"statement": "36 - 90 = -69", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4397338305134326, "score_xfmt": null, "score_capint": null}
{"statement": "59 - 48 = 11", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5071920150076039, "score_xfmt": null, "score_capint": null}
{"statement": "27 + 98 = 116", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 27, "p_dot": 0.5963037284091115, "score_xfmt": 0.186293352807237, "score_capint": null}
{"statement": "23 x 18 = 401", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 41, "p_dot": 0.43418513145297766, "score_xfmt": 5.70232608238189e-06, "score_capint": null}
{"statement": "17 - 49 = -20", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 18, "p_dot": 0.32037489861249924, "score_xfmt": 2.82060748276343e-05, "score_capint": null}
{"statement": "72 + 5 = 74", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.46729880501516163, "score_xfmt": null, "score_capint": null}
{"statement": "46 x 8 = 386", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 33, "p_dot": 0.6384737277403474, "score_xfmt": 0.007122624780887586, "score_capint": null}
{"statement": "58 + 40 = 98", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7364760163472965, "score_xfmt": null, "score_capint": null}
{"statement": "20 - 95 = -75", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6403325089486316, "score_xfmt": null, "score_capint": null}
{"statement": "66 - 19 = 41", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 98, "p_dot": 0.4662326615070924, "score_xfmt": 0.0028723048566943023, "score_capint": null}
{"statement": "88 + 23 = 118", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 85, "p_dot": 0.6463469059672207, "score_xfmt": 0.01552443478488646, "score_capint": null}
{"statement": "49 x 24 = 1193", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5270044628996402, "score_xfmt": null, "score_capint": null}
{"statement": "8 - 51 = -40", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.2785486944485456, "score_xfmt": null, "score_capint": null}
{"statement": "76 - 8 = 68", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 57, "p_dot": 0.5363417865009978, "score_xfmt": 0.9994067328185567, "score_capint": null}
{"statement": "72 x 82 = 5884", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7501220346894115, "score_xfmt": null, "score_capint": null}
{"statement": "87 + 60 = 150", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 105, "p_dot": 0.6418963161995634, "score_xfmt": 0.0021544894299925965, "score_capint": null}
{"statement": "47 + 55 = 110", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6668395535089076, "score_xfmt": null, "score_capint": null}
{"statement": "80 + 82 = 172", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6102565118926577, "score_xfmt": null, "score_capint": null}
{"statement": "87 + 6 = 98", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5928091895766556, "score_xfmt": null, "score_capint": null}
{"statement": "73 x 24 = 1752", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7467858485179022, "score_xfmt": null, "score_capint": null}
{"statement": "11 - 81 = -70", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5412362992065027, "score_xfmt": null, "score_capint": null}
{"statement": "86 x 33 = 2851", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.8061466151848435, "score_xfmt": null, "score_capint": null}
{"statement": "67 - 37 = 47", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.3048552814871073, "score_xfmt": null, "score_capint": null}
{"statement": "77 - 91 = -8", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.29719097493216395, "score_xfmt": null, "score_capint": null}
{"statement": "98 + 37 = 135", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 29, "p_dot": 0.7702111682738177, "score_xfmt": 0.991768858343346, "score_capint": null}
{"statement": "80 + 40 = 105", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.37115830020047724, "score_xfmt": null, "score_capint": null}
{"statement": "99 x 70 = 6911", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6392492454033345, "score_xfmt": null, "score_capint": null}
{"statement": "23 - 6 = 1", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.3241076060803607, "score_xfmt": null, "score_capint": null}
{"statement": "94 + 69 = 163", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 9, "p_dot": 0.8486890798667446, "score_xfmt": 0.993911012999011, "score_capint": null}
{"statement": "53 + 62 = 115", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7603399329818785, "score_xfmt": null, "score_capint": null}
{"statement": "83 x 8 = 664", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 4, "p_dot": 0.6536089647561312, "score_xfmt": 0.6675392344660799, "score_capint": null}
{"statement": "29 + 8 = 37", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6901281052269042, "score_xfmt": null, "score_capint": null}
{"statement": "1 x 87 = 87", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5876439426210709, "score_xfmt": null, "score_capint": null}
{"statement": "43 + 91 = 151", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7243418546859175, "score_xfmt": null, "score_capint": null}
{"statement": "5 + 55 = 75", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.34283339139074087, "score_xfmt": null, "score_capint": null}
{"statement": "66 + 34 = 86", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4422509307041764, "score_xfmt": null, "score_capint": null}
{"statement": "40 - 85 = -33", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 71, "p_dot": 0.3152524116449058, "score_xfmt": 1.995358052141959e-05, "score_capint": null}
{"statement": "53 - 28 = 25", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5388173107639886, "score_xfmt": null, "score_capint": null}
{"statement": "86 - 68 = 18", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5011628554202616, "score_xfmt": null, "score_capint": null}
{"statement": "2 x 95 = 190", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 24, "p_dot": 0.6520197783829644, "score_xfmt": 0.956217021175916, "score_capint": null}
{"statement": "54 - 43 = 11", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5295843769563362, "score_xfmt": null, "score_capint": null}
{"statement": "97 + 70 = 148", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5895510034170002, "score_xfmt": null, "score_capint": null}
{"statement": "26 + 87 = 113", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.8163999596145004, "score_xfmt": null, "score_capint": null}
{"statement": "20 x 63 = 1245", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6597280083224177, "score_xfmt": null, "score_capint": null}
{"statement": "57 x 59 = 3346", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6448359789792448, "score_xfmt": null, "score_capint": null}
{"statement": "90 - 10 = 62", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.43685421464033425, "score_xfmt": null, "score_capint": null}
{"statement": "70 x 37 = 2590", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7580176412593573, "score_xfmt": null, "score_capint": null}
{"statement": "12 - 24 = -20", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.2913650784175843, "score_xfmt": null, "score_capint": null}
{"statement": "31 - 15 = 16", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5854150638333522, "score_xfmt": null, "score_capint": null}
{"statement": "6 - 33 = -27", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6596011565998197, "score_xfmt": null, "score_capint": null}
{"statement": "14 + 72 = 86", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6656096830265597, "score_xfmt": null, "score_capint": null}
{"statement": "24 x 24 = 576", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7836640232708305, "score_xfmt": null, "score_capint": null}
{"statement": "81 x 88 = 7128", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.8005016075912863, "score_xfmt": null, "score_capint": null}
{"statement": "68 x 27 = 1836", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7835611638147384, "score_xfmt": null, "score_capint": null}
{"statement": "73 - 56 = 27", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.24369202041998506, "score_xfmt": null, "score_capint": null}
{"statement": "88 x 50 = 4403", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.689893453149125, "score_xfmt": null, "score_capint": null}
{"statement": "95 x 22 = 2090", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7379691561218351, "score_xfmt": null, "score_capint": null}
{"statement": "20 x 41 = 809", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7272583711892366, "score_xfmt": null, "score_capint": null}
{"statement": "79 x 16 = 1251", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 45, "p_dot": 0.5611937325447798, "score_xfmt": 4.280850507265042e-06, "score_capint": null}
{"statement": "98 x 79 = 7724", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6902461629360914, "score_xfmt": null, "score_capint": null}
{"statement": "4 - 7 = -13", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.35419567627832294, "score_xfmt": null, "score_capint": null}
{"statement": "72 + 87 = 153", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.732322275522165, "score_xfmt": null, "score_capint": null}
{"statement": "65 + 28 = 100", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4697111507412046, "score_xfmt": null, "score_capint": null}
{"statement": "24 - 66 = -22", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.3664636113680899, "score_xfmt": null, "score_capint": null}
{"statement": "23 x 8 = 184", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7064157457789406, "score_xfmt": null, "score_capint": null}
{"statement": "8 - 31 = -23", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6550966631621122, "score_xfmt": null, "score_capint": null}
{"statement": "63 + 11 = 74", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7139655470382422, "score_xfmt": null, "score_capint": null}
{"statement": "83 + 82 = 165", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7284109545871615, "score_xfmt": null, "score_capint": null}
{"statement": "49 + 73 = 122", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.8198179791215807, "score_xfmt": null, "score_capint": null}
{"statement": "51 + 34 = 85", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7780382940545678, "score_xfmt": null, "score_capint": null}
{"statement": "83 + 69 = 152", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7741864814888686, "score_xfmt": null, "score_capint": null}
{"statement": "21 - 52 = -31", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7089741711970419, "score_xfmt": null, "score_capint": null}
{"statement": "56 x 56 = 3136", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7782541045453399, "score_xfmt": null, "score_capint": null}
{"statement": "72 + 12 = 84", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7207498901989311, "score_xfmt": null, "score_capint": null}
{"statement": "94 x 69 = 6486", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7959503804449923, "score_xfmt": null, "score_capint": null}
{"statement": "100 x 30 = 2989", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6109055262058973, "score_xfmt": null, "score_capint": null}
{"statement": "31 + 14 = 39", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 83, "p_dot": 0.5331698242807761, "score_xfmt": 0.01609684131760552, "score_capint": null}
{"statement": "56 + 35 = 72", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 51, "p_dot": 0.4020724162692204, "score_xfmt": 1.185947006445048e-05, "score_capint": null}
{"statement": "76 x 15 = 1140", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6671750515233725, "score_xfmt": null, "score_capint": null}
{"statement": "15 - 0 = 1", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.32616700476501137, "score_xfmt": null, "score_capint": null}
{"statement": "55 x 44 = 2420", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7442583664087579, "score_xfmt": null, "score_capint": null}
{"statement": "37 + 100 = 154", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6655611626338214, "score_xfmt": null, "score_capint": null}
{"statement": "58 - 44 = 14", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.4310542093589902, "score_xfmt": null, "score_capint": null}
{"statement": "16 - 5 = 11", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.3912603759672493, "score_xfmt": null, "score_capint": null}
{"statement": "8 - 16 = 12", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 74, "p_dot": 0.30881180288270116, "score_xfmt": 0.00013365248149738965, "score_capint": null}
{"statement": "0 x 84 = -15", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.16764661076013, "score_xfmt": null, "score_capint": null}
{"statement": "38 + 84 = 122", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7843823201255873, "score_xfmt": null, "score_capint": null}
{"statement": "13 - 11 = 2", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.4788889791816473, "score_xfmt": null, "score_capint": null}
{"statement": "59 + 53 = 112", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7186869217548519, "score_xfmt": null, "score_capint": null}
{"statement": "61 + 59 = 131", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5540730540524237, "score_xfmt": null, "score_capint": null}
{"statement": "95 - 8 = 102", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.09375050250673667, "score_xfmt": null, "score_capint": null}
{"statement": "43 - 14 = 29", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.4411655204021372, "score_xfmt": null, "score_capint": null}
{"statement": "90 - 48 = 37", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.46337138523813337, "score_xfmt": null, "score_capint": null}
{"statement": "31 - 11 = 20", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 36, "p_dot": 0.5473771579563618, "score_xfmt": 0.9971347291830918, "score_capint": null}
{"statement": "98 + 99 = 197", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.713320592418313, "score_xfmt": null, "score_capint": null}
{"statement": "39 x 51 = 1989", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 117, "p_dot": 0.6944151949137449, "score_xfmt": 0.3709480411613527, "score_capint": null}
{"statement": "51 - 95 = -57", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.2567542507313192, "score_xfmt": null, "score_capint": null}
{"statement": "70 x 29 = 2030", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7531138029880822, "score_xfmt": null, "score_capint": null}
{"statement": "72 x 87 = 6266", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7043578489683568, "score_xfmt": null, "score_capint": null}
{"statement": "51 + 31 = 82", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 63, "p_dot": 0.7237234296044335, "score_xfmt": 0.9969680796928648, "score_capint": null}
{"statement": "85 x 48 = 4080", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.8108100097160786, "score_xfmt": null, "score_capint": null}
{"statement": "12 - 50 = -27", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.2582409961614758, "score_xfmt": null, "score_capint": null}
{"statement": "84 - 54 = 10", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 12, "p_dot": 0.3978663287125528, "score_xfmt": 0.04707994255532218, "score_capint": null}
{"statement": "32 x 5 = 160", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7154819292481989, "score_xfmt": null, "score_capint": null}
{"statement": "87 x 40 = 3492", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6312640882097185, "score_xfmt": null, "score_capint": null}
{"statement": "77 + 54 = 131", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7857421597000211, "score_xfmt": null, "score_capint": null}
{"statement": "33 x 26 = 858", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.802999316714704, "score_xfmt": null, "score_capint": null}
{"statement": "86 - 48 = 38", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 54, "p_dot": 0.5682711834087968, "score_xfmt": 0.999566020878076, "score_capint": null}
{"statement": "19 - 27 = -8", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5465459873666987, "score_xfmt": null, "score_capint": null}
{"statement": "16 - 24 = -8", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.4277554168365896, "score_xfmt": null, "score_capint": null}
{"statement": "16 x 100 = 1593", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4216490313410759, "score_xfmt": null, "score_capint": null}
{"statement": "6 x 80 = 469", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 92, "p_dot": 0.6950559190008789, "score_xfmt": 0.017613668408534844, "score_capint": null}
{"statement": "42 x 16 = 655", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 10, "p_dot": 0.4541976333130151, "score_xfmt": 0.00015731796979546965, "score_capint": null}
{"statement": "34 - 39 = -13", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.1982170530827716, "score_xfmt": null, "score_capint": null}
{"statement": "56 x 44 = 2474", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 108, "p_dot": 0.7026581498794258, "score_xfmt": 0.02160230225407544, "score_capint": null}
{"statement": "14 x 36 = 485", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 39, "p_dot": 0.40285632107406855, "score_xfmt": 7.205983380964069e-05, "score_capint": null}
{"statement": "25 - 5 = 30", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.09873565298039466, "score_xfmt": null, "score_capint": null}
{"statement": "68 x 87 = 5916", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7879253693390638, "score_xfmt": null, "score_capint": null}
{"statement": "77 + 25 = 102", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.8199856159917545, "score_xfmt": null, "score_capint": null}
{"statement": "97 x 20 = 1940", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6981729826657102, "score_xfmt": null, "score_capint": null}
{"statement": "3 - 14 = -11", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6318453480489552, "score_xfmt": null, "score_capint": null}
{"statement": "92 - 21 = 71", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.8147441573673859, "score_xfmt": null, "score_capint": null}
{"statement": "81 + 33 = 114", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7354117755312473, "score_xfmt": null, "score_capint": null}
{"statement": "17 + 59 = 76", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7035780708538368, "score_xfmt": null, "score_capint": null}
{"statement": "35 + 56 = 91", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 32, "p_dot": 0.8066483323927969, "score_xfmt": 0.9921685379354107, "score_capint": null}
{"statement": "3 x 50 = 150", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5921106060268357, "score_xfmt": null, "score_capint": null}
{"statement": "73 + 83 = 167", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4641243974911049, "score_xfmt": null, "score_capint": null}
{"statement": "82 x 21 = 1703", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6399149461649358, "score_xfmt": null, "score_capint": null}
{"statement": "11 - 8 = 16", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.12369388749357313, "score_xfmt": null, "score_capint": null}
{"statement": "38 x 81 = 3078", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 118, "p_dot": 0.7936165480641648, "score_xfmt": 0.4277777568611137, "score_capint": null}
{"statement": "82 + 3 = 101", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.369571226881817, "score_xfmt": null, "score_capint": null}
{"statement": "67 x 71 = 4757", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7960537537001073, "score_xfmt": null, "score_capint": null}
{"statement": "3 + 11 = 33", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.45995504467282444, "score_xfmt": null, "score_capint": null}
{"statement": "95 - 11 = 88", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4808141045505181, "score_xfmt": null, "score_capint": null}
{"statement": "55 x 22 = 1210", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7576307399431244, "score_xfmt": null, "score_capint": null}
{"statement": "61 - 27 = 34", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5878090016776696, "score_xfmt": null, "score_capint": null}
{"statement": "95 x 89 = 8469", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.8127805156400427, "score_xfmt": null, "score_capint": null}
{"statement": "14 + 59 = 68", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.428592722164467, "score_xfmt": null, "score_capint": null}
{"statement": "71 x 25 = 1775", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7229241032619029, "score_xfmt": null, "score_capint": null}
{"statement": "43 x 35 = 1514", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5997403033543378, "score_xfmt": null, "score_capint": null}
{"statement": "78 + 94 = 179", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.705728419823572, "score_xfmt": null, "score_capint": null}
{"statement": "14 x 63 = 864", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7244759150780737, "score_xfmt": null, "score_capint": null}
{"statement": "59 x 6 = 354", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7261659690411761, "score_xfmt": null, "score_capint": null}
{"statement": "12 - 87 = -57", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.3251728236209601, "score_xfmt": null, "score_capint": null}
{"statement": "59 x 36 = 2106", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.728070683311671, "score_xfmt": null, "score_capint": null}
{"statement": "46 - 11 = 31", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 61, "p_dot": 0.5033183118212037, "score_xfmt": 0.007749950311840907, "score_capint": null}
{"statement": "66 - 48 = 3", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.25911415880545974, "score_xfmt": null, "score_capint": null}
{"statement": "62 x 27 = 1674", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7352438124362379, "score_xfmt": null, "score_capint": null}
{"statement": "95 + 40 = 135", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6924977541202679, "score_xfmt": null, "score_capint": null}
{"statement": "44 + 57 = 113", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.645790352486074, "score_xfmt": null, "score_capint": null}
{"statement": "0 x 92 = 0", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.3501241805497557, "score_xfmt": null, "score_capint": null}
{"statement": "84 x 81 = 6804", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.8677110946155153, "score_xfmt": null, "score_capint": null}
{"statement": "7 x 51 = 357", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6858860747888684, "score_xfmt": null, "score_capint": null}
{"statement": "86 - 68 = 29", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.14195560291409492, "score_xfmt": null, "score_capint": null}
{"statement": "7 - 21 = -14", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 94, "p_dot": 0.5897972145467065, "score_xfmt": 0.9989555844907061, "score_capint": null}
{"statement": "27 - 56 = -21", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.3163055961485952, "score_xfmt": null, "score_capint": null}
{"statement": "24 + 89 = 94", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4450415555620566, "score_xfmt": null, "score_capint": null}
{"statement": "9 + 18 = 40", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4309541251277551, "score_xfmt": null, "score_capint": null}
{"statement": "34 - 42 = -8", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 100, "p_dot": 0.555880464380607, "score_xfmt": 0.9950434601226754, "score_capint": null}
{"statement": "82 x 22 = 1819", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6188423965359107, "score_xfmt": null, "score_capint": null}
{"statement": "58 + 90 = 148", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7981279146333691, "score_xfmt": null, "score_capint": null}
{"statement": "75 - 34 = 41", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6434767161263153, "score_xfmt": null, "score_capint": null}
{"statement": "19 - 24 = -5", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.3242475134320557, "score_xfmt": null, "score_capint": null}
{"statement": "0 - 76 = -76", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7069261571159586, "score_xfmt": null, "score_capint": null}
{"statement": "72 - 38 = 34", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5406508435844444, "score_xfmt": null, "score_capint": null}
{"statement": "100 - 60 = 40", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6494809311116114, "score_xfmt": null, "score_capint": null}
{"statement": "64 + 58 = 129", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5882401987910271, "score_xfmt": null, "score_capint": null}
{"statement": "82 - 65 = 17", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 2, "p_dot": 0.4839574387297034, "score_xfmt": 0.9997762956766224, "score_capint": null}
{"statement": "14 - 99 = -93", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 6, "p_dot": 0.3753180536441505, "score_xfmt": 2.0865388645453722e-07, "score_capint": null}
{"statement": "29 + 42 = 51", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 65, "p_dot": 0.7414551894180477, "score_xfmt": 0.34430524654868194, "score_capint": null}
{"statement": "8 - 85 = -77", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.555732823908329, "score_xfmt": null, "score_capint": null}
{"statement": "79 x 40 = 3160", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7937076259404421, "score_xfmt": null, "score_capint": null}
{"statement": "78 x 92 = 7176", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7918419885099865, "score_xfmt": null, "score_capint": null}
{"statement": "90 + 57 = 127", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7362419962882996, "score_xfmt": null, "score_capint": null}
{"statement": "76 - 5 = 82", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 96, "p_dot": 0.04497467540204525, "score_xfmt": 0.0006364893570512476, "score_capint": null}
{"statement": "78 + 98 = 176", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7165274452418089, "score_xfmt": null, "score_capint": null}
{"statement": "10 x 10 = 100", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6289556277333759, "score_xfmt": null, "score_capint": null}
{"statement": "24 + 12 = 36", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7607378251850605, "score_xfmt": null, "score_capint": null}
{"statement": "10 x 36 = 360", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6649040088523179, "score_xfmt": null, "score_capint": null}
{"statement": "48 x 68 = 3272", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 31, "p_dot": 0.7657988481223583, "score_xfmt": 0.0017950165584169975, "score_capint": null}
{"statement": "12 - 45 = -33", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6248879611957818, "score_xfmt": null, "score_capint": null}
{"statement": "89 - 97 = -19", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.28309451532550156, "score_xfmt": null, "score_capint": null}
{"statement": "28 - 87 = -59", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6209402708336711, "score_xfmt": null, "score_capint": null}
{"statement": "27 - 48 = -36", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.34197013813536614, "score_xfmt": null, "score_capint": null}
{"statement": "39 x 13 = 507", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7953776839422062, "score_xfmt": null, "score_capint": null}
{"statement": "57 x 22 = 1269", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6640746514312923, "score_xfmt": null, "score_capint": null}
{"statement": "42 + 35 = 77", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7543038458097726, "score_xfmt": null, "score_capint": null}
{"statement": "58 x 53 = 3074", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 91, "p_dot": 0.7541496984194964, "score_xfmt": 0.11193426383609102, "score_capint": null}
{"statement": "5 + 84 = 89", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7833260814659297, "score_xfmt": null, "score_capint": null}
{"statement": "29 - 4 = 25", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5027005773736164, "score_xfmt": null, "score_capint": null}
{"statement": "16 - 97 = -64", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.33156209299340844, "score_xfmt": null, "score_capint": null}
{"statement": "94 + 35 = 129", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7309074157383293, "score_xfmt": null, "score_capint": null}
{"statement": "33 + 50 = 83", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 34, "p_dot": 0.7378330477513373, "score_xfmt": 0.9961542905605337, "score_capint": null}
{"statement": "70 - 0 = 70", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 110, "p_dot": 0.6619834633311257, "score_xfmt": 0.9857211472748793, "score_capint": null}
{"statement": "75 + 35 = 110", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 60, "p_dot": 0.6780713812913746, "score_xfmt": 0.9832627626563907, "score_capint": null}
{"statement": "97 - 69 = 43", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.32574385637417436, "score_xfmt": null, "score_capint": null}
{"statement": "91 + 36 = 127", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7637269732076675, "score_xfmt": null, "score_capint": null}
{"statement": "68 x 54 = 3672", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 66, "p_dot": 0.8064899393357337, "score_xfmt": 0.9027514074507257, "score_capint": null}
{"statement": "34 x 89 = 3026", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.8435859508463182, "score_xfmt": null, "score_capint": null}
{"statement": "53 x 77 = 4061", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.8323935648368206, "score_xfmt": null, "score_capint": null}
{"statement": "92 x 61 = 5626", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7568074886221439, "score_xfmt": null, "score_capint": null}
{"statement": "98 - 73 = 45", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.421343938796781, "score_xfmt": null, "score_capint": null}
{"statement": "41 - 43 = -2", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.38113580021308735, "score_xfmt": null, "score_capint": null}
{"statement": "46 + 39 = 85", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 37, "p_dot": 0.803319682367146, "score_xfmt": 0.9886640451540426, "score_capint": null}
{"statement": "2 x 97 = 203", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.41473993146792054, "score_xfmt": null, "score_capint": null}
{"statement": "45 x 26 = 1170", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7178969455417246, "score_xfmt": null, "score_capint": null}
{"statement": "56 - 80 = -15", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.21302063390612602, "score_xfmt": null, "score_capint": null}
{"statement": "41 - 27 = 14", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.3703822873067111, "score_xfmt": null, "score_capint": null}
{"statement": "9 x 49 = 456", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 104, "p_dot": 0.48546987725421786, "score_xfmt": 0.0005495804859561646, "score_capint": null}
{"statement": "28 + 62 = 90", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 23, "p_dot": 0.6861054007895291, "score_xfmt": 0.9933965380768952, "score_capint": null}
{"statement": "1 + 92 = 107", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4362594799604267, "score_xfmt": null, "score_capint": null}
{"statement": "11 + 96 = 107", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6977504807291552, "score_xfmt": null, "score_capint": null}
{"statement": "33 + 64 = 97", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7002329917158931, "score_xfmt": null, "score_capint": null}
{"statement": "78 x 36 = 2793", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7406754407566041, "score_xfmt": null, "score_capint": null}
{"statement": "6 + 44 = 50", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7119460271205753, "score_xfmt": null, "score_capint": null}
{"statement": "73 x 45 = 3295", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.8150931817945093, "score_xfmt": null, "score_capint": null}
{"statement": "90 + 80 = 170", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6907099038362503, "score_xfmt": null, "score_capint": null}
{"statement": "88 x 25 = 2200", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.800796417548554, "score_xfmt": null, "score_capint": null}
{"statement": "98 + 47 = 145", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.823475389246596, "score_xfmt": null, "score_capint": null}
{"statement": "77 - 17 = 62", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 101, "p_dot": 0.49151252675801516, "score_xfmt": 0.15980117116207856, "score_capint": null}
{"statement": "62 + 24 = 81", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6141980546526611, "score_xfmt": null, "score_capint": null}
{"statement": "46 + 96 = 149", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 0, "p_dot": 0.45748767466284335, "score_xfmt": 1.7493416998564118e-06, "score_capint": null}
{"statement": "19 x 70 = 1350", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6605637916363776, "score_xfmt": null, "score_capint": null}
{"statement": "38 x 75 = 2866", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6657783053815365, "score_xfmt": null, "score_capint": null}
{"statement": "93 - 19 = 78", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5249795577256009, "score_xfmt": null, "score_capint": null}
{"statement": "82 + 43 = 125", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7359495434211567, "score_xfmt": null, "score_capint": null}
{"statement": "72 + 71 = 155", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5248310029273853, "score_xfmt": null, "score_capint": null}
{"statement": "77 + 65 = 142", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.804430581163615, "score_xfmt": null, "score_capint": null}
{"statement": "10 x 41 = 400", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4081024054903537, "score_xfmt": null, "score_capint": null}
{"statement": "27 x 19 = 497", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6672565946355462, "score_xfmt": null, "score_capint": null}
{"statement": "31 + 25 = 56", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 64, "p_dot": 0.7868863421026617, "score_xfmt": 0.998535827593504, "score_capint": null}
{"statement": "35 x 79 = 2745", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.8468253221944906, "score_xfmt": null, "score_capint": null}
{"statement": "5 - 81 = -88", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.3557002632878721, "score_xfmt": null, "score_capint": null}
{"statement": "53 x 84 = 4452", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7903966163285077, "score_xfmt": null, "score_capint": null}
{"statement": "36 x 54 = 1944", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 86, "p_dot": 0.6752908760681748, "score_xfmt": 0.9656726305361282, "score_capint": null}
{"statement": "85 x 54 = 4578", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 112, "p_dot": 0.6262638980988413, "score_xfmt": 0.006086537389755677, "score_capint": null}
{"statement": "24 - 99 = -63", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 13, "p_dot": 0.3577954303473234, "score_xfmt": 0.0024174974736557004, "score_capint": null}
{"statement": "11 x 78 = 841", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7972576341126114, "score_xfmt": null, "score_capint": null}
{"statement": "0 - 78 = -70", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.3546165858861059, "score_xfmt": null, "score_capint": null}
{"statement": "25 - 46 = -21", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 69, "p_dot": 0.5776690789498389, "score_xfmt": 0.9986546846990166, "score_capint": null}
{"statement": "24 + 40 = 82", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6250314887147397, "score_xfmt": null, "score_capint": null}
{"statement": "18 x 16 = 297", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.530833950266242, "score_xfmt": null, "score_capint": null}
{"statement": "83 - 47 = 36", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 62, "p_dot": 0.6434333698125556, "score_xfmt": 0.9988327855110484, "score_capint": null}
{"statement": "71 + 46 = 104", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 113, "p_dot": 0.542668090260122, "score_xfmt": 0.0012055402710136724, "score_capint": null}
{"statement": "15 + 58 = 80", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.36967288004234433, "score_xfmt": null, "score_capint": null}
{"statement": "62 + 25 = 92", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6191853029886261, "score_xfmt": null, "score_capint": null}
{"statement": "84 - 91 = -7", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6101733522955328, "score_xfmt": null, "score_capint": null}
{"statement": "38 x 1 = 20", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 87, "p_dot": 0.23395507829263806, "score_xfmt": 7.649568368635264e-06, "score_capint": null}
{"statement": "55 x 0 = 0", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 81, "p_dot": 0.6499929580604658, "score_xfmt": 0.9968646746452795, "score_capint": null}
{"statement": "37 - 95 = -62", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5990789179923013, "score_xfmt": null, "score_capint": null}
{"statement": "17 - 4 = 29", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.17520015221089125, "score_xfmt": null, "score_capint": null}
{"statement": "74 + 84 = 146", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.46914942166768014, "score_xfmt": null, "score_capint": null}
{"statement": "70 - 18 = 52", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6497750515118241, "score_xfmt": null, "score_capint": null}
{"statement": "72 + 84 = 156", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7512697414495051, "score_xfmt": null, "score_capint": null}
{"statement": "1 - 70 = -72", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.34419265401083976, "score_xfmt": null, "score_capint": null}
{"statement": "37 x 41 = 1503", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 35, "p_dot": 0.6883314549922943, "score_xfmt": 0.013281484521530591, "score_capint": null}
{"statement": "43 x 2 = 86", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7415891343262047, "score_xfmt": null, "score_capint": null}
{"statement": "88 x 19 = 1672", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7217744439840317, "score_xfmt": null, "score_capint": null}
{"statement": "69 + 10 = 79", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 8, "p_dot": 0.7440920909866691, "score_xfmt": 0.9934798105517694, "score_capint": null}
{"statement": "64 x 13 = 832", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7872210590285249, "score_xfmt": null, "score_capint": null}
{"statement": "13 + 55 = 77", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.504605446010828, "score_xfmt": null, "score_capint": null}
{"statement": "81 x 21 = 1701", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7418317031115294, "score_xfmt": null, "score_capint": null}
{"statement": "30 x 80 = 2410", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.773471842519939, "score_xfmt": null, "score_capint": null}
{"statement": "61 + 60 = 136", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.39992591191548854, "score_xfmt": null, "score_capint": null}
{"statement": "100 - 40 = 60", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7008293271064758, "score_xfmt": null, "score_capint": null}
{"statement": "85 + 45 = 143", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6866578271146864, "score_xfmt": null, "score_capint": null}
{"statement": "90 - 12 = 84", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.3695551751879975, "score_xfmt": null, "score_capint": null}
{"statement": "54 + 4 = 58", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6828076106030494, "score_xfmt": null, "score_capint": null}
{"statement": "84 - 88 = -5", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.2351637517567724, "score_xfmt": null, "score_capint": null}
{"statement": "57 - 31 = 26", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5679372911108658, "score_xfmt": null, "score_capint": null}
{"statement": "94 x 56 = 5264", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.8370653747115284, "score_xfmt": null, "score_capint": null}
{"statement": "94 - 42 = 52", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 56, "p_dot": 0.6665527898585424, "score_xfmt": 0.9940212595803115, "score_capint": null}
{"statement": "33 - 16 = 17", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5503100561327301, "score_xfmt": null, "score_capint": null}
{"statement": "40 - 96 = -56", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6165401848265901, "score_xfmt": null, "score_capint": null}
{"statement": "7 + 65 = 72", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 3, "p_dot": 0.7306021107360721, "score_xfmt": 0.9969619601456521, "score_capint": null}
{"statement": "42 + 17 = 71", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6232049445388839, "score_xfmt": null, "score_capint": null}
{"statement": "1 x 58 = 58", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 25, "p_dot": 0.5269755098270252, "score_xfmt": 0.9809853186018832, "score_capint": null}
{"statement": "13 x 86 = 1118", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7397870540153235, "score_xfmt": null, "score_capint": null}
{"statement": "72 - 66 = 6", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.532191485632211, "score_xfmt": null, "score_capint": null}
{"statement": "79 + 95 = 174", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7857209041249007, "score_xfmt": null, "score_capint": null}
{"statement": "76 + 7 = 76", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.35207183938473463, "score_xfmt": null, "score_capint": null}
{"statement": "98 + 16 = 114", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.715976964100264, "score_xfmt": null, "score_capint": null}
{"statement": "54 - 52 = 2", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5518595210742205, "score_xfmt": null, "score_capint": null}
{"statement": "14 x 19 = 266", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6780185638926923, "score_xfmt": null, "score_capint": null}
{"statement": "74 x 70 = 5178", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7190193214919418, "score_xfmt": null, "score_capint": null}
{"statement": "83 - 41 = 42", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5961903649149463, "score_xfmt": null, "score_capint": null}
{"statement": "71 - 68 = 3", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.38033004268072546, "score_xfmt": null, "score_capint": null}
{"statement": "52 + 3 = 55", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7151154555613175, "score_xfmt": null, "score_capint": null}
{"statement": "35 - 96 = -61", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7424286897294223, "score_xfmt": null, "score_capint": null}
{"statement": "89 - 49 = 40", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6682424205355346, "score_xfmt": null, "score_capint": null}
{"statement": "27 - 65 = -38", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6314899717690423, "score_xfmt": null, "score_capint": null}
{"statement": "70 + 9 = 78", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5805253869621083, "score_xfmt": null, "score_capint": null}
{"statement": "35 x 92 = 3237", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7224273583851755, "score_xfmt": null, "score_capint": null}
{"statement": "85 - 15 = 82", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.22443622897844762, "score_xfmt": null, "score_capint": null}
{"statement": "78 - 52 = 28", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 115, "p_dot": 0.2402581050992012, "score_xfmt": 0.00014866971171472216, "score_capint": null}
{"statement": "34 x 27 = 914", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 50, "p_dot": 0.7185182778630406, "score_xfmt": 0.9190178221092572, "score_capint": null}
{"statement": "41 - 14 = 44", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 43, "p_dot": 0.24618071410804987, "score_xfmt": 0.0002530566572040087, "score_capint": null}
{"statement": "30 x 99 = 2958", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 46, "p_dot": 0.6759194782935083, "score_xfmt": 0.0003615272772695519, "score_capint": null}
{"statement": "70 - 69 = 11", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 42, "p_dot": 0.22721254616044462, "score_xfmt": 0.0010844670381347772, "score_capint": null}
{"statement": "60 + 28 = 88", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7193611015100032, "score_xfmt": null, "score_capint": null}
{"statement": "80 x 38 = 3022", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6703112309332937, "score_xfmt": null, "score_capint": null}
{"statement": "64 + 77 = 141", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.859353495616233, "score_xfmt": null, "score_capint": null}
{"statement": "58 - 51 = 3", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.2926190313883126, "score_xfmt": null, "score_capint": null}
{"statement": "69 - 60 = 11", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4148299116641283, "score_xfmt": null, "score_capint": null}
{"statement": "38 + 61 = 86", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5370707644615322, "score_xfmt": null, "score_capint": null}
{"statement": "25 + 54 = 61", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6005039918236434, "score_xfmt": null, "score_capint": null}
{"statement": "62 + 80 = 142", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7752359504811466, "score_xfmt": null, "score_capint": null}
{"statement": "3 - 5 = -4", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.23099637718405575, "score_xfmt": null, "score_capint": null}
{"statement": "28 + 0 = 28", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7449579623062164, "score_xfmt": null, "score_capint": null}
{"statement": "25 x 22 = 555", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6003848103573546, "score_xfmt": null, "score_capint": null}
{"statement": "55 - 20 = 35", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5691768337856047, "score_xfmt": null, "score_capint": null}
{"statement": "43 + 13 = 56", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.66070551215671, "score_xfmt": null, "score_capint": null}
{"statement": "47 + 70 = 134", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5334251601016149, "score_xfmt": null, "score_capint": null}
{"statement": "19 - 29 = -10", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.47868891793768853, "score_xfmt": null, "score_capint": null}
{"statement": "10 - 82 = -72", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6967617858899757, "score_xfmt": null, "score_capint": null}
{"statement": "7 x 30 = 210", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7596210229676217, "score_xfmt": null, "score_capint": null}
{"statement": "84 + 80 = 183", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7130909925326705, "score_xfmt": null, "score_capint": null}
{"statement": "5 + 96 = 101", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.8167598092113622, "score_xfmt": null, "score_capint": null}
{"statement": "3 x 39 = 117", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6178621455328539, "score_xfmt": null, "score_capint": null}
{"statement": "88 - 76 = 2", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 47, "p_dot": 0.4911854389356449, "score_xfmt": 0.20668718325385974, "score_capint": null}
{"statement": "57 + 33 = 90", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7762607430340722, "score_xfmt": null, "score_capint": null}
{"statement": "19 - 34 = -15", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5271620835992508, "score_xfmt": null, "score_capint": null}
{"statement": "13 + 48 = 61", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7535234490642324, "score_xfmt": null, "score_capint": null}
{"statement": "14 x 87 = 1218", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6982325538992882, "score_xfmt": null, "score_capint": null}
{"statement": "95 - 70 = 37", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.16676264442503452, "score_xfmt": null, "score_capint": null}
{"statement": "55 x 46 = 2530", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 48, "p_dot": 0.7248476091772318, "score_xfmt": 0.9688825597146802, "score_capint": null}
{"statement": "39 + 63 = 105", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7342194620287046, "score_xfmt": null, "score_capint": null}
{"statement": "69 x 11 = 759", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 90, "p_dot": 0.7428516375366598, "score_xfmt": 0.9905149817231246, "score_capint": null}
{"statement": "96 x 54 = 5186", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7261241504456848, "score_xfmt": null, "score_capint": null}
{"statement": "28 - 57 = -30", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 14, "p_dot": 0.3052364307222888, "score_xfmt": 1.54100342727775e-08, "score_capint": null}
{"statement": "25 - 49 = -28", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.23788390774279833, "score_xfmt": null, "score_capint": null}
{"statement": "93 x 6 = 558", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6920321939978749, "score_xfmt": null, "score_capint": null}
{"statement": "85 - 86 = -1", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 19, "p_dot": 0.48812597314827144, "score_xfmt": 0.9985645970214694, "score_capint": null}
{"statement": "14 + 17 = 29", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6811064556241035, "score_xfmt": null, "score_capint": null}
{"statement": "36 - 26 = 10", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.4578120278310962, "score_xfmt": null, "score_capint": null}
{"statement": "76 - 39 = 41", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.30481516383588314, "score_xfmt": null, "score_capint": null}
{"statement": "93 x 58 = 5394", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7171480506658554, "score_xfmt": null, "score_capint": null}
{"statement": "73 + 60 = 133", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 116, "p_dot": 0.6621312039787881, "score_xfmt": 0.9920779485187972, "score_capint": null}
{"statement": "47 - 47 = 3", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.23369807552080601, "score_xfmt": null, "score_capint": null}
{"statement": "25 + 18 = 43", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7810328595805913, "score_xfmt": null, "score_capint": null}
{"statement": "16 x 92 = 1472", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.625772119150497, "score_xfmt": null, "score_capint": null}
{"statement": "49 + 71 = 129", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6507076963316649, "score_xfmt": null, "score_capint": null}
{"statement": "11 + 55 = 78", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5070335277123377, "score_xfmt": null, "score_capint": null}
{"statement": "97 x 62 = 6016", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.666799764148891, "score_xfmt": null, "score_capint": null}
{"statement": "74 + 18 = 107", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.49536444421391934, "score_xfmt": null, "score_capint": null}
{"statement": "59 - 89 = -30", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5669243790907785, "score_xfmt": null, "score_capint": null}
{"statement": "6 - 74 = -68", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6220073874574155, "score_xfmt": null, "score_capint": null}
{"statement": "66 - 83 = -17", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5377871430828236, "score_xfmt": null, "score_capint": null}
{"statement": "85 + 39 = 116", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 58, "p_dot": 0.49899889167863876, "score_xfmt": 0.739658462432472, "score_capint": null}
{"statement": "95 - 74 = 21", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6370544492965564, "score_xfmt": null, "score_capint": null}
{"statement": "16 x 100 = 1612", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4550664504058659, "score_xfmt": null, "score_capint": null}
{"statement": "82 x 9 = 738", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 97, "p_dot": 0.7374151388648897, "score_xfmt": 0.9711431926612146, "score_capint": null}
{"statement": "29 x 33 = 976", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.48745910357683897, "score_xfmt": null, "score_capint": null}
{"statement": "77 x 26 = 2002", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6580644031055272, "score_xfmt": null, "score_capint": null}
{"statement": "99 - 71 = 28", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6822043439606205, "score_xfmt": null, "score_capint": null}
{"statement": "36 + 10 = 37", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 88, "p_dot": 0.4112299921689555, "score_xfmt": 0.0003275852828403973, "score_capint": null}
{"statement": "95 - 63 = 40", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.3532572593539953, "score_xfmt": null, "score_capint": null}
{"statement": "31 - 9 = 22", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5366646500770003, "score_xfmt": null, "score_capint": null}
{"statement": "39 x 76 = 2983", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7748201657086611, "score_xfmt": null, "score_capint": null}
{"statement": "98 x 74 = 7252", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.779648068593815, "score_xfmt": null, "score_capint": null}
{"statement": "26 - 53 = -27", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5832831559237093, "score_xfmt": null, "score_capint": null}
{"statement": "47 x 39 = 1844", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 20, "p_dot": 0.6184391344431788, "score_xfmt": 0.00017525101410427704, "score_capint": null}
{"statement": "72 + 76 = 146", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6190411469433457, "score_xfmt": null, "score_capint": null}
{"statement": "56 - 15 = 51", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5340372327482328, "score_xfmt": null, "score_capint": null}
{"statement": "86 - 100 = -16", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.1301662486512214, "score_xfmt": null, "score_capint": null}
{"statement": "31 + 28 = 59", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6956983568379655, "score_xfmt": null, "score_capint": null}
{"statement": "75 x 88 = 6600", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.8021387131884694, "score_xfmt": null, "score_capint": null}
{"statement": "28 x 17 = 476", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7696970251272433, "score_xfmt": null, "score_capint": null}
{"statement": "12 x 71 = 839", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6814177047926933, "score_xfmt": null, "score_capint": null}
{"statement": "34 - 21 = 29", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.26763374102301896, "score_xfmt": null, "score_capint": null}
{"statement": "80 + 56 = 129", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5184725655708462, "score_xfmt": null, "score_capint": null}
{"statement": "30 - 39 = -16", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.08464014064520597, "score_xfmt": null, "score_capint": null}
{"statement": "52 x 42 = 2184", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7034891236107796, "score_xfmt": null, "score_capint": null}
{"statement": "19 - 9 = 1", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5026833198498935, "score_xfmt": null, "score_capint": null}
{"statement": "85 + 94 = 179", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.8025513729080558, "score_xfmt": null, "score_capint": null}
{"statement": "42 + 11 = 53", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7542373844189569, "score_xfmt": null, "score_capint": null}
{"statement": "48 - 76 = -28", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.589804004994221, "score_xfmt": null, "score_capint": null}
{"statement": "35 x 58 = 2030", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 49, "p_dot": 0.7709451138507575, "score_xfmt": 0.9819416435688343, "score_capint": null}
{"statement": "64 + 67 = 131", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 107, "p_dot": 0.7904759717930574, "score_xfmt": 0.7944472017594154, "score_capint": null}
{"statement": "31 + 18 = 47", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6606505066156387, "score_xfmt": null, "score_capint": null}
{"statement": "27 x 7 = 189", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7700570547021925, "score_xfmt": null, "score_capint": null}
{"statement": "73 + 9 = 93", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4535426098154858, "score_xfmt": null, "score_capint": null}
{"statement": "81 - 53 = 24", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.2758669019676745, "score_xfmt": null, "score_capint": null}
{"statement": "41 + 85 = 120", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5672988505102694, "score_xfmt": null, "score_capint": null}
{"statement": "29 - 12 = 17", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5006988636450842, "score_xfmt": null, "score_capint": null}
{"statement": "17 x 97 = 1633", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6713980752974749, "score_xfmt": null, "score_capint": null}
{"statement": "8 + 86 = 94", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5958377839997411, "score_xfmt": null, "score_capint": null}
{"statement": "48 + 43 = 107", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6292102544102818, "score_xfmt": null, "score_capint": null}
{"statement": "58 x 32 = 1857", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 80, "p_dot": 0.5470335744321346, "score_xfmt": 2.524854452108504e-07, "score_capint": null}
{"statement": "16 x 55 = 882", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5493771695764735, "score_xfmt": null, "score_capint": null}
{"statement": "96 x 34 = 3264", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7528758691623807, "score_xfmt": null, "score_capint": null}
{"statement": "62 x 8 = 496", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.772904854035005, "score_xfmt": null, "score_capint": null}
{"statement": "69 - 90 = -21", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 5, "p_dot": 0.5552436396246776, "score_xfmt": 0.9692475818320341, "score_capint": null}
{"statement": "17 - 8 = -5", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5113580302568153, "score_xfmt": null, "score_capint": null}
{"statement": "47 x 8 = 376", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5725703198695555, "score_xfmt": null, "score_capint": null}
{"statement": "45 + 64 = 99", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6435343094635755, "score_xfmt": null, "score_capint": null}
{"statement": "36 - 20 = 16", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5927796362666413, "score_xfmt": null, "score_capint": null}
{"statement": "77 x 77 = 5929", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 30, "p_dot": 0.8292866114061326, "score_xfmt": 0.9614031216994839, "score_capint": null}
{"statement": "78 x 5 = 373", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4118343540467322, "score_xfmt": null, "score_capint": null}
{"statement": "70 + 12 = 82", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7453129179775715, "score_xfmt": null, "score_capint": null}
{"statement": "0 - 49 = -49", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.27016259625088423, "score_xfmt": null, "score_capint": null}
{"statement": "2 x 97 = 194", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6244890171801671, "score_xfmt": null, "score_capint": null}
{"statement": "54 - 88 = -19", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.3321421048603952, "score_xfmt": null, "score_capint": null}
{"statement": "85 - 82 = 3", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.44737655494827777, "score_xfmt": null, "score_capint": null}
{"statement": "49 - 78 = -21", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 102, "p_dot": 0.3241915460675955, "score_xfmt": 1.0780198079676322e-05, "score_capint": null}
{"statement": "17 x 59 = 1003", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7781260963529348, "score_xfmt": null, "score_capint": null}
{"statement": "72 x 97 = 6999", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7350420816801488, "score_xfmt": null, "score_capint": null}
{"statement": "24 - 36 = -10", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 11, "p_dot": 0.16968729719519615, "score_xfmt": 9.78244392692802e-09, "score_capint": null}
{"statement": "40 x 27 = 1080", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7457059333100915, "score_xfmt": null, "score_capint": null}
{"statement": "12 + 56 = 85", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.599599250301253, "score_xfmt": null, "score_capint": null}
{"statement": "23 x 63 = 1433", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6620037027169019, "score_xfmt": null, "score_capint": null}
{"statement": "43 x 26 = 1118", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7546660176012665, "score_xfmt": null, "score_capint": null}
{"statement": "79 x 46 = 3634", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7380397240631282, "score_xfmt": null, "score_capint": null}
{"statement": "81 x 33 = 2673", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7286050647962838, "score_xfmt": null, "score_capint": null}
{"statement": "62 + 32 = 94", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6309414893621579, "score_xfmt": null, "score_capint": null}
{"statement": "91 x 37 = 3358", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6788355617318302, "score_xfmt": null, "score_capint": null}
{"statement": "63 + 14 = 85", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6351770020555705, "score_xfmt": null, "score_capint": null}
{"statement": "45 x 28 = 1267", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5810823887586594, "score_xfmt": null, "score_capint": null}
{"statement": "31 x 74 = 2294", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.737932626681868, "score_xfmt": null, "score_capint": null}
{"statement": "78 - 68 = 10", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.43652919156011194, "score_xfmt": null, "score_capint": null}
{"statement": "91 + 25 = 116", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7172503666952252, "score_xfmt": null, "score_capint": null}
{"statement": "100 - 64 = 35", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4202529307221994, "score_xfmt": null, "score_capint": null}
{"statement": "47 x 59 = 2778", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7734502551611513, "score_xfmt": null, "score_capint": null}
{"statement": "75 - 54 = 40", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 84, "p_dot": 0.28045525727793574, "score_xfmt": 1.2210570798555404e-05, "score_capint": null}
{"statement": "23 x 88 = 2030", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 79, "p_dot": 0.6347073330543935, "score_xfmt": 0.0045783187187528425, "score_capint": null}
{"statement": "77 x 23 = 1761", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7664267485961318, "score_xfmt": null, "score_capint": null}
{"statement": "59 x 55 = 3245", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.8270255501847714, "score_xfmt": null, "score_capint": null}
{"statement": "62 + 71 = 149", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6388320029363967, "score_xfmt": null, "score_capint": null}
{"statement": "54 - 70 = -16", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5249182407278568, "score_xfmt": null, "score_capint": null}
{"statement": "62 + 57 = 116", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6337449622806162, "score_xfmt": null, "score_capint": null}
{"statement": "100 - 5 = 95", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7253000086639076, "score_xfmt": null, "score_capint": null}
{"statement": "3 - 49 = -46", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7110194338019937, "score_xfmt": null, "score_capint": null}
{"statement": "52 - 41 = 11", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5630420978413895, "score_xfmt": null, "score_capint": null}
{"statement": "39 + 23 = 73", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5827624136582017, "score_xfmt": null, "score_capint": null}
{"statement": "79 + 19 = 98", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6900771844666451, "score_xfmt": null, "score_capint": null}
{"statement": "99 + 52 = 162", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 99, "p_dot": 0.5117008979432285, "score_xfmt": 5.372146894648618e-07, "score_capint": null}
{"statement": "80 + 77 = 151", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.678583180939313, "score_xfmt": null, "score_capint": null}
{"statement": "82 + 18 = 96", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5695095321862027, "score_xfmt": null, "score_capint": null}
{"statement": "98 - 35 = 63", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7348640625132248, "score_xfmt": null, "score_capint": null}
{"statement": "67 - 46 = 32", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.19979794835671782, "score_xfmt": null, "score_capint": null}
{"statement": "37 x 2 = 61", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.3885076348669827, "score_xfmt": null, "score_capint": null}
{"statement": "92 x 84 = 7723", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6509867415297776, "score_xfmt": null, "score_capint": null}
{"statement": "19 + 34 = 50", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.48752239951863885, "score_xfmt": null, "score_capint": null}
{"statement": "86 + 39 = 125", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 73, "p_dot": 0.7346832500770688, "score_xfmt": 0.9875392990462691, "score_capint": null}
{"statement": "6 - 6 = 16", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.2779039430897683, "score_xfmt": null, "score_capint": null}
{"statement": "1 x 85 = 85", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.5506763374432921, "score_xfmt": null, "score_capint": null}
{"statement": "47 + 97 = 144", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7256876149913296, "score_xfmt": null, "score_capint": null}
{"statement": "64 + 19 = 68", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5216182775329798, "score_xfmt": null, "score_capint": null}
{"statement": "12 x 91 = 1103", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6864975381176919, "score_xfmt": null, "score_capint": null}
{"statement": "33 + 6 = 53", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.3907017116434872, "score_xfmt": null, "score_capint": null}
{"statement": "94 x 21 = 1974", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7111808122135699, "score_xfmt": null, "score_capint": null}
{"statement": "65 + 75 = 155", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5931991750840098, "score_xfmt": null, "score_capint": null}
{"statement": "90 x 54 = 4860", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.8472353664692491, "score_xfmt": null, "score_capint": null}
{"statement": "70 - 91 = -8", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.22611405118368566, "score_xfmt": null, "score_capint": null}
{"statement": "38 + 10 = 52", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 52, "p_dot": 0.5610188118298538, "score_xfmt": 0.009793965658943526, "score_capint": null}
{"statement": "96 + 79 = 172", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6552733217831701, "score_xfmt": null, "score_capint": null}
{"statement": "15 + 48 = 63", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 16, "p_dot": 0.770760633982718, "score_xfmt": 0.9966177024749148, "score_capint": null}
{"statement": "64 x 39 = 2496", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 111, "p_dot": 0.676245047012344, "score_xfmt": 0.9807348973606432, "score_capint": null}
{"statement": "90 - 28 = 79", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4066466469084844, "score_xfmt": null, "score_capint": null}
{"statement": "93 + 31 = 124", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 55, "p_dot": 0.7525708243483678, "score_xfmt": 0.9990472158828321, "score_capint": null}
{"statement": "53 x 68 = 3604", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.8454941400559619, "score_xfmt": null, "score_capint": null}
{"statement": "35 + 5 = 40", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 114, "p_dot": 0.5640590855618939, "score_xfmt": 0.9980145869092009, "score_capint": null}
{"statement": "86 x 96 = 8261", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 76, "p_dot": 0.78471510251984, "score_xfmt": 0.002067167349951597, "score_capint": null}
{"statement": "42 + 34 = 64", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.3875778147485107, "score_xfmt": null, "score_capint": null}
{"statement": "29 + 28 = 57", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7979033680167049, "score_xfmt": null, "score_capint": null}
{"statement": "83 x 89 = 7374", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6617384222336113, "score_xfmt": null, "score_capint": null}
{"statement": "67 x 32 = 2144", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7569958590902388, "score_xfmt": null, "score_capint": null}
{"statement": "81 x 35 = 2835", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7671979628503323, "score_xfmt": null, "score_capint": null}
{"statement": "16 x 60 = 960", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7421910617267713, "score_xfmt": null, "score_capint": null}
{"statement": "83 - 86 = 7", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 21, "p_dot": 0.1503095383523032, "score_xfmt": 0.000535252312989929, "score_capint": null}
{"statement": "46 - 20 = 26", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.43012245930731297, "score_xfmt": null, "score_capint": null}
{"statement": "5 x 6 = 40", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5783513141213916, "score_xfmt": null, "score_capint": null}
{"statement": "56 + 9 = 76", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.42529182345606387, "score_xfmt": null, "score_capint": null}
{"statement": "93 x 100 = 9300", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.8691773827886209, "score_xfmt": null, "score_capint": null}
{"statement": "97 + 96 = 209", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5279104843502864, "score_xfmt": null, "score_capint": null}
{"statement": "78 x 48 = 3736", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7463950281962752, "score_xfmt": null, "score_capint": null}
{"statement": "77 + 33 = 110", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7450609783409163, "score_xfmt": null, "score_capint": null}
{"statement": "36 - 49 = -2", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.23101771413348615, "score_xfmt": null, "score_capint": null}
{"statement": "40 - 91 = -39", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.3364895679987967, "score_xfmt": null, "score_capint": null}
{"statement": "82 x 98 = 8045", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7895606351085007, "score_xfmt": null, "score_capint": null}
{"statement": "5 - 52 = -39", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4882775928126648, "score_xfmt": null, "score_capint": null}
{"statement": "81 - 51 = 47", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.32938793930225074, "score_xfmt": null, "score_capint": null}
{"statement": "65 + 73 = 133", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6421184376813471, "score_xfmt": null, "score_capint": null}
{"statement": "6 - 30 = -44", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 82, "p_dot": 0.45865618938114494, "score_xfmt": 0.00024258521238572364, "score_capint": null}
{"statement": "70 - 4 = 63", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.38752455287612975, "score_xfmt": null, "score_capint": null}
{"statement": "51 + 88 = 149", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6830828223610297, "score_xfmt": null, "score_capint": null}
{"statement": "8 - 81 = -63", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4624616615474224, "score_xfmt": null, "score_capint": null}
{"statement": "30 + 20 = 50", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.7080999887548387, "score_xfmt": null, "score_capint": null}
{"statement": "33 + 84 = 111", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5853592408820987, "score_xfmt": null, "score_capint": null}
{"statement": "34 x 64 = 2160", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.6953069476876408, "score_xfmt": null, "score_capint": null}
{"statement": "17 + 33 = 50", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6907807998359203, "score_xfmt": null, "score_capint": null}
{"statement": "2 + 45 = 35", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.38278489117510617, "score_xfmt": null, "score_capint": null}
{"statement": "42 + 98 = 140", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6947875638725236, "score_xfmt": null, "score_capint": null}
{"statement": "39 - 95 = -64", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 119, "p_dot": 0.38010841712821275, "score_xfmt": 2.3782693717927422e-07, "score_capint": null}
{"statement": "61 + 80 = 136", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.5727898405166343, "score_xfmt": null, "score_capint": null}
{"statement": "67 x 66 = 4416", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.7223344797967002, "score_xfmt": null, "score_capint": null}
{"statement": "62 - 11 = 39", "dataset": "arithmetic", "split": "arith_train", "correct": false, "eval_order": null, "p_dot": 0.4365153528051451, "score_xfmt": null, "score_capint": null}
{"statement": "78 x 83 = 6474", "dataset": "arithmetic", "split": "arith_test", "correct": true, "eval_order": 103, "p_dot": 0.7675316506647505, "score_xfmt": 0.90592918477167, "score_capint": null}
{"statement": "27 x 72 = 1944", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.6987596453400329, "score_xfmt": null, "score_capint": null}
{"statement": "74 - 51 = 23", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.4014226844301447, "score_xfmt": null, "score_capint": null}
{"statement": "63 x 50 = 3150", "dataset": "arithmetic", "split": "arith_train", "correct": true, "eval_order": null, "p_dot": 0.8338417989143636, "score_xfmt": null, "score_capint": null}
{"statement": "10 + 47 = 56", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 53, "p_dot": 0.5087290896335617, "score_xfmt": 0.0005428375174558058, "score_capint": null}
{"statement": "82 + 45 = 114", "dataset": "arithmetic", "split": "arith_test", "correct": false, "eval_order": 72, "p_dot": 0.5890900081722066, "score_xfmt": 0.0030123864132498043, "score_capint": null}
{"statement": "The capital of Syria is Kabul", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 0, "p_dot": 0.6517039256868884, "score_xfmt": 7.818929089555501e-17, "score_capint": 0.002247410497417064}
{"statement": "The capital of Egypt is Gaborone", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 1, "p_dot": 0.6035117013379931, "score_xfmt": 3.676120150321481e-15, "score_capint": 1.1822846736947645e-05}
{"statement": "The capital of Mongolia is Cairo", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 2, "p_dot": 0.6684102471917868, "score_xfmt": 2.6682897401902814e-10, "score_capint": 8.047843483867162e-05}
{"statement": "The capital of Belarus is Lima", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 3, "p_dot": 0.7080920577864163, "score_xfmt": 5.705905968683044e-16, "score_capint": 6.003205764602656e-05}
{"statement": "The capital of Lithuania is Maputo", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 4, "p_dot": 0.6416652030893601, "score_xfmt": 1.6215397879292654e-17, "score_capint": 5.0721470733428126e-05}
{"statement": "The capital of Paraguay is Asuncion", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 5, "p_dot": 0.7833413335611112, "score_xfmt": 0.9909329479042601, "score_capint": 0.9998623712090858}
{"statement": "The capital of Croatia is Nouakchott", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 6, "p_dot": 0.6365928831510246, "score_xfmt": 2.351944082070245e-13, "score_capint": 0.00041164435913571673}
{"statement": "The capital of Zambia is Lusaka", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 7, "p_dot": 0.8032322416256648, "score_xfmt": 0.9985697985755965, "score_capint": 0.9986776678278134}
{"statement": "The capital of Benin is Yaounde", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 8, "p_dot": 0.6478187719476409, "score_xfmt": 1.1728256703314575e-11, "score_capint": 0.25159119125652357}
{"statement": "The capital of Thailand is Bangkok", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 9, "p_dot": 0.6824547632131726, "score_xfmt": 0.996821973295569, "score_capint": 0.9992468658508006}
{"statement": "The capital of Estonia is Jerusalem", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 10, "p_dot": 0.6991609313990921, "score_xfmt": 5.445662420275391e-17, "score_capint": 6.407791389494225e-06}
{"statement": "The capital of Sweden is Stockholm", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 11, "p_dot": 0.7338735205121338, "score_xfmt": 0.9960421333226411, "score_capint": 0.9993124535378012}
{"statement": "The capital of Serbia is Amsterdam", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 12, "p_dot": 0.5674248698633164, "score_xfmt": 6.188199482756914e-29, "score_capint": 3.23836207744268e-05}
{"statement": "The capital of Honduras is Tegucigalpa", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 13, "p_dot": 0.8052675941726193, "score_xfmt": 0.996843778820108, "score_capint": 0.9933053921248164}
{"statement": "The capital of Iraq is Ljubljana", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 14, "p_dot": 0.7617259781109169, "score_xfmt": 7.543020626770391e-05, "score_capint": 0.010088742274308537}
{"statement": "The capital of Peru is Panama City", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 15, "p_dot": 0.6285108962329105, "score_xfmt": 2.9677014412744775e-19, "score_capint": 0.0022238470667413253}
{"statement": "The capital of Turkey is Kigali", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 16, "p_dot": 0.6441039710771292, "score_xfmt": 7.340587324453132e-17, "score_capint": 0.000306615336579996}
{"statement": "The capital of Senegal is Canberra", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 17, "p_dot": 0.7296805039513856, "score_xfmt": 6.078564584628736e-14, "score_capint": 3.4522885466322395e-05}
{"statement": "The capital of Mongolia is Ulaanbaatar", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 18, "p_dot": 0.7069694427773356, "score_xfmt": 0.9995910298331907, "score_capint": 0.9997915636292063}
{"statement": "The capital of Cambodia is Phnom Penh", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 19, "p_dot": 0.796364494424779, "score_xfmt": 0.9988221272819098, "score_capint": 0.9968901651222335}
{"statement": "The capital of Afghanistan is Kabul", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 20, "p_dot": 0.7853515649330802, "score_xfmt": 0.9958338326671242, "score_capint": 0.999901117796261}
{"statement": "The capital of Armenia is Stockholm", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 21, "p_dot": 0.6387710210983641, "score_xfmt": 5.5051549965966486e-18, "score_capint": 0.00014034566592951056}
{"statement": "The capital of Laos is Panama City", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 22, "p_dot": 0.6282013384625316, "score_xfmt": 1.2143795348767346e-16, "score_capint": 0.00025053523751371147}
{"statement": "The capital of Ghana is San Jose", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 23, "p_dot": 0.48874089820310473, "score_xfmt": 8.176973758153556e-24, "score_capint": 1.734081552512255e-05}
{"statement": "The capital of Ecuador is Maputo", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 24, "p_dot": 0.7231887555681169, "score_xfmt": 3.190952318833791e-11, "score_capint": 0.00020386356177400387}
{"statement": "The capital of Bangladesh is Mogadishu", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 25, "p_dot": 0.7001733318902552, "score_xfmt": 5.258591644523897e-11, "score_capint": 0.045889053381714076}
{"statement": "The capital of Eswatini is Mbabane", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 26, "p_dot": 0.5219137435778975, "score_xfmt": 1.780499698994658e-11, "score_capint": 0.9561232451963583}
{"statement": "The capital of Slovenia is Singapore", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 27, "p_dot": 0.5375652170623653, "score_xfmt": 1.5207186651853178e-41, "score_capint": 1.8224438002428105e-07}
{"statement": "The capital of Guinea is Harare", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 28, "p_dot": 0.6982093862025067, "score_xfmt": 1.5565234566602082e-07, "score_capint": 0.0009271444911684339}
{"statement": "The capital of China is Beijing", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 29, "p_dot": 0.7643057734239846, "score_xfmt": 0.9997336209011317, "score_capint": 0.9996879529900123}
{"statement": "The capital of Costa Rica is Oslo", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 30, "p_dot": 0.7090833081747405, "score_xfmt": 4.583644841227787e-11, "score_capint": 0.0005964378303029956}
{"statement": "The capital of Moldova is Chisinau", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 31, "p_dot": 0.7931849889573641, "score_xfmt": 0.9988577378999581, "score_capint": 0.9970212263472588}
{"statement": "The capital of Guatemala is Tallinn", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 32, "p_dot": 0.8102404800592922, "score_xfmt": 6.96070499180704e-05, "score_capint": 8.747623218305201e-05}
{"statement": "The capital of Bulgaria is Kuala Lumpur", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 33, "p_dot": 0.6502902238280512, "score_xfmt": 9.707585619586066e-19, "score_capint": 0.000234660188055802}
{"statement": "The capital of Tunisia is Tunis", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 34, "p_dot": 0.7616431551286951, "score_xfmt": 0.9998374112855902, "score_capint": 0.9997657203937982}
{"statement": "The capital of Malaysia is Kuala Lumpur", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 35, "p_dot": 0.7077055177651346, "score_xfmt": 0.9987869725324059, "score_capint": 0.9999399015015312}
{"statement": "The capital of Cyprus is Bishkek", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 36, "p_dot": 0.6548665751470253, "score_xfmt": 2.6515595072332985e-17, "score_capint": 2.1825446481476535e-05}
{"statement": "The capital of Haiti is Port-au-Prince", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 37, "p_dot": 0.816630539833568, "score_xfmt": 0.9860169303648414, "score_capint": 0.9985808329314639}
{"statement": "The capital of Ukraine is Porto-Novo", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 38, "p_dot": 0.4995937837520614, "score_xfmt": 7.155948764693945e-26, "score_capint": 1.5465914525788196e-05}
{"statement": "The capital of Slovakia is Colombo", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 39, "p_dot": 0.6997614707215689, "score_xfmt": 1.2864314410005054e-13, "score_capint": 0.00020178034567004574}
{"statement": "The capital of South Africa is Pretoria", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 40, "p_dot": 0.4921820155577734, "score_xfmt": 0.020493816487740322, "score_capint": 0.9959299956527399}
{"statement": "The capital of China is Havana", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 41, "p_dot": 0.7043347543803975, "score_xfmt": 1.4848368594309635e-11, "score_capint": 0.007871556037712594}
{"statement": "The capital of Kazakhstan is Astana", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 42, "p_dot": 0.6337925448315218, "score_xfmt": 0.00019205001934374463, "score_capint": 0.9714672886771676}
{"statement": "The capital of Hungary is Budapest", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 43, "p_dot": 0.7531816756818444, "score_xfmt": 0.9999984223990358, "score_capint": 0.9992946738309252}
{"statement": "The capital of Kuwait is Kuwait City", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 44, "p_dot": 0.6470151244429871, "score_xfmt": 0.9999997615263891, "score_capint": 0.9999089597401057}
{"statement": "The capital of South Korea is Stockholm", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 45, "p_dot": 0.726789700682275, "score_xfmt": 1.141316887922062e-11, "score_capint": 0.04756322689487685}
{"statement": "The capital of Israel is Tallinn", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 46, "p_dot": 0.7724673219490796, "score_xfmt": 0.0674923200766978, "score_capint": 0.0023050382246921214}
{"statement": "The capital of Montenegro is Podgorica", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 47, "p_dot": 0.7407350419671275, "score_xfmt": 0.997127687625622, "score_capint": 0.9973533878424478}
{"statement": "The capital of Canada is Tallinn", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 48, "p_dot": 0.7251633169362321, "score_xfmt": 0.004487440239584288, "score_capint": 0.00487739502497299}
{"statement": "The capital of Bhutan is Thimphu", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 49, "p_dot": 0.7861317956703715, "score_xfmt": 0.999778697409185, "score_capint": 0.9984627104618585}
{"statement": "The capital of Ireland is Dublin", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 50, "p_dot": 0.7831496298313141, "score_xfmt": 0.9889372200896419, "score_capint": 0.9982370201610977}
{"statement": "The capital of Mali is Caracas", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 51, "p_dot": 0.6045293036149815, "score_xfmt": 3.68994999850725e-14, "score_capint": 1.3009654634292416e-05}
{"statement": "The capital of Libya is Tripoli", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 52, "p_dot": 0.8020447014714591, "score_xfmt": 0.9943907248509893, "score_capint": 0.9885770940301496}
{"statement": "The capital of Thailand is Georgetown", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 53, "p_dot": 0.6105350938159972, "score_xfmt": 1.1905030254432883e-13, "score_capint": 0.00766013282531489}
{"statement": "The capital of Belgium is Brussels", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 54, "p_dot": 0.7462527493480593, "score_xfmt": 0.9328613993157809, "score_capint": 0.9999477000834807}
{"statement": "The capital of Montenegro is Tehran", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 55, "p_dot": 0.8272972102276981, "score_xfmt": 1.7864966123501113e-08, "score_capint": 0.0007439575833377336}
{"statement": "The capital of Nepal is Moscow", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 56, "p_dot": 0.7951975223259069, "score_xfmt": 3.2332294672864096e-08, "score_capint": 0.013380604028202738}
{"statement": "The capital of Bolivia is Skopje", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 57, "p_dot": 0.7571587571874261, "score_xfmt": 1.1564184421629573e-09, "score_capint": 0.00023959647976720281}
{"statement": "The capital of Botswana is Gaborone", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 58, "p_dot": 0.8049494482693262, "score_xfmt": 0.9931113203939166, "score_capint": 0.999714700385933}
{"statement": "The capital of Gabon is La Paz", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 59, "p_dot": 0.672240206040442, "score_xfmt": 4.2672653606173795e-13, "score_capint": 0.0014632847608245594}
{"statement": "The capital of Latvia is Hanoi", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 60, "p_dot": 0.6214502438087948, "score_xfmt": 3.120769947140596e-11, "score_capint": 0.00011482239229435196}
{"statement": "The capital of Cameroon is Seoul", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 61, "p_dot": 0.7366722275037318, "score_xfmt": 4.7016096575262246e-09, "score_capint": 0.008233602322129411}
{"statement": "The capital of Portugal is Lisbon", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 62, "p_dot": 0.8153437241562642, "score_xfmt": 0.9991073109705122, "score_capint": 0.9994962305102097}
{"statement": "The capital of Turkmenistan is Ashgabat", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 63, "p_dot": 0.7928560756263323, "score_xfmt": 0.9998212521882393, "score_capint": 0.9932452713039449}
{"statement": "The capital of Poland is Warsaw", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 64, "p_dot": 0.7397452170262113, "score_xfmt": 0.9959398444322107, "score_capint": 0.9996670025374823}
{"statement": "The capital of Japan is Sarajevo", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 65, "p_dot": 0.6681850182358176, "score_xfmt": 1.2381768396693212e-07, "score_capint": 0.005281541337505041}
{"statement": "The capital of Luxembourg is Luxembourg City", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 66, "p_dot": 0.5424356951843947, "score_xfmt": 0.9999991029500532, "score_capint": 0.999474826872828}
{"statement": "The capital of Chad is Guatemala City", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 67, "p_dot": 0.5707411337643862, "score_xfmt": 1.3569949322138732e-18, "score_capint": 5.505881900082738e-05}
{"statement": "The capital of Lithuania is Vilnius", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 68, "p_dot": 0.7678318560356274, "score_xfmt": 0.9995379009590288, "score_capint": 0.999401545984724}
{"statement": "The capital of Qatar is Buenos Aires", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 69, "p_dot": 0.7522932749707252, "score_xfmt": 6.30449525108338e-08, "score_capint": 0.01479055603791359}
{"statement": "The capital of Panama is Abuja", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 70, "p_dot": 0.6752667478867806, "score_xfmt": 4.364727200703805e-10, "score_capint": 0.00031277603594738907}
{"statement": "The capital of Iceland is Reykjavik", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 71, "p_dot": 0.8029129269416444, "score_xfmt": 0.970552188490739, "score_capint": 0.9995645299265822}
{"statement": "The capital of Yemen is Kingston", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 72, "p_dot": 0.6396381668746471, "score_xfmt": 2.3756589228626165e-14, "score_capint": 0.0026375219017721542}
{"statement": "The capital of Ghana is Accra", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 73, "p_dot": 0.7383491728687659, "score_xfmt": 0.994875506094244, "score_capint": 0.9994138584683109}
{"statement": "The capital of Georgia is Windhoek", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 74, "p_dot": 0.5884478453081101, "score_xfmt": 3.904726372211126e-09, "score_capint": 0.0016576366823065646}
{"statement": "The capital of India is New Delhi", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 75, "p_dot": 0.7640512497164309, "score_xfmt": 0.6685933984161645, "score_capint": 0.9982616282074964}
{"statement": "The capital of Oman is Muscat", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 76, "p_dot": 0.731646970147267, "score_xfmt": 0.9998707254786245, "score_capint": 0.9997273393053159}
{"statement": "The capital of Argentina is Montevideo", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 77, "p_dot": 0.8069713873555884, "score_xfmt": 0.004878772469408172, "score_capint": 0.37764981035462303}
{"statement": "The capital of Kuwait is Caracas", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 78, "p_dot": 0.6755033996887505, "score_xfmt": 2.8592291095554757e-11, "score_capint": 0.0001853500411018983}
{"statement": "The capital of Iran is Tehran", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 79, "p_dot": 0.8360615470446646, "score_xfmt": 0.9997626857881384, "score_capint": 0.9999080203824071}
{"statement": "The capital of South Africa is Copenhagen", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 80, "p_dot": 0.6995248079765588, "score_xfmt": 8.873715700317305e-13, "score_capint": 0.011212416232537}
{"statement": "The capital of Lesotho is Maseru", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 81, "p_dot": 0.8064947337843478, "score_xfmt": 0.9992365674621532, "score_capint": 0.9938979423021743}
{"statement": "The capital of Australia is Canberra", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 82, "p_dot": 0.7344446635106578, "score_xfmt": 0.9597260260409756, "score_capint": 0.9997935278328582}
{"statement": "The capital of Liberia is Bratislava", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 83, "p_dot": 0.7514282797346823, "score_xfmt": 1.051639618659188e-09, "score_capint": 0.0003089831110093562}
{"statement": "The capital of Honduras is Lisbon", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 84, "p_dot": 0.6771144153317437, "score_xfmt": 9.610063941222243e-12, "score_capint": 0.005868501637336435}
{"statement": "The capital of Syria is Damascus", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 85, "p_dot": 0.7846310257446021, "score_xfmt": 0.9998702795216284, "score_capint": 0.9964822202170616}
{"statement": "The capital of Oman is Amsterdam", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 86, "p_dot": 0.5941520272754133, "score_xfmt": 9.439407448514385e-30, "score_capint": 0.00024321112294448615}
{"statement": "The capital of Malta is Valletta", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 87, "p_dot": 0.7427669914904982, "score_xfmt": 0.9933868266740141, "score_capint": 0.9950470379512418}
{"statement": "The capital of Namibia is Windhoek", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 88, "p_dot": 0.8145027360878885, "score_xfmt": 0.999859805843556, "score_capint": 0.9996282331712271}
{"statement": "The capital of Malaysia is Riyadh", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 89, "p_dot": 0.7377660983474925, "score_xfmt": 1.449637218481429e-11, "score_capint": 0.028081292636072994}
{"statement": "The capital of Liberia is Monrovia", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 90, "p_dot": 0.7260573803214356, "score_xfmt": 0.9994394221482651, "score_capint": 0.9976504429443683}
{"statement": "The capital of Mali is Bamako", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 91, "p_dot": 0.7545900482218713, "score_xfmt": 0.9658803722475214, "score_capint": 0.9933175855359933}
{"statement": "The capital of Zambia is Apia", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 92, "p_dot": 0.6725932983099483, "score_xfmt": 9.395705194358393e-08, "score_capint": 0.0013805796405220855}
{"statement": "The capital of Algeria is Kathmandu", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 93, "p_dot": 0.7158698406419717, "score_xfmt": 1.4953101250569595e-08, "score_capint": 0.040499750990055684}
{"statement": "The capital of Netherlands is Amsterdam", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 94, "p_dot": 0.6699496948858723, "score_xfmt": 0.0002493393256732317, "score_capint": 0.9980207488486994}
{"statement": "The capital of Papua New Guinea is Berlin", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 95, "p_dot": 0.7367067252052948, "score_xfmt": 1.172448910428879e-10, "score_capint": 0.00047098519806229214}
{"statement": "The capital of Cuba is Havana", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 96, "p_dot": 0.7484332256717607, "score_xfmt": 0.9615631455952328, "score_capint": 0.9970611152634039}
{"statement": "The capital of Egypt is Cairo", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 97, "p_dot": 0.7215050529921427, "score_xfmt": 0.9999763574795475, "score_capint": 0.9962510704661228}
{"statement": "The capital of Canada is Ottawa", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 98, "p_dot": 0.740972148720175, "score_xfmt": 0.9999913612932452, "score_capint": 0.9990585808142134}
{"statement": "The capital of Fiji is Seoul", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 99, "p_dot": 0.7589864256442524, "score_xfmt": 5.245017311633475e-09, "score_capint": 0.005460914387738501}
{"statement": "The capital of El Salvador is San Salvador", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 100, "p_dot": 0.7413475172361359, "score_xfmt": 0.9981335628555126, "score_capint": 0.9986253087407467}
{"statement": "The capital of Hungary is Gitega", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 101, "p_dot": 0.46738465456292033, "score_xfmt": 4.1091294405029844e-19, "score_capint": 2.3115922196183407e-06}
{"statement": "The capital of Argentina is Buenos Aires", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 102, "p_dot": 0.7709728034387808, "score_xfmt": 0.9918612494399449, "score_capint": 0.9979797899301108}
{"statement": "The capital of Sri Lanka is Colombo", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 103, "p_dot": 0.6497865181299858, "score_xfmt": 4.041386531820662e-09, "score_capint": 0.8672573114755581}
{"statement": "The capital of Djibouti is Djibouti", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 104, "p_dot": 0.3585478910827078, "score_xfmt": 0.12693878032505834, "score_capint": 0.7880652224169011}
{"statement": "The capital of Slovakia is Bratislava", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 105, "p_dot": 0.7125742407515645, "score_xfmt": 0.9997831093226252, "score_capint": 0.9991625863886322}
{"statement": "The capital of Namibia is Doha", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 106, "p_dot": 0.655682938056998, "score_xfmt": 4.93152404434566e-16, "score_capint": 0.003921448843041476}
{"statement": "The capital of Italy is Canberra", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 107, "p_dot": 0.532161834766157, "score_xfmt": 1.8445707410271066e-14, "score_capint": 0.004532587183935971}
{"statement": "The capital of Benin is Porto-Novo", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 108, "p_dot": 0.25255259216646664, "score_xfmt": 8.763932017008693e-19, "score_capint": 0.9526107830971287}
{"statement": "The capital of Uzbekistan is Vienna", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 109, "p_dot": 0.6972893852507696, "score_xfmt": 6.068333720337499e-09, "score_capint": 0.0059308103323557695}
{"statement": "The capital of Belgium is Lima", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 110, "p_dot": 0.7045182855799794, "score_xfmt": 8.023633104645325e-10, "score_capint": 0.002231606430297614}
{"statement": "The capital of Albania is Tirana", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 111, "p_dot": 0.7854332271963358, "score_xfmt": 0.4057841818182282, "score_capint": 0.9997885673446121}
{"statement": "The capital of Algeria is Algiers", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 112, "p_dot": 0.7488272175542079, "score_xfmt": 0.9995209254230676, "score_capint": 0.999616626001192}
{"statement": "The capital of Portugal is Gaborone", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 113, "p_dot": 0.5772570008412004, "score_xfmt": 4.1778117088308666e-14, "score_capint": 5.355826995021278e-06}
{"statement": "The capital of Madagascar is Antananarivo", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 114, "p_dot": 0.8642839183739852, "score_xfmt": 0.9887147027230114, "score_capint": 0.9988663025714585}
{"statement": "The capital of Bulgaria is Sofia", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 115, "p_dot": 0.7960481808986515, "score_xfmt": 0.975799344862291, "score_capint": 0.9732147221269162}
{"statement": "The capital of United States is Baku", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 116, "p_dot": 0.7323695248924196, "score_xfmt": 2.361241684283308e-08, "score_capint": 0.00020463158806887472}
{"statement": "The capital of Jamaica is Yaounde", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 117, "p_dot": 0.6578914124984294, "score_xfmt": 1.2203170567005067e-09, "score_capint": 0.0124142880118546}
{"statement": "The capital of Libya is Dodoma", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 118, "p_dot": 0.6170336520299315, "score_xfmt": 3.5416735382061075e-21, "score_capint": 0.0022888400571398834}
{"statement": "The capital of Cuba is Mbabane", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 119, "p_dot": 0.6420498921070248, "score_xfmt": 3.737821046771994e-21, "score_capint": 2.6558997347687014e-05}
{"statement": "The capital of Samoa is Baku", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 120, "p_dot": 0.6456737961852923, "score_xfmt": 9.95532294357154e-17, "score_capint": 3.963461676059093e-05}
{"statement": "The capital of Pakistan is Islamabad", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 121, "p_dot": 0.7956651764689013, "score_xfmt": 0.956005258576532, "score_capint": 0.998805417096513}
{"statement": "The capital of Guinea is Conakry", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 122, "p_dot": 0.8069913652143441, "score_xfmt": 0.9890479269990712, "score_capint": 0.9977172228391931}
{"statement": "The capital of Haiti is Brussels", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 123, "p_dot": 0.6497230691602454, "score_xfmt": 6.686863694679995e-17, "score_capint": 3.4447956161987e-05}
{"statement": "The capital of United Arab Emirates is Washington", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 124, "p_dot": 0.5287786789704114, "score_xfmt": 0.0007736032076793474, "score_capint": 0.015059042041414318}
{"statement": "The capital of Ivory Coast is Cairo", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 125, "p_dot": 0.6649115267209709, "score_xfmt": 2.468249114648307e-10, "score_capint": 0.000439547791950307}
{"statement": "The capital of Dominican Republic is Chisinau", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 126, "p_dot": 0.7275151057983749, "score_xfmt": 5.8663183923876335e-08, "score_capint": 8.179483042391294e-05}
{"statement": "The capital of Serbia is Belgrade", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 127, "p_dot": 0.7512815752415918, "score_xfmt": 0.9996170656343528, "score_capint": 0.9998947231278197}
{"statement": "The capital of Kyrgyzstan is Bishkek", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 128, "p_dot": 0.830882134148851, "score_xfmt": 0.9997849596407931, "score_capint": 0.99308935275513}
{"statement": "The capital of El Salvador is Reykjavik", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 129, "p_dot": 0.7320225598523393, "score_xfmt": 4.222490576153289e-09, "score_capint": 0.0008140221716855936}
{"statement": "The capital of Mauritania is Nouakchott", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 130, "p_dot": 0.8368818018643651, "score_xfmt": 0.9974572143324093, "score_capint": 0.9906179340952409}
{"statement": "The capital of Togo is Lome", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 131, "p_dot": 0.7268980681546964, "score_xfmt": 0.9977403604353018, "score_capint": 0.9990271475065235}
{"statement": "The capital of Myanmar is Canberra", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 132, "p_dot": 0.7026127502322197, "score_xfmt": 1.927680628186936e-09, "score_capint": 0.003506372227772021}
{"statement": "The capital of Angola is Pretoria", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 133, "p_dot": 0.6568895844975486, "score_xfmt": 1.2690696164714508e-13, "score_capint": 0.05393223537308104}
{"statement": "The capital of Uruguay is Conakry", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 134, "p_dot": 0.5816357785370201, "score_xfmt": 1.8297419698584324e-13, "score_capint": 0.0029436111715628187}
{"statement": "The capital of Mauritania is Caracas", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 135, "p_dot": 0.6042515719309449, "score_xfmt": 5.517936588087636e-15, "score_capint": 2.697352947368138e-05}
{"statement": "The capital of Russia is Niamey", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 136, "p_dot": 0.6494763046503067, "score_xfmt": 1.212321442794167e-07, "score_capint": 0.003172929033870397}
{"statement": "The capital of Austria is Tegucigalpa", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 137, "p_dot": 0.6489980391925201, "score_xfmt": 4.018328872256006e-06, "score_capint": 0.001813490382549974}
{"statement": "The capital of Burundi is Ulaanbaatar", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 138, "p_dot": 0.5780372533481568, "score_xfmt": 8.507202775906228e-19, "score_capint": 4.2221415886927755e-05}
{"statement": "The capital of Sierra Leone is Freetown", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 139, "p_dot": 0.831455722800456, "score_xfmt": 0.9997613108857601, "score_capint": 0.9989190909348049}
{"statement": "The capital of Tajikistan is Dushanbe", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 140, "p_dot": 0.8053543915157206, "score_xfmt": 0.9983739565816626, "score_capint": 0.997642623357422}
{"statement": "The capital of Azerbaijan is Ulaanbaatar", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 141, "p_dot": 0.6244667523424141, "score_xfmt": 1.734684930300546e-09, "score_capint": 0.0037071202251378267}
{"statement": "The capital of Burkina Faso is Antananarivo", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 142, "p_dot": 0.669418211793527, "score_xfmt": 2.2071398777518011e-16, "score_capint": 0.0012120473562525012}
{"statement": "The capital of Panama is Panama City", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 143, "p_dot": 0.6606511154677719, "score_xfmt": 0.9999989016432376, "score_capint": 0.9998754141290965}
{"statement": "The capital of Mexico is Maseru", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 144, "p_dot": 0.6282717253779992, "score_xfmt": 8.762343382620518e-18, "score_capint": 1.1207165488251108e-05}
{"statement": "The capital of Eritrea is Ouagadougou", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 145, "p_dot": 0.7610787423909642, "score_xfmt": 1.3021454066474344e-07, "score_capint": 0.0019000545021867713}
{"statement": "The capital of Bahrain is Manama", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 146, "p_dot": 0.7382130658952519, "score_xfmt": 0.9734878267639516, "score_capint": 0.9998458146151854}
{"statement": "The capital of Uruguay is Montevideo", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 147, "p_dot": 0.833897499833256, "score_xfmt": 0.9977576442694508, "score_capint": 0.9990930770182627}
{"statement": "The capital of Pakistan is Dakar", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 148, "p_dot": 0.6837727291276678, "score_xfmt": 5.236972717400035e-09, "score_capint": 0.004380029410367094}
{"statement": "The capital of Israel is Jerusalem", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 149, "p_dot": 0.7955558575922623, "score_xfmt": 0.9966684317752902, "score_capint": 0.9986351080479327}
{"statement": "The capital of United Kingdom is London", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 150, "p_dot": 0.7484638507012278, "score_xfmt": 0.9717073516444195, "score_capint": 0.9968971682443589}
{"statement": "The capital of Dominican Republic is Santo Domingo", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 151, "p_dot": 0.8188984672888182, "score_xfmt": 0.9968176757383543, "score_capint": 0.9936446228612064}
{"statement": "The capital of Somalia is Yerevan", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 152, "p_dot": 0.6873170884791762, "score_xfmt": 9.566389713113487e-11, "score_capint": 0.00021744044422933258}
{"statement": "The capital of Malawi is Lilongwe", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 153, "p_dot": 0.8038106574676931, "score_xfmt": 0.995444563653001, "score_capint": 0.9989428098244297}
{"statement": "The capital of Bahamas is Asuncion", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 154, "p_dot": 0.6250818877015263, "score_xfmt": 2.065628784772999e-16, "score_capint": 0.16521296434513555}
{"statement": "The capital of Vietnam is Abu Dhabi", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 155, "p_dot": 0.6218108112225309, "score_xfmt": 1.0938967709383114e-19, "score_capint": 0.0002191414983397282}
{"statement": "The capital of Bhutan is Pretoria", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 156, "p_dot": 0.7378202154650353, "score_xfmt": 7.36847733324831e-16, "score_capint": 9.530005310359032e-05}
{"statement": "The capital of Kazakhstan is Naypyidaw", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 157, "p_dot": 0.4203891612123698, "score_xfmt": 2.223889519626832e-38, "score_capint": 9.439748379674521e-07}
{"statement": "The capital of Burundi is Gitega", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 158, "p_dot": 0.6572040177416056, "score_xfmt": 0.9820760123052382, "score_capint": 0.9948690776122038}
{"statement": "The capital of Czech Republic is Prague", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 159, "p_dot": 0.8111005871905945, "score_xfmt": 0.9263370754736852, "score_capint": 0.9963257261550861}
{"statement": "The capital of North Korea is Riga", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 160, "p_dot": 0.7542286933166906, "score_xfmt": 2.9972824968268678e-06, "score_capint": 0.0006911116849130916}
{"statement": "The capital of Bosnia and Herzegovina is Sarajevo", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 161, "p_dot": 0.6612233273917809, "score_xfmt": 0.9999963663791629, "score_capint": 0.994348582617372}
{"statement": "The capital of Zimbabwe is Harare", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 162, "p_dot": 0.7837587810354307, "score_xfmt": 0.9963717822534638, "score_capint": 0.9977662895502427}
{"statement": "The capital of Lebanon is Monrovia", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 163, "p_dot": 0.6003843735670671, "score_xfmt": 2.3092203366570707e-19, "score_capint": 0.0036998552346193306}
{"statement": "The capital of Cambodia is Riga", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 164, "p_dot": 0.7247322180774063, "score_xfmt": 1.3937596747959735e-10, "score_capint": 2.917048077967093e-05}
{"statement": "The capital of Belarus is Minsk", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 165, "p_dot": 0.8175636241794564, "score_xfmt": 0.9911009844638816, "score_capint": 0.9991281380351422}
{"statement": "The capital of Cyprus is Nicosia", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 166, "p_dot": 0.7344992092694156, "score_xfmt": 0.9946750362943405, "score_capint": 0.9361091446454347}
{"statement": "The capital of Guyana is Tehran", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 167, "p_dot": 0.7110200547613204, "score_xfmt": 1.2023119227867758e-15, "score_capint": 0.00015893716611116226}
{"statement": "The capital of Madagascar is Hanoi", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 168, "p_dot": 0.6722879576263949, "score_xfmt": 1.7457699213301202e-09, "score_capint": 0.0008814439568083848}
{"statement": "The capital of Ecuador is Quito", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 169, "p_dot": 0.7628113206010312, "score_xfmt": 0.9988238350048874, "score_capint": 0.9966080938619535}
{"statement": "The capital of Malta is Riyadh", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 170, "p_dot": 0.7924930846202187, "score_xfmt": 5.309948223350385e-11, "score_capint": 0.0015896436656011215}
{"statement": "The capital of Cameroon is Yaounde", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 171, "p_dot": 0.8265559690189548, "score_xfmt": 0.9708074802996437, "score_capint": 0.9998985781160935}
{"statement": "The capital of Chile is Yamoussoukro", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 172, "p_dot": 0.520963984890841, "score_xfmt": 1.459126721649968e-26, "score_capint": 7.3597285873835235e-06}
{"statement": "The capital of Sweden is Valletta", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 173, "p_dot": 0.5910714710480534, "score_xfmt": 2.285232161235592e-13, "score_capint": 0.0009869605319158523}
{"statement": "The capital of Bahamas is Nassau", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 174, "p_dot": 0.7247370433760807, "score_xfmt": 0.9993504975143829, "score_capint": 0.9989347655111157}
{"statement": "The capital of Sierra Leone is Dhaka", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 175, "p_dot": 0.6125149953877553, "score_xfmt": 9.592412299707522e-19, "score_capint": 4.4240788130593946e-05}
{"statement": "The capital of Bangladesh is Dhaka", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 176, "p_dot": 0.7936728586209938, "score_xfmt": 0.9967217744934306, "score_capint": 0.9998189845140293}
{"statement": "The capital of Albania is Niamey", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 177, "p_dot": 0.6934979964280501, "score_xfmt": 1.160702804834898e-05, "score_capint": 0.00427830926937955}
{"statement": "The capital of Tanzania is New Delhi", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 178, "p_dot": 0.5991940714302473, "score_xfmt": 3.0904316078472437e-18, "score_capint": 0.003975629379566835}
{"statement": "The capital of Papua New Guinea is Port Moresby", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 179, "p_dot": 0.794438591983635, "score_xfmt": 0.9999293488807643, "score_capint": 0.9991683941456111}
{"statement": "The capital of Philippines is Manila", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 180, "p_dot": 0.7796604892937467, "score_xfmt": 0.038870241806277274, "score_capint": 0.9828822533976163}
{"statement": "The capital of Netherlands is Apia", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 181, "p_dot": 0.7209230044391006, "score_xfmt": 2.6504121135339945e-07, "score_capint": 2.238165001630449e-05}
{"statement": "The capital of Eritrea is Asmara", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 182, "p_dot": 0.7592523725470528, "score_xfmt": 0.9128201685533937, "score_capint": 0.9924560268600201}
{"statement": "The capital of Kenya is Nairobi", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 183, "p_dot": 0.8125987832900137, "score_xfmt": 0.9948728170747039, "score_capint": 0.9995656027592307}
{"statement": "The capital of Bolivia is La Paz", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 184, "p_dot": 0.48646746337180957, "score_xfmt": 0.09246386651322112, "score_capint": 0.9989748154062912}
{"statement": "The capital of Samoa is Apia", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 185, "p_dot": 0.769073101459071, "score_xfmt": 0.9980391279656409, "score_capint": 0.999219185456921}
{"statement": "The capital of France is Paris", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 186, "p_dot": 0.8254413989780005, "score_xfmt": 0.99938873714121, "score_capint": 0.999007783450704}
{"statement": "The capital of Bahrain is Antananarivo", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 187, "p_dot": 0.7067044926807284, "score_xfmt": 1.6634221972162776e-09, "score_capint": 0.006907734293693599}
{"statement": "The capital of France is Ashgabat", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 188, "p_dot": 0.6864182834979147, "score_xfmt": 6.328261680847733e-07, "score_capint": 0.006329820940008976}
{"statement": "The capital of Paraguay is Yerevan", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 189, "p_dot": 0.7291471106000245, "score_xfmt": 2.3139536594643918e-08, "score_capint": 0.0011987211944294947}
{"statement": "The capital of Germany is Panama City", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 190, "p_dot": 0.5855131926946342, "score_xfmt": 1.171473307835145e-14, "score_capint": 0.0013975790042005075}
{"statement": "The capital of Uzbekistan is Tashkent", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 191, "p_dot": 0.8103901596041396, "score_xfmt": 0.9915634695653536, "score_capint": 0.9976334366961873}
{"statement": "The capital of Germany is Berlin", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 192, "p_dot": 0.7923612019512802, "score_xfmt": 0.9967331634764831, "score_capint": 0.9993345523710233}
{"statement": "The capital of Mexico is Mexico City", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 193, "p_dot": 0.6680669309571385, "score_xfmt": 0.999978856547667, "score_capint": 0.9998317804816363}
{"statement": "The capital of Japan is Tokyo", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 194, "p_dot": 0.7803719418589026, "score_xfmt": 0.9999947227472754, "score_capint": 0.9999063141433829}
{"statement": "The capital of Tanzania is Dodoma", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 195, "p_dot": 0.4920490339281969, "score_xfmt": 3.669949512762417e-16, "score_capint": 0.9994073294892778}
{"statement": "The capital of Togo is Prague", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 196, "p_dot": 0.7746211712947115, "score_xfmt": 3.9502146944567064e-11, "score_capint": 2.6003973366244812e-05}
{"statement": "The capital of New Zealand is Mexico City", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 197, "p_dot": 0.6333158510969952, "score_xfmt": 5.213776184794689e-18, "score_capint": 0.0005083616127546047}
{"statement": "The capital of Rwanda is Kigali", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 198, "p_dot": 0.8177835895912722, "score_xfmt": 0.999704459351055, "score_capint": 0.9987596272502225}
{"statement": "The capital of Moldova is Moscow", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 199, "p_dot": 0.6324392545502633, "score_xfmt": 5.6539591659296026e-14, "score_capint": 0.005264708721307266}
{"statement": "The capital of Luxembourg is Tbilisi", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 200, "p_dot": 0.7829550601891242, "score_xfmt": 0.00031647197214370286, "score_capint": 0.0045204870711593485}
{"statement": "The capital of Laos is Vientiane", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 201, "p_dot": 0.7820699759759009, "score_xfmt": 0.9996051955461316, "score_capint": 0.9992387845051647}
{"statement": "The capital of Saudi Arabia is Riyadh", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 202, "p_dot": 0.8187816395657137, "score_xfmt": 0.999413663782679, "score_capint": 0.999552342376748}
{"statement": "The capital of Czech Republic is Suva", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 203, "p_dot": 0.5206187695730478, "score_xfmt": 9.27796618491461e-16, "score_capint": 2.90750228552395e-06}
{"statement": "The capital of Armenia is Yerevan", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 204, "p_dot": 0.7928118280833587, "score_xfmt": 0.9998103335391906, "score_capint": 0.9946628712175006}
{"statement": "The capital of Morocco is Bamako", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 205, "p_dot": 0.5518911603139713, "score_xfmt": 7.536068490198729e-19, "score_capint": 1.2347652765166873e-05}
{"statement": "The capital of Guatemala is Guatemala City", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 206, "p_dot": 0.6693784985691309, "score_xfmt": 0.9999969080484273, "score_capint": 0.9998982923288576}
{"statement": "The capital of Ivory Coast is Yamoussoukro", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 207, "p_dot": 0.3762762196129188, "score_xfmt": 7.3137222917164075e-09, "score_capint": 0.9908379154694468}
{"statement": "The capital of Chad is N'Djamena", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 208, "p_dot": 0.8032359820208512, "score_xfmt": 0.9943638655448234, "score_capint": 0.9885083323245886}
{"statement": "The capital of Morocco is Rabat", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 209, "p_dot": 0.7200860960874707, "score_xfmt": 0.9994801730874849, "score_capint": 0.9973853154818666}
{"statement": "The capital of Gabon is Libreville", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 210, "p_dot": 0.806459890678525, "score_xfmt": 0.9984385932472404, "score_capint": 0.9993331111447489}
{"statement": "The capital of United Kingdom is Lome", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 211, "p_dot": 0.7188840973540209, "score_xfmt": 4.922124928834847e-09, "score_capint": 0.00012814023978315875}
{"statement": "The capital of Lesotho is Conakry", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 212, "p_dot": 0.6827308106003329, "score_xfmt": 5.67267600444136e-09, "score_capint": 0.0029739673208128287}
{"statement": "The capital of Vietnam is Hanoi", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 213, "p_dot": 0.7198101929971017, "score_xfmt": 0.9999323155005891, "score_capint": 0.999896622883557}
{"statement": "The capital of India is Riga", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 214, "p_dot": 0.7841675888048485, "score_xfmt": 7.1061312841395665e-06, "score_capint": 0.0011688420686903208}
{"statement": "The capital of Singapore is Belgrade", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 215, "p_dot": 0.7743091098964214, "score_xfmt": 1.5283625582753166e-07, "score_capint": 0.008845727878244376}
{"statement": "The capital of Saudi Arabia is Djibouti", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 216, "p_dot": 0.8048337117070332, "score_xfmt": 5.521121504787709e-07, "score_capint": 0.012203545270380957}
{"statement": "The capital of Kenya is Pyongyang", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 217, "p_dot": 0.7604413812514395, "score_xfmt": 1.3388088504541213e-12, "score_capint": 0.00021462039692453943}
{"statement": "The capital of Switzerland is Moscow", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 218, "p_dot": 0.7573093333630823, "score_xfmt": 5.94952715649929e-09, "score_capint": 0.005345981408391088}
{"statement": "The capital of Spain is Tunis", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 219, "p_dot": 0.6541841991129331, "score_xfmt": 3.2903806491955647e-16, "score_capint": 8.503266249735784e-05}
{"statement": "The capital of South Korea is Seoul", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 220, "p_dot": 0.7884811642579734, "score_xfmt": 0.9999866808410602, "score_capint": 0.9999041290109111}
{"statement": "The capital of Singapore is Singapore", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 221, "p_dot": 0.6003093512263149, "score_xfmt": 0.7119500626394363, "score_capint": 0.9972192172752625}
{"statement": "The capital of Australia is Riga", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 222, "p_dot": 0.7490316879120655, "score_xfmt": 1.6811668444129092e-08, "score_capint": 0.0019370240220887265}
{"statement": "The capital of Finland is Helsinki", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 223, "p_dot": 0.7595810416969471, "score_xfmt": 0.9999895559933435, "score_capint": 0.9990742832752886}
{"statement": "The capital of Costa Rica is San Jose", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 224, "p_dot": 0.7840439330902882, "score_xfmt": 0.994860451950627, "score_capint": 0.9948081802644252}
{"statement": "The capital of Venezuela is Gitega", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 225, "p_dot": 0.49732925463467836, "score_xfmt": 2.536748104721751e-21, "score_capint": 1.7238973611747674e-06}
{"statement": "The capital of Djibouti is La Paz", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 226, "p_dot": 0.6469738670857623, "score_xfmt": 9.579848409622335e-17, "score_capint": 0.0005113302787426556}
{"statement": "The capital of Ethiopia is Brasilia", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 227, "p_dot": 0.6496205138973892, "score_xfmt": 3.4785369975853986e-15, "score_capint": 6.956794478952639e-06}
{"statement": "The capital of Italy is Rome", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 228, "p_dot": 0.7560799809871241, "score_xfmt": 0.999205457716231, "score_capint": 0.9955239982872992}
{"statement": "The capital of Georgia is Tbilisi", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 229, "p_dot": 0.7595924744964577, "score_xfmt": 0.9967109681866335, "score_capint": 0.9980709780944546}
{"statement": "The capital of Ireland is Luanda", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 230, "p_dot": 0.6817527674720623, "score_xfmt": 1.5541924726904955e-09, "score_capint": 0.0005792059778737869}
{"statement": "The capital of Denmark is Copenhagen", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 231, "p_dot": 0.743301531067118, "score_xfmt": 0.9999481772874077, "score_capint": 0.9999650371186993}
{"statement": "The capital of Jamaica is Kingston", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 232, "p_dot": 0.7921314114937559, "score_xfmt": 0.9988883103362012, "score_capint": 0.9998154154890994}
{"statement": "The capital of Zimbabwe is Lima", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 233, "p_dot": 0.7088625666801818, "score_xfmt": 2.1261933845005577e-15, "score_capint": 0.00013806066925206584}
{"statement": "The capital of Botswana is Abu Dhabi", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 234, "p_dot": 0.5693208987358958, "score_xfmt": 9.227677393400845e-23, "score_capint": 9.827498560095238e-05}
{"statement": "The capital of Kyrgyzstan is Reykjavik", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 235, "p_dot": 0.7566193442908116, "score_xfmt": 3.597828671008459e-05, "score_capint": 0.009068391808866145}
{"statement": "The capital of Iran is Nicosia", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 236, "p_dot": 0.6293216689955443, "score_xfmt": 1.198584237232689e-10, "score_capint": 0.0018561315992441797}
{"statement": "The capital of United States is Washington", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 237, "p_dot": 0.4800239480100572, "score_xfmt": 0.9930746534146562, "score_capint": 0.9751735691019148}
{"statement": "The capital of Sri Lanka is Abuja", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 238, "p_dot": 0.7596712380181998, "score_xfmt": 3.002688471771228e-05, "score_capint": 0.0012660062678723815}
{"statement": "The capital of North Macedonia is Tirana", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 239, "p_dot": 0.7254299102933146, "score_xfmt": 1.4617550192572204e-08, "score_capint": 0.0199043376525786}
{"statement": "The capital of United Arab Emirates is Abu Dhabi", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 240, "p_dot": 0.673826607177034, "score_xfmt": 0.7535066413878423, "score_capint": 0.9997622181858797}
{"statement": "The capital of Poland is Yaounde", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 241, "p_dot": 0.7455763893667608, "score_xfmt": 1.0450680261376551e-05, "score_capint": 0.013509179979760793}
{"statement": "The capital of Denmark is Harare", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 242, "p_dot": 0.7078809923259541, "score_xfmt": 1.1064717084984262e-08, "score_capint": 2.411358510782391e-05}
{"statement": "The capital of Norway is Lusaka", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 243, "p_dot": 0.6811821208102629, "score_xfmt": 1.6437445111694433e-08, "score_capint": 0.0002204314691425971}
{"statement": "The capital of Jordan is Guatemala City", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 244, "p_dot": 0.5526801703963429, "score_xfmt": 7.544776542805828e-20, "score_capint": 7.537008289812466e-05}
{"statement": "The capital of Greece is Athens", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 245, "p_dot": 0.7307266113348305, "score_xfmt": 0.9999753675099728, "score_capint": 0.9999591349709571}
{"statement": "The capital of Azerbaijan is Baku", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 246, "p_dot": 0.7146486606798135, "score_xfmt": 0.9999975094349781, "score_capint": 0.9930563170274056}
{"statement": "The capital of Fiji is Suva", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 247, "p_dot": 0.7137404901441187, "score_xfmt": 0.9994542152613051, "score_capint": 0.9954404085828595}
{"statement": "The capital of Switzerland is Bern", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 248, "p_dot": 0.8210917780525051, "score_xfmt": 0.9875874120621847, "score_capint": 0.9995373426366363}
{"statement": "The capital of Tajikistan is Prague", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 249, "p_dot": 0.8178420561016537, "score_xfmt": 7.855936183671665e-08, "score_capint": 0.00011502420187055267}
{"statement": "The capital of Sudan is Baghdad", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 250, "p_dot": 0.5334493702976033, "score_xfmt": 8.436558585028718e-23, "score_capint": 5.8750545562253055e-06}
{"statement": "The capital of Eswatini is London", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 251, "p_dot": 0.7654127976275049, "score_xfmt": 3.759849371993348e-09, "score_capint": 0.019595252436537137}
{"statement": "The capital of Malawi is Havana", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 252, "p_dot": 0.6857615943299606, "score_xfmt": 1.1193721439537533e-17, "score_capint": 0.00036193876605281153}
{"statement": "The capital of Latvia is Riga", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 253, "p_dot": 0.7300809100852348, "score_xfmt": 0.9999962659118189, "score_capint": 0.9994212920019703}
{"statement": "The capital of Jordan is Amman", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 254, "p_dot": 0.7922952527296729, "score_xfmt": 0.9994666810298859, "score_capint": 0.9931887252684131}
{"statement": "The capital of Yemen is Sanaa", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 255, "p_dot": 0.7754752125474624, "score_xfmt": 0.9038834676720588, "score_capint": 0.9976543843535137}
{"statement": "The capital of Turkey is Ankara", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 256, "p_dot": 0.7438975753029808, "score_xfmt": 0.9998912366516249, "score_capint": 0.9994344241520597}
{"statement": "The capital of New Zealand is Wellington", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 257, "p_dot": 0.7917813424719498, "score_xfmt": 0.9949677816497354, "score_capint": 0.9986930859551298}
{"statement": "The capital of Somalia is Mogadishu", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 258, "p_dot": 0.8298866795375943, "score_xfmt": 0.9886041248155096, "score_capint": 0.9995212816044373}
{"statement": "The capital of North Macedonia is Skopje", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 259, "p_dot": 0.8067765047890134, "score_xfmt": 0.9975630005539032, "score_capint": 0.9941171907016975}
{"statement": "The capital of Belize is Belmopan", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 260, "p_dot": 0.5763966717640869, "score_xfmt": 0.004385365427711976, "score_capint": 0.9904417195598328}
{"statement": "The capital of Norway is Oslo", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 261, "p_dot": 0.8331531882286072, "score_xfmt": 0.9976244389978642, "score_capint": 0.9989559406917679}
{"statement": "The capital of Rwanda is Nouakchott", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 262, "p_dot": 0.6959678204730153, "score_xfmt": 2.519722629251394e-10, "score_capint": 0.001661976997777926}
{"statement": "The capital of Afghanistan is Manila", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 263, "p_dot": 0.6958004695479758, "score_xfmt": 1.7281581680779864e-15, "score_capint": 0.0005358696208174486}
{"statement": "The capital of Niger is Jakarta", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 264, "p_dot": 0.6670822307933122, "score_xfmt": 2.2918450286059077e-16, "score_capint": 0.021474381483688673}
{"statement": "The capital of North Korea is Pyongyang", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 265, "p_dot": 0.7790955508244224, "score_xfmt": 0.9999482020688996, "score_capint": 0.9997919059016089}
{"statement": "The capital of Qatar is Doha", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 266, "p_dot": 0.7896205857396126, "score_xfmt": 0.9978859677884915, "score_capint": 0.9994160279734489}
{"statement": "The capital of Peru is Lima", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 267, "p_dot": 0.779202991980128, "score_xfmt": 0.9990430876999036, "score_capint": 0.9973164423929987}
{"statement": "The capital of Belize is Harare", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 268, "p_dot": 0.6830432342831045, "score_xfmt": 3.5826243409530365e-11, "score_capint": 8.300614257782261e-06}
{"statement": "The capital of Croatia is Zagreb", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 269, "p_dot": 0.7780999520327896, "score_xfmt": 0.9994560139074865, "score_capint": 0.9992030982516144}
{"statement": "The capital of Greece is Pretoria", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 270, "p_dot": 0.7253104076953605, "score_xfmt": 2.209217368044054e-12, "score_capint": 0.00027103396508839993}
{"statement": "The capital of Angola is Luanda", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 271, "p_dot": 0.7990132718114182, "score_xfmt": 0.9998699212487476, "score_capint": 0.9995858392441738}
{"statement": "The capital of Chile is Santiago", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 272, "p_dot": 0.7890389644308016, "score_xfmt": 0.9999281036758427, "score_capint": 0.9738597749918995}
{"statement": "The capital of Myanmar is Naypyidaw", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 273, "p_dot": 0.5729744570562616, "score_xfmt": 4.071934042381269e-06, "score_capint": 0.9996402196629862}
{"statement": "The capital of Spain is Madrid", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 274, "p_dot": 0.8022565139108337, "score_xfmt": 0.9942807083735905, "score_capint": 0.9970575638487024}
{"statement": "The capital of Iraq is Baghdad", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 275, "p_dot": 0.7779143396764994, "score_xfmt": 0.9978440562609552, "score_capint": 0.9964332823333603}
{"statement": "The capital of Tunisia is Santiago", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 276, "p_dot": 0.6099257068708539, "score_xfmt": 3.083879678627705e-07, "score_capint": 0.00013847810010244322}
{"statement": "The capital of Senegal is Dakar", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 277, "p_dot": 0.7616439126431942, "score_xfmt": 0.9999534135068354, "score_capint": 0.9986003826393247}
{"statement": "The capital of Colombia is Bogota", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 278, "p_dot": 0.7383397101657465, "score_xfmt": 0.9990790279108406, "score_capint": 0.9995417517529775}
{"statement": "The capital of Lebanon is Beirut", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 279, "p_dot": 0.8002178528113291, "score_xfmt": 0.9998416487559328, "score_capint": 0.985062467604691}
{"statement": "The capital of Slovenia is Ljubljana", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 280, "p_dot": 0.8115439314278774, "score_xfmt": 0.9977277184832801, "score_capint": 0.999574467222845}
{"statement": "The capital of Russia is Moscow", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 281, "p_dot": 0.7431487991125323, "score_xfmt": 0.9999310085036748, "score_capint": 0.9992986285173817}
{"statement": "The capital of Ukraine is Kyiv", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 282, "p_dot": 0.7711774439085275, "score_xfmt": 0.9968911142885895, "score_capint": 0.9961558280251999}
{"statement": "The capital of Niger is Niamey", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 283, "p_dot": 0.7979380061733536, "score_xfmt": 0.9792987820951589, "score_capint": 0.9987064702335836}
{"statement": "The capital of Sudan is Khartoum", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 284, "p_dot": 0.8039696514606476, "score_xfmt": 0.9053263866058389, "score_capint": 0.7746851939961674}
{"statement": "The capital of Nicaragua is Managua", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 285, "p_dot": 0.7271282965084538, "score_xfmt": 0.9991835793942452, "score_capint": 0.9957727238023566}
{"statement": "The capital of Venezuela is Caracas", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 286, "p_dot": 0.7769743460230529, "score_xfmt": 0.9999407055391748, "score_capint": 0.9966052264465526}
{"statement": "The capital of Turkmenistan is Singapore", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 287, "p_dot": 0.6081597676384263, "score_xfmt": 1.1309416621464151e-33, "score_capint": 4.765987646267655e-06}
{"statement": "The capital of Iceland is Manama", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 288, "p_dot": 0.6677709687501192, "score_xfmt": 2.0924861593625593e-14, "score_capint": 0.00041215627694593}
{"statement": "The capital of Estonia is Tallinn", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 289, "p_dot": 0.7537019322626293, "score_xfmt": 0.9999941880799438, "score_capint": 0.9980972328699482}
{"statement": "The capital of Guyana is Georgetown", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 290, "p_dot": 0.7666985999676399, "score_xfmt": 0.9998980197861447, "score_capint": 0.9958738594670693}
{"statement": "The capital of Austria is Vienna", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 291, "p_dot": 0.8002432023640722, "score_xfmt": 0.9998676822807661, "score_capint": 0.9993899984137655}
{"statement": "The capital of Ethiopia is Addis Ababa", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 292, "p_dot": 0.7340310229919851, "score_xfmt": 0.9892786544227209, "score_capint": 0.9993293249654867}
{"statement": "The capital of Finland is Dodoma", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 293, "p_dot": 0.6424268775153905, "score_xfmt": 4.810928291950701e-22, "score_capint": 7.394722096155812e-05}
{"statement": "The capital of Philippines is Port-au-Prince", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 294, "p_dot": 0.6833230445627123, "score_xfmt": 3.932956243719082e-19, "score_capint": 6.519813421053066e-05}
{"statement": "The capital of Nepal is Kathmandu", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 295, "p_dot": 0.7736554060829803, "score_xfmt": 0.9905910770928661, "score_capint": 0.9996109264562343}
{"statement": "The capital of Colombia is San Jose", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 296, "p_dot": 0.3694051755592227, "score_xfmt": 3.564863474825683e-26, "score_capint": 0.0010206005675512501}
{"statement": "The capital of Bosnia and Herzegovina is Nassau", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 297, "p_dot": 0.6279421243816614, "score_xfmt": 2.6095010892642156e-13, "score_capint": 1.0854160827411792e-05}
{"statement": "The capital of Burkina Faso is Ouagadougou", "dataset": "capital", "split": "capitals", "correct": true, "eval_order": 298, "p_dot": 0.8728318574721925, "score_xfmt": 0.9482542175376086, "score_capint": 0.9980816056454099}
{"statement": "The capital of Nicaragua is Damascus", "dataset": "capital", "split": "capitals", "correct": false, "eval_order": 299, "p_dot": 0.6985186610836536, "score_xfmt": 3.423825773859436e-14, "score_capint": 0.00010046755781982194}
