{ "vocab_size": { "total": 32761, "total_with_special": 32768, "base": 256, "merges": 32505, "special": 7, "is_power_of_2": true, "power": 15, "matches_expected": true }, "reachability": { "valid_merges": 32505, "invalid_merges": 0, "reachable": 32761, "unreachable": 0, "all_reachable": true }, "length_dist": { "distribution": { "1": 256, "2": 13428, "3": 6380, "4": 6236, "5": 1763, "6": 1395, "7": 676, "8": 963, "9": 191, "10": 220, "11": 109, "12": 318, "13": 86, "14": 102, "15": 69, "16": 233, "17": 26, "18": 31, "19": 23, "20": 58, "21": 16, "22": 16, "23": 19, "24": 44, "25": 6, "26": 7, "27": 8, "28": 13, "29": 7, "30": 4, "31": 3, "32": 54 }, "avg_length": 3.812393162393162, "min_length": 1, "max_length": 32, "length_3_count": 6380, "length_3_percent": 19.474969474969473 }, "byte_content": { "null_tokens": 8350, "ascii_printable": 6460, "ascii_only": 13796, "high_byte": 18964, "mixed": 10141, "byte_distribution": { "0": 20462, "255": 3502, "72": 2883, "1": 2622, "3": 1967, "139": 1934, "32": 1901, "2": 1856, "64": 1609, "116": 1546, "101": 1482, "36": 1435, "204": 1366, "128": 1212, "65": 1186, "4": 1150, "97": 1109, "114": 1088, "249": 1069, "137": 1059, "111": 990, "8": 978, "105": 964, "115": 940, "15": 917, "110": 917, "99": 879, "16": 837, "192": 814, "232": 810, "108": 798, "131": 788, "68": 777, "84": 740, "224": 737, "112": 732, "117": 723, "48": 701, "5": 690, "169": 687, "76": 684, "69": 663, "100": 653, "95": 650, "6": 647, "73": 623, "141": 614, "10": 570, "7": 562, "66": 546 } }, "diversity": { "1": { "learned": 256, "possible": 256, "coverage": 100.0 }, "2": { "learned": 13428, "possible": 65536, "coverage": 20.489501953125 }, "3": { "learned": 6380, "possible": 16777216, "coverage": 0.03802776336669922 }, "4": { "learned": 6236, "possible": 4294967296, "coverage": 0.0001451931893825531 } } }