{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "id": "6ku-00jnLFM6" }, "outputs": [], "source": [ "# -*- coding: utf-8 -*-\n", "\"\"\"\n", "BiLSTM Chess OCR - v6 (V47 preprocessing + compute-friendly augmentation)\n", "\n", "==================================================================\n", "\n", "This script includes the final preprocessing pipeline that:\n", "1. Removes horizontal lines (top + bottom)\n", "2. Detects move number boxes via background color transition\n", "3. Finds handwriting extent using non-line rows\n", "4. Crops tightly, normalizes height, left-aligns, right-pads\n", "\n", "Run on Google Colab with GPU: Runtime > Change runtime type > T4 GPU\n", "\n", "To split into cells, search for: # ============ CELL\n", "\"\"\"\n", "\n", "import math\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "y_IxWkw_PWp5" }, "outputs": [], "source": [ "\n", "# ============ CELL 1: Install Dependencies ============\n", "!pip install -q datasets pillow matplotlib torch torchvision\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "LTp8PDX_Pwpo", "outputId": "f41689f0-b373-4856-d31e-c9168e070633" }, "outputs": [], "source": [ "\n", "# ============ CELL 2: Imports ============\n", "import torch\n", "import torch.nn as nn\n", "import torch.optim as optim\n", "from torch.utils.data import Dataset, DataLoader\n", "from datasets import load_dataset\n", "from PIL import Image\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import math\n", "\n", "import cv2\n", "from scipy import signal\n", "\n", "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", "print(f\"Using device: {device}\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "jZ0KfzX6Pu0z", "outputId": "bf489fb7-32a1-45ad-e84f-70bd31842a7c" }, "outputs": [], 
"source": [
 "\n",
 "# ============ CELL 3: Configuration ============\n",
 "IMG_HEIGHT = 64\n",
 "IMG_WIDTH = 256\n",
 "\n",
 "# Preprocessing ablation flags (v47):\n",
 "REMOVE_LINES = False  # keep ruled lines (baseline-like)\n",
 "LEFT_ALIGN = False  # center-pad instead of left-align\n",
 "\n",
 "CHESS_CHARS = [\n",
 "    '',  # CTC blank (index 0)\n",
 "    'K', 'Q', 'R', 'B', 'N',\n",
 "    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',\n",
 "    '1', '2', '3', '4', '5', '6', '7', '8',\n",
 "    'x', '+', '#', '=', 'O', '-',\n",
 "]\n",
 "\n",
 "char_to_idx = {c: i for i, c in enumerate(CHESS_CHARS)}\n",
 "idx_to_char = {i: c for i, c in enumerate(CHESS_CHARS)}\n",
 "NUM_CLASSES = len(CHESS_CHARS)\n",
 "print(f\"Character set: {NUM_CLASSES} classes\")\n"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "uEBhmoXpPs6r" }, "outputs": [],
"source": [
 "\n",
 "# ============ CELL 4: V47 Preprocessing Functions ============\n",
 "\n",
 "def find_transition_end(gray):\n",
 "    \"\"\"\n",
 "    Find where the move number box transition ENDS (fully into light zone).\n",
 "    Uses max brightness (paper color) gradient detection.\n",
 "\n",
 "    Args:\n",
 "        gray: 2-D grayscale image array indexed (row, column).\n",
 "\n",
 "    Returns (has_move_number, boundary_x). (False, 0) is returned when the\n",
 "    image is narrower than 250 columns, has no rows in its middle third, or\n",
 "    shows no clear dark-to-light step in the searched column range.\n",
 "    \"\"\"\n",
 "    h, w = gray.shape\n",
 "\n",
 "    # Every fixed column window used below (140..220) fits inside a 250-column\n",
 "    # image, so this single guard is the only width check that is needed.\n",
 "    if w < 250:\n",
 "        return False, 0\n",
 "\n",
 "    # Use middle strip to avoid horizontal lines\n",
 "    middle_strip = gray[h//3 : 2*h//3, :]\n",
 "    if middle_strip.size == 0:\n",
 "        # Degenerate height (h < 2): np.max over zero rows would raise.\n",
 "        return False, 0\n",
 "\n",
 "    col_max = np.max(middle_strip, axis=0)\n",
 "    col_max_smooth = np.convolve(col_max, np.ones(5)/5, mode='same')\n",
 "    gradient = np.gradient(col_max_smooth)\n",
 "\n",
 "    # Find max gradient in 160-200 range (where box boundary typically is)\n",
 "    search_start, search_end = 160, 200\n",
 "    search_region = gradient[search_start:search_end]\n",
 "    max_grad_idx = int(np.argmax(search_region)) + search_start\n",
 "    max_grad_val = search_region[max_grad_idx - search_start]\n",
 "\n",
 "    # Check if there's a significant transition\n",
 "    left_brightness = np.mean(col_max_smooth[140:170])\n",
 "    right_brightness = np.mean(col_max_smooth[195:220])\n",
 "    brightness_diff = right_brightness - left_brightness\n",
 "\n",
 "    # Must have positive gradient AND brightness increase\n",
 "    if max_grad_val < 0.8 or brightness_diff < 5:\n",
 "        return False, 0\n",
 "\n",
 "    # Find where gradient drops below threshold AFTER the peak (transition END)\n",
 "    end_threshold = 0.3\n",
 "    transition_end = max_grad_idx + 10  # Default fallback\n",
 "\n",
 "    for x in range(max_grad_idx, min(max_grad_idx + 25, w)):\n",
 "        if gradient[x] < end_threshold:\n",
 "            transition_end = x\n",
 "            break\n",
 "\n",
 "    return True, transition_end\n",
 "\n",
 "\n",
 "def remove_horizontal_lines(gray):\n",
 "    \"\"\"\n",
 "    Remove horizontal lines - original method for bottom + aggressive for top.\n",
 "    Returns list of row indices that are NOT part of lines.\n",
 "    \"\"\"\n",
 "    h, w = gray.shape\n",
 "\n",
 "    line_rows = set()\n",
 "\n",
 "    # PART 1: Peak detection (good for bottom lines)\n",
 "    h_proj = np.sum(255 - gray, axis=1).astype(float)\n",
 "    h_proj_norm = (h_proj - h_proj.min()) / (h_proj.max() - h_proj.min() + 1e-6)\n",
 "\n",
 "    peaks, properties = signal.find_peaks(h_proj_norm, prominence=0.1, height=0.2,\n",
 "                                          width=1, rel_height=0.5)\n",
 "\n",
 "    if len(peaks) > 0:\n",
 "        peak_heights = properties['peak_heights']\n",
 "        left_ips = properties['left_ips']\n",
 "        right_ips = properties['right_ips']\n",
 "\n",
 "        for i, peak in enumerate(peaks):\n",
 "            ph = peak_heights[i] if i < len(peak_heights) else 0\n",
 "            near_edge = peak < h * 0.15 or peak > h * 0.85\n",
 "            if ph > 0.6 or (near_edge and ph > 0.3):\n",
 "                left = int(left_ips[i]) - 3 if i < len(left_ips) else peak - 5\n",
 "                right = int(right_ips[i]) + 3 if i < len(right_ips) else peak + 5\n",
 "                for r in range(max(0, left), min(h, right + 1)):\n",
 "                    line_rows.add(r)\n",
 "\n",
 "    # PART 2: Additional check for TOP line (often missed by peak detection)\n",
 "    top_region_end = int(h * 0.15)\n",
 "    top_region = gray[:top_region_end, :]\n",
 "\n",
 "    top_ink = np.sum(255 - top_region, 
axis=1).astype(float)\n",
 "    if len(top_ink) > 0 and top_ink.max() > 0:\n",
 "        top_ink_norm = top_ink / top_ink.max()\n",
 "\n",
 "        for r, val in enumerate(top_ink_norm):\n",
 "            if val > 0.5:  # High ink density = line\n",
 "                for dr in range(-2, 5):\n",
 "                    if 0 <= r + dr < h:\n",
 "                        line_rows.add(r + dr)\n",
 "\n",
 "    non_line_rows = sorted([i for i in range(h) if i not in line_rows])\n",
 "\n",
 "    if len(non_line_rows) < 10:\n",
 "        non_line_rows = list(range(h))\n",
 "\n",
 "    return non_line_rows\n",
 "\n",
 "\n",
 "def find_handwriting_extent(gray, start_x, non_line_rows):\n",
 "    \"\"\"\n",
 "    Find where handwriting starts and ends, using only non-line rows.\n",
 "    Returns (hw_start, hw_end)\n",
 "    \"\"\"\n",
 "    h, w = gray.shape\n",
 "\n",
 "    if start_x >= w:\n",
 "        return start_x, w - 1\n",
 "\n",
 "    if len(non_line_rows) == 0:\n",
 "        non_line_rows = list(range(h//4, 3*h//4))\n",
 "\n",
 "    # Extract only non-line rows after start_x\n",
 "    region = gray[non_line_rows, :][:, start_x:]\n",
 "\n",
 "    # Otsu threshold to find ink\n",
 "    otsu_thresh, _ = cv2.threshold(region.astype(np.uint8), 0, 255,\n",
 "                                   cv2.THRESH_BINARY + cv2.THRESH_OTSU)\n",
 "\n",
 "    # Dark pixels (ink)\n",
 "    dark_mask = region < otsu_thresh\n",
 "\n",
 "    # Sum dark pixels per column\n",
 "    col_ink = np.sum(dark_mask, axis=0)\n",
 "    col_ink_smooth = np.convolve(col_ink, np.ones(3)/3, mode='same')\n",
 "\n",
 "    # Find first and last column with significant ink\n",
 "    threshold = len(non_line_rows) * 0.03\n",
 "    ink_cols = np.where(col_ink_smooth > threshold)[0]\n",
 "\n",
 "    if len(ink_cols) > 0:\n",
 "        hw_start = start_x + ink_cols[0]\n",
 "        hw_end = start_x + ink_cols[-1]\n",
 "    else:\n",
 "        hw_start = start_x\n",
 "        hw_end = w - 1\n",
 "\n",
 "    return hw_start, hw_end\n",
 "\n"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "rhxl1Xb2Uud6" }, "outputs": [],
"source": [
 "def preprocess_image_for_ctc(image, target_height=IMG_HEIGHT, target_width=IMG_WIDTH, augment=False, augment_mode=None, debug=False, remove_lines=REMOVE_LINES, left_align=LEFT_ALIGN):\n",
 "    \"\"\"\n",
 "    V47 - Complete preprocessing (with optional line removal / alignment):\n",
 "      1. (Optional) Remove horizontal lines (top + bottom)\n",
 "      2. Detect move-number box (if present)\n",
 "      3. Find handwriting extent (using non-line rows only)\n",
 "      4. Crop around handwriting (padded horizontally, trimmed to inked rows vertically)\n",
 "      5. Stretch contrast, scale to target height, then left-align or center on a white canvas\n",
 "\n",
 "    Augmentation (training only, compute-friendly):\n",
 "      - Optionally apply *one* geometric transform (rotate OR horizontal shear OR horizontal scale)\n",
 "      - Plus light photometric jitter (brightness/contrast) and small vertical shift\n",
 "      - No perspective/blur/noise/thickness by default (keeps Colab fast and avoids OOD distortions)\n",
 "\n",
 "    Args:\n",
 "        image: PIL image, 2-D grayscale array, or 3-channel BGR array.\n",
 "        target_height, target_width: size of the returned canvas.\n",
 "        augment: apply the light probabilistic augmentation mix.\n",
 "        augment_mode: if given, apply ONLY that augmenter ('rotate', 'hscale', 'hshear',\n",
 "            'brightness_contrast' or 'vshift'); takes precedence over `augment`.\n",
 "        debug: print the intermediate decisions.\n",
 "        remove_lines: drop rows detected as ruled lines before analysing / cropping.\n",
 "        left_align: place the result at the left edge instead of centering it.\n",
 "\n",
 "    Returns:\n",
 "        uint8 array of shape (target_height, target_width), padded with white (255).\n",
 "\n",
 "    Raises:\n",
 "        ValueError: if augment_mode is not a known augmenter name.\n",
 "    \"\"\"\n",
 "    if isinstance(image, Image.Image):\n",
 "        gray = np.array(image.convert('L'))\n",
 "    else:\n",
 "        gray = image.copy() if len(image.shape) == 2 else cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n",
 "\n",
 "    h, w = gray.shape\n",
 "\n",
 "    # === DATA AUGMENTATION (training only) ===\n",
 "    def _aug_rotate(img):\n",
 "        angle = float(np.random.uniform(-10.0, 10.0))\n",
 "        if abs(angle) <= 0.25:\n",
 "            return img\n",
 "        center = (w // 2, h // 2)\n",
 "        M = cv2.getRotationMatrix2D(center, angle, 1.0)\n",
 "        return cv2.warpAffine(img, M, (w, h), borderValue=255)\n",
 "\n",
 "    def _aug_hscale(img):\n",
 "        scale_x = float(np.random.uniform(0.8, 1.2))\n",
 "        new_w = max(1, int(round(w * scale_x)))\n",
 "        if new_w == w:\n",
 "            return img\n",
 "        scaled = cv2.resize(img, (new_w, h), interpolation=cv2.INTER_LINEAR)\n",
 "        if new_w > w:\n",
 "            # Wider than the canvas: keep the central w columns.\n",
 "            x0 = (new_w - w) // 2\n",
 "            return scaled[:, x0:x0 + w]\n",
 "        # Narrower than the canvas: pad both sides with white.\n",
 "        pad_left = (w - new_w) // 2\n",
 "        pad_right = w - new_w - pad_left\n",
 "        return cv2.copyMakeBorder(\n",
 "            scaled, 0, 0, pad_left, pad_right,\n",
 "            borderType=cv2.BORDER_CONSTANT, value=255\n",
 "        )\n",
 "\n",
 "    def _aug_hshear(img):\n",
 "        shear_deg = float(np.random.uniform(-15.0, 15.0))\n",
 "        if abs(shear_deg) <= 0.25:\n",
 "            return img\n",
 "        shear = math.tan(math.radians(shear_deg))\n",
 "        M = np.array([[1.0, shear, 0.0],\n",
 "                      [0.0, 1.0, 0.0]], dtype=np.float32)\n",
 "        # Shift so that the shear pivots around the vertical middle of the image.\n",
 "        M[0, 2] = -shear * (h / 2.0)\n",
 "        return cv2.warpAffine(img, M, (w, h), borderValue=255)\n",
 "\n",
 "    def _aug_vshift(img):\n",
 "        # randint's upper bound is exclusive, so this is uniform over {-3, ..., +3}.\n",
 "        # (int(uniform(-3, 4)) truncates toward zero: it never produces -3 and\n",
 "        # produces 0 twice as often as any other shift.)\n",
 "        dy = int(np.random.randint(-3, 4))\n",
 "        if dy == 0:\n",
 "            return img\n",
 "        M = np.array([[1.0, 0.0, 0.0],\n",
 "                      [0.0, 1.0, float(dy)]], dtype=np.float32)\n",
 "        return cv2.warpAffine(img, M, (w, h), borderValue=255)\n",
 "\n",
 "    def _aug_brightness_contrast(img):\n",
 "        contrast = float(np.random.uniform(0.85, 1.15))\n",
 "        brightness = float(np.random.uniform(-12.0, 12.0))\n",
 "        return np.clip(img.astype(np.float32) * contrast + brightness, 0, 255).astype(np.uint8)\n",
 "\n",
 "    AUGMENTERS = {\n",
 "        \"rotate\": _aug_rotate,\n",
 "        \"hscale\": _aug_hscale,\n",
 "        \"hshear\": _aug_hshear,\n",
 "        \"brightness_contrast\": _aug_brightness_contrast,\n",
 "        \"vshift\": _aug_vshift,\n",
 "    }\n",
 "\n",
 "    if augment or (augment_mode is not None):\n",
 "        if augment_mode is not None:\n",
 "            fn = AUGMENTERS.get(augment_mode)\n",
 "            if fn is None:\n",
 "                raise ValueError(f\"Unknown augment_mode: {augment_mode}\")\n",
 "            gray = fn(gray)\n",
 "        else:\n",
 "            # Light mix (no stacking of heavy geometry)\n",
 "            if np.random.random() < 0.7:\n",
 "                gray = _aug_brightness_contrast(gray)\n",
 "            if np.random.random() < 0.4:\n",
 "                gray = _aug_vshift(gray)\n",
 "\n",
 "            # At most ONE geometric transform; r >= 0.80 leaves the geometry untouched.\n",
 "            r = np.random.random()\n",
 "            if r < 0.35:\n",
 "                gray = _aug_rotate(gray)\n",
 "            elif r < 0.60:\n",
 "                gray = _aug_hshear(gray)\n",
 "            elif r < 0.80:\n",
 "                gray = _aug_hscale(gray)\n",
 "\n",
 "    # The Otsu thresholding below works on 8-bit data.\n",
 "    gray = gray.astype(np.uint8)\n",
 "\n",
 "    # === STEP 1: (Optional) remove horizontal lines ===\n",
 "    if remove_lines:\n",
 "        non_line_rows = remove_horizontal_lines(gray)\n",
 "        if debug:\n",
 "            print(f\"  Lines removed: {h - len(non_line_rows)} rows\")\n",
 "    else:\n",
 "        non_line_rows = list(range(h))\n",
 "        if debug:\n",
 "            print(\"  Line removal disabled\")\n",
 "\n",
 "    # === STEP 2: Detect move number box (always attempted) ===\n",
 "    has_move_num, move_num_boundary = find_transition_end(gray)\n",
 "    if debug:\n",
 "        print(f\"  Move number detected: {has_move_num}, boundary: {move_num_boundary}\")\n",
 "\n",
 "    # Start searching for handwriting after boundary (if move number detected)\n",
 "    search_start = move_num_boundary if has_move_num else 0\n",
 "\n",
 "    # === STEP 3: Find handwriting extent (using non-line rows) ===\n",
 "    hw_start, hw_end = find_handwriting_extent(gray, search_start, non_line_rows)\n",
 "    if debug:\n",
 "        print(f\"  Handwriting: x={hw_start} to {hw_end}\")\n",
 "\n",
 "    # === STEP 4: Horizontal crop with padding ===\n",
 "    padding = max(8, int((hw_end - hw_start) * 0.05))\n",
 "    x_min = max(0, hw_start - padding)\n",
 "    x_max = min(w - 1, hw_end + padding)\n",
 "\n",
 "    # Don't go back before the boundary if move number was detected\n",
 "    if has_move_num:\n",
 "        x_min = max(x_min, move_num_boundary - 5)\n",
 "\n",
 "    if debug:\n",
 "        print(f\"  Final crop: x={x_min} to {x_max} (width={x_max - x_min})\")\n",
 "\n",
 "    # === STEP 5: Vertical crop (find rows with ink) ===\n",
 "    crop_region = gray[non_line_rows, :][:, x_min:x_max+1]\n",
 "    otsu_thresh, _ = cv2.threshold(crop_region.astype(np.uint8), 0, 255,\n",
 "                                   cv2.THRESH_BINARY + cv2.THRESH_OTSU)\n",
 "\n",
 "    # Test the same inclusive column range [x_min, x_max] that was thresholded\n",
 "    # above and is cropped below, so ink in the last column is not ignored.\n",
 "    y_with_ink = [y for y in non_line_rows\n",
 "                  if np.any(gray[y, x_min:x_max+1] < otsu_thresh)]\n",
 "\n",
 "    if len(y_with_ink) < 5:\n",
 "        y_with_ink = non_line_rows\n",
 "\n",
 "    # Add small vertical padding (3 non-line rows above and below the inked span).\n",
 "    # y_with_ink is always drawn from non_line_rows, so .index() cannot fail here.\n",
 "    if len(y_with_ink) > 0:\n",
 "        y_min_idx = max(0, non_line_rows.index(min(y_with_ink)) - 3)\n",
 "        y_max_idx = min(len(non_line_rows) - 1, non_line_rows.index(max(y_with_ink)) + 3)\n",
 "        y_with_ink = non_line_rows[y_min_idx:y_max_idx+1]\n",
 "\n",
 "    # === STEP 6: Extract and normalize ===\n",
 "    cropped_rows = [gray[y, x_min:x_max+1] for y in y_with_ink]\n",
 "\n",
 "    if len(cropped_rows) == 0:\n",
 "        return np.full((target_height, target_width), 255, dtype=np.uint8)\n",
 "\n",
 "    cropped = np.stack(cropped_rows, axis=0)\n",
 "\n",
 "    # Normalize contrast\n",
 "    c_min, c_max = cropped.min(), cropped.max()\n",
 "    if c_max > c_min:\n",
 "        normalized = ((cropped - c_min) / (c_max - c_min) * 255).astype(np.uint8)\n",
 "    else:\n",
 "        normalized = cropped\n",
 "\n",
 "    # === STEP 7: Scale to target height ===\n",
 "    ch, cw = normalized.shape\n",
 "    if ch == 0 or cw == 0:\n",
 "        return np.full((target_height, target_width), 255, dtype=np.uint8)\n",
 "\n",
 "    # Aspect ratio is preserved unless the result would exceed target_width,\n",
 "    # in which case the width is clamped (the crop is squeezed horizontally).\n",
 "    aspect = cw / ch\n",
 "    new_width = int(target_height * aspect)\n",
 "    new_width = max(1, min(new_width, target_width))\n",
 "\n",
 "    resized = cv2.resize(normalized, (new_width, target_height), interpolation=cv2.INTER_AREA)\n",
 "\n",
 "    # === STEP 8: Place on a white canvas (left-aligned or centered) ===\n",
 "    result = np.full((target_height, target_width), 255, dtype=np.uint8)\n",
 "    if left_align:\n",
 "        x0 = 0\n",
 "    else:\n",
 "        x0 = max(0, (target_width - new_width) // 2)\n",
 "    result[:, x0:x0 + new_width] = resized\n",
 "\n",
 "    return result\n"
] },
{ "cell_type": "markdown", "metadata": { "id": "k_ViMVRD8XWY" },
"source": [
 "# Data augmentation (paper + additional real-world variation)\n",
 "\n",
 "When training (`augment=True`), `preprocess_image_for_ctc(...)` now applies at most **one** geometric augmentation at a time (or none), optionally combined with the mild brightness/contrast jitter and vertical shift listed below, to keep samples realistic and training efficient.\n",
 "\n",
 "**Paper (ICDAR 2021, Section 2.4) augmentations implemented:**\n",
 "- Rotation: −10° … +10°\n",
 "- Horizontal scaling: width × 0.8 … 1.2\n",
 "- Horizontal shear: −15° … +15°\n",
 "\n",
 "**Additional mild augmentations implemented:**\n",
 "- Vertical translation (a few pixels)\n",
 "- Brightness/contrast jitter\n",
 "\n",
 "If training becomes unstable or accuracy drops, reduce the 
probabilities inside `preprocess_image_for_ctc(...)` (they are all grouped under the `if augment:` block).\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 321, "referenced_widgets": [ "896b6a8247ab48e18b7e728f56e4ea45", "3b4a3ca4516347d6bc11b98cd7fd0d07", "0bae02fbd7f84bf18ecaaccdf7e745a7", "01dcb79b363c4820865a19a8d1434818", "b14930674d26476ead6b0cb45cc02472", "8a70a2f077f74897aca169301bbb3c98", "a75fdd66b0564876980ef05e0d11b9f5", "1234354e5f3c4fc2b337425bd88e485b", "987ed53098134ee18f5be552a0b69446", "353fef0714774e69a728279d189467a2", "182a19bb9326402f90a208ec5e9e1829", "d4536bb4c52447948f21dd333efb0051", "e32f0946287b4110bba44ea1e0fa5acf", "561cb931f06d467785b0f562bbf2b8d1", "d2095c811a634a90ad2ed01c3db9892d", "fe0b285b43174e3b8c8bf089f99aca6a", "d0517fc29e4440f2937de01e04faa940", "0f344c3d95e649db94ad43f9b963f048", "8682424db93c4a8fa697131aac827229", "a60bf51a42d142dbacea601ac78238a1", "c080622acd36403e8d9aabbee227030b", "5bcd47c7aaa84635adc6acdc9009715f", "349ecf89ae344283bc40831ceb91c334", "065d75a276d84f8aa5df0bc3ad7e5cd8", "4e01a5a3ef8241aa9eb31cf2bf363be7", "68995937d21d42e0838a788f33c71027", "5c13c9124a234b0b85b52c68088832ee", "8fee67d5099f4989b4d308841def4165", "f5e1abdbbb7a463985a8d0a0cb7b60a4", "3331033115044ea2985e9f86f5cde8d0", "098db5b7992c42f6b464016bd2e3d064", "589f54fe217c402eb435de5b3197391e", "d1002f54bc1f45568abe534a6f402210", "5cd0d2061b024f44bee56945983208a4", "b54c0a7d46754e2d968f1018bc10397b", "0740aca90d474885b2fda2618574eb11", "ac429c6b9e214a96b67eafbcf003afae", "287bd2d39ea441ada2598dc4f0be10e2", "9586da4581e94706b5365b4e554152be", "83c0281c4c2f4abe99d61fdd8716195c", "1d6ce483f4584940a2650b4df3e99f92", "7517d295cd454dd4a2d3d5b45905ffac", "01e88a4eeb3240e196911fffb04e488c", "868bbc2e21904e909fcd681a36653970" ] }, "id": "qrd9SCPAPoHz", "outputId": "af26579c-2c87-45b1-95f4-682632abeb8a" }, "outputs": [], "source": [ "\n", "# ============ CELL 5: Load Dataset 
============\n", "print(\"Loading HCS dataset...\")\n", "dataset = load_dataset(\"BenjaminKost/processed_hcs\")\n", "print(f\"Total samples: {len(dataset['train'])}\")\n", "\n", "split = dataset['train'].train_test_split(test_size=0.1, seed=42)\n", "train_data = split['train']\n", "val_data = split['test']\n", "print(f\"Train: {len(train_data)}, Val: {len(val_data)}\")\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, "id": "1XCsFcGNPmxA", "outputId": "dfa88532-9f70-47d4-a82b-f1bd0bda3ce5" }, "outputs": [], "source": [ "\n", "# ============ CELL 6: Verify Preprocessing ============\n", "print(\"=\" * 60)\n", "print(\"PREPROCESSING VERIFICATION\")\n", "print(\"=\" * 60)\n", "\n", "test_indices = [0, 1, 10, 50, 100, 500, 507, 1000, 5000]\n", "test_indices = [i for i in test_indices if i < len(train_data)]\n", "\n", "fig, axes = plt.subplots(len(test_indices), 2, figsize=(12, 2.5 * len(test_indices)))\n", "\n", "for row, idx in enumerate(test_indices):\n", " sample = train_data[idx]\n", " gray = np.array(sample['image'].convert('L'))\n", " label = sample['label']\n", "\n", " processed = preprocess_image_for_ctc(sample['image'], augment=False)\n", "\n", " # Metrics\n", " left_ink = np.sum(processed[:, :IMG_WIDTH//2] < 200)\n", " right_ink = np.sum(processed[:, IMG_WIDTH//2:] < 200)\n", " right_q = processed[:, -IMG_WIDTH//4:]\n", " right_white = np.sum(right_q > 240) / right_q.size\n", "\n", " passed = (left_ink > right_ink) and (right_white > 0.7)\n", "\n", " axes[row, 0].imshow(gray[:, :350], cmap='gray')\n", " axes[row, 0].set_title(f\"[{idx}] '{label}'\", fontsize=10)\n", " axes[row, 0].axis('off')\n", "\n", " axes[row, 1].imshow(processed, cmap='gray')\n", " color = 'green' if passed else 'red'\n", " status = \"✓\" if passed else \"✗\"\n", " axes[row, 1].set_title(f'{status} RW:{right_white:.0%}', fontsize=10, color=color)\n", " axes[row, 1].axis('off')\n", "\n", 
"plt.suptitle(\"V47 Preprocessing Verification\", fontsize=14)\n",
 "plt.tight_layout()\n",
 "plt.savefig('v40_verification.png', dpi=150)\n",
 "plt.show()\n",
 "\n"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "dMDWmH6NPlfI" }, "outputs": [],
"source": [
 "\n",
 "# ============ CELL 7: Dataset Class ============\n",
 "class ChessOCRDataset(Dataset):\n",
 "    \"\"\"Wraps a Hugging Face split: preprocesses each image and encodes its label for CTC.\"\"\"\n",
 "\n",
 "    def __init__(self, hf_dataset, augment=False, augment_mode=None):\n",
 "        self.dataset = hf_dataset\n",
 "        self.augment = augment\n",
 "        self.augment_mode = augment_mode\n",
 "\n",
 "    def __len__(self):\n",
 "        return len(self.dataset)\n",
 "\n",
 "    def encode_label(self, label):\n",
 "        \"\"\"Map a label string to class indices; characters outside CHESS_CHARS are dropped.\"\"\"\n",
 "        encoded = []\n",
 "        for ch in label:\n",
 "            if ch in char_to_idx:\n",
 "                encoded.append(char_to_idx[ch])\n",
 "        return encoded\n",
 "\n",
 "    def __getitem__(self, idx):\n",
 "        record = self.dataset[idx]\n",
 "        label_text = record['label']\n",
 "\n",
 "        canvas = preprocess_image_for_ctc(\n",
 "            record['image'], augment=self.augment, augment_mode=self.augment_mode\n",
 "        )\n",
 "\n",
 "        # uint8 [0, 255] -> float [-1, 1], then add the channel dimension\n",
 "        pixels = torch.from_numpy(canvas).float() / 255.0\n",
 "        pixels = ((pixels - 0.5) / 0.5).unsqueeze(0)\n",
 "\n",
 "        target = self.encode_label(label_text)\n",
 "\n",
 "        return {\n",
 "            'image': pixels,\n",
 "            'label': target,\n",
 "            'label_str': label_text,\n",
 "            'length': len(target)\n",
 "        }\n",
 "\n",
 "\n",
 "\n"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "FREbtLuEPj5X", "outputId": "6e96c407-2532-4255-dd35-ea7f56ca8166" }, "outputs": [],
"source": [
 "\n",
 "# ============ CELL 8: Collate Function ============\n",
 "def collate_fn(batch):\n",
 "    \"\"\"Stack the images and zero-pad the encoded labels to the longest label in the batch.\"\"\"\n",
 "    label_tensors = [torch.tensor(sample['label'], dtype=torch.long) for sample in batch]\n",
 "    longest = max(len(t) for t in label_tensors)\n",
 "\n",
 "    # Index 0 is the CTC blank; true lengths travel separately in 'label_lengths'.\n",
 "    padded_labels = torch.zeros(len(label_tensors), longest, dtype=torch.long)\n",
 "    for row, t in enumerate(label_tensors):\n",
 "        padded_labels[row, :len(t)] = t\n",
 "\n",
 "    return {\n",
 "        'images': torch.stack([sample['image'] for sample in batch]),\n",
 "        'labels': padded_labels,\n",
 "        'label_lengths': torch.tensor([sample['length'] for sample in batch], dtype=torch.long),\n",
 "        'label_strs': [sample['label_str'] for sample in batch]\n",
 "    }\n",
 "\n",
 "\n",
 "# ============ CELL 9: Create DataLoaders ============\n",
 "# DataLoaders (compute-friendly augmentation: light probabilistic mix, no heavy stacking)\n",
 "AUGMENT = True  # training only; validation stays clean\n",
 "\n",
 "train_dataset = ChessOCRDataset(train_data, augment=AUGMENT)\n",
 "val_dataset = ChessOCRDataset(val_data, augment=False)\n",
 "\n",
 "train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True,\n",
 "                          collate_fn=collate_fn, num_workers=0, pin_memory=True)\n",
 "val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False,\n",
 "                        collate_fn=collate_fn, num_workers=0, pin_memory=True)\n",
 "\n",
 "print(f\"Train batches: {len(train_loader)}, Val batches: {len(val_loader)}\")\n",
 "\n"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "_-Vw0oD-Phx5", "outputId": "393e81d7-0bc0-4ccb-fca6-0aac6ebadbe6" }, "outputs": [],
"source": [
 "\n",
 "# ============ CELL 10: Model Architecture ============\n",
 "class ChessBiLSTM(nn.Module):\n",
 "    def __init__(self, num_classes, hidden_size=256):\n",
 "        super().__init__()\n",
 "\n",
 "        self.cnn = nn.Sequential(\n",
 "            nn.Conv2d(1, 64, kernel_size=3, padding=1),\n",
 "            nn.BatchNorm2d(64),\n",
 "            nn.ReLU(inplace=True),\n",
 "            nn.MaxPool2d(2, 2),\n",
 "\n",
 "            nn.Conv2d(64, 128, kernel_size=3, padding=1),\n",
 "            nn.BatchNorm2d(128),\n",
 "            nn.ReLU(inplace=True),\n",
 "            nn.MaxPool2d(2, 2),\n",
 "\n",
 "            nn.Conv2d(128, 256, kernel_size=3, padding=1),\n",
 "            nn.BatchNorm2d(256),\n",
 "            nn.ReLU(inplace=True),\n",
 "\n",
 "            nn.Conv2d(256, 256, kernel_size=3, padding=1),\n",
 "            nn.BatchNorm2d(256),\n",
 "            nn.ReLU(inplace=True),\n",
 "            nn.MaxPool2d((2, 1), (2, 
1)),\n",
 "\n",
 "            nn.Conv2d(256, 512, kernel_size=3, padding=1),\n",
 "            nn.BatchNorm2d(512),\n",
 "            nn.ReLU(inplace=True),\n",
 "\n",
 "            nn.Conv2d(512, 512, kernel_size=3, padding=1),\n",
 "            nn.BatchNorm2d(512),\n",
 "            nn.ReLU(inplace=True),\n",
 "            nn.MaxPool2d((2, 1), (2, 1)),\n",
 "\n",
 "            nn.Conv2d(512, 512, kernel_size=2, padding=0),\n",
 "            nn.BatchNorm2d(512),\n",
 "            nn.ReLU(inplace=True),\n",
 "        )\n",
 "\n",
 "        self.rnn = nn.LSTM(\n",
 "            input_size=512 * 3,\n",
 "            hidden_size=hidden_size,\n",
 "            num_layers=2,\n",
 "            bidirectional=True,\n",
 "            batch_first=True,\n",
 "            dropout=0.3\n",
 "        )\n",
 "\n",
 "        self.fc = nn.Linear(hidden_size * 2, num_classes)\n",
 "\n",
 "    def forward(self, x):\n",
 "        conv = self.cnn(x)\n",
 "        batch, c, h, w = conv.size()\n",
 "        conv = conv.permute(0, 3, 1, 2)\n",
 "        conv = conv.reshape(batch, w, c * h)\n",
 "        rnn_out, _ = self.rnn(conv)\n",
 "        output = self.fc(rnn_out)\n",
 "        output = output.permute(1, 0, 2)\n",
 "        return output\n",
 "\n",
 "\n",
 "model = ChessBiLSTM(NUM_CLASSES, hidden_size=256).to(device)\n",
 "print(f\"Model parameters: {sum(p.numel() for p in model.parameters()):,}\")\n",
 "\n"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "id": "3xBoVqm5Pf4S" }, "outputs": [],
"source": [
 "\n",
 "# ============ CELL 11: Training Setup ============\n",
 "criterion = nn.CTCLoss(blank=0, zero_infinity=True)\n",
 "optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-4)\n",
 "scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)\n",
 "\n",
 "\n",
 "def decode_predictions(output, blank=0):\n",
 "    \"\"\"\n",
 "    Greedy (best-path) CTC decoding.\n",
 "\n",
 "    Args:\n",
 "        output: per-frame class scores indexed (time, batch, class), i.e. the\n",
 "            layout returned by ChessBiLSTM.forward.\n",
 "        blank: index of the CTC blank class.\n",
 "\n",
 "    Returns:\n",
 "        A list with one string per batch item: consecutive repeats are collapsed,\n",
 "        blanks are dropped, and the remaining indices are mapped via idx_to_char.\n",
 "    \"\"\"\n",
 "    _, max_indices = torch.max(output, dim=2)\n",
 "    # (time, batch) -> (batch, time), then copy to host memory in ONE transfer.\n",
 "    # Calling .item() on every element forces a separate device sync per frame\n",
 "    # when the scores live on the GPU.\n",
 "    best_paths = max_indices.permute(1, 0).tolist()\n",
 "\n",
 "    decoded = []\n",
 "    for path in best_paths:\n",
 "        chars = []\n",
 "        prev = blank\n",
 "        for idx in path:\n",
 "            if idx != blank and idx != prev:\n",
 "                chars.append(idx_to_char.get(idx, ''))\n",
 "            prev = idx\n",
 "        decoded.append(''.join(chars))\n",
 "    return decoded\n",
 "\n",
 "\n",
"def calculate_accuracy(predictions, targets):\n",
 "    \"\"\"Exact-match accuracy: fraction of targets whose prediction is identical (0.0 if there are none).\"\"\"\n",
 "    if len(targets) == 0:\n",
 "        return 0.0\n",
 "    correct = sum(p == t for p, t in zip(predictions, targets))\n",
 "    return correct / len(targets)\n",
 "\n",
 "\n",
 "def _edit_distance(a, b):\n",
 "    \"\"\"Levenshtein distance between two strings (insertion, deletion, substitution each cost 1).\"\"\"\n",
 "    if len(a) < len(b):\n",
 "        a, b = b, a  # keep the DP row as short as possible\n",
 "    prev_row = list(range(len(b) + 1))\n",
 "    for i, ca in enumerate(a, start=1):\n",
 "        row = [i]\n",
 "        for j, cb in enumerate(b, start=1):\n",
 "            row.append(min(prev_row[j] + 1,                # deletion\n",
 "                           row[j - 1] + 1,                 # insertion\n",
 "                           prev_row[j - 1] + (ca != cb)))  # substitution / match\n",
 "        prev_row = row\n",
 "    return prev_row[-1]\n",
 "\n",
 "\n",
 "def calculate_cer(predictions, targets):\n",
 "    \"\"\"\n",
 "    Character error rate: total edit distance divided by total target characters.\n",
 "\n",
 "    Uses Levenshtein distance rather than a position-by-position comparison, so a\n",
 "    single inserted or dropped character counts as one error instead of marking\n",
 "    every following character wrong. Returns 0.0 when there are no predictions.\n",
 "    \"\"\"\n",
 "    total_chars = sum(len(t) for t in targets)\n",
 "    total_errors = sum(_edit_distance(pred, target) for pred, target in zip(predictions, targets))\n",
 "    return total_errors / max(total_chars, 1)\n",
 "\n"
] },
{ "cell_type": "markdown", "metadata": { "id": "7T3WdOt1HQm0" },
"source": [
 "ONLY IF we are RESUMING a previously interrupted training:"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "tH-rHVLxGoWZ", "outputId": "f2796a7a-3d90-4e98-e66f-cbcc70469eb7" }, "outputs": [],
"source": [
 "from google.colab import drive\n",
 "drive.mount('/content/drive')\n",
 "\n",
 "BEST_CKPT_PATH = '/content/drive/MyDrive/best_chess_ocr_v47.pth'\n",
 "LATEST_CKPT_PATH = '/content/drive/MyDrive/latest_chess_ocr_v47.pth'  # optional, if you add it"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "8T6Vo7_dGoOx", "outputId": "80561678-1289-41eb-cb74-1cf9f23e8329" }, "outputs": [],
"source": [
 "import os\n",
 "import torch\n",
 "\n",
 "# Prefer latest if you have it; otherwise fall back to best\n",
 "if os.path.exists(LATEST_CKPT_PATH):\n",
 "    RESUME_PATH = LATEST_CKPT_PATH\n",
 "    print(\"Resuming from LATEST:\", RESUME_PATH)\n",
 "elif os.path.exists(BEST_CKPT_PATH):\n",
 "    RESUME_PATH = BEST_CKPT_PATH\n",
 "    print(\"Resuming from BEST:\", RESUME_PATH)\n",
 "else:\n",
 "    RESUME_PATH = None\n",
 "    print(\"No checkpoint found on Drive. 
Starting from scratch.\")"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "vLif09loGoFw", "outputId": "89507cb5-56db-4e54-d37d-9f912070eea6" }, "outputs": [],
"source": [
 "# Bookkeeping defaults for a fresh run; overwritten below when a checkpoint is restored.\n",
 "start_epoch = 0\n",
 "best_accuracy = 0.0\n",
 "history = {'train_loss': [], 'val_loss': [], 'val_acc': [], 'val_cer': []}\n",
 "\n",
 "if RESUME_PATH is not None:\n",
 "    checkpoint = torch.load(RESUME_PATH, map_location=device)\n",
 "\n",
 "    model.load_state_dict(checkpoint['model_state_dict'])\n",
 "    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])\n",
 "\n",
 "    # If you saved scheduler state, restore it (won't crash if missing)\n",
 "    if 'scheduler_state_dict' in checkpoint:\n",
 "        try:\n",
 "            scheduler.load_state_dict(checkpoint['scheduler_state_dict'])\n",
 "        except Exception as e:\n",
 "            print(\"Warning: could not load scheduler state:\", e)\n",
 "\n",
 "    # Resume bookkeeping\n",
 "    start_epoch = int(checkpoint.get('epoch', -1)) + 1\n",
 "    best_accuracy = float(checkpoint.get('best_accuracy', checkpoint.get('accuracy', 0.0)))\n",
 "\n",
 "    # If you saved history, restore it\n",
 "    if 'history' in checkpoint and isinstance(checkpoint['history'], dict):\n",
 "        history = checkpoint['history']\n",
 "\n",
 "    print(f\"✅ Loaded checkpoint from: {RESUME_PATH}\")\n",
 "    print(f\"   Stored epoch (0-based): {checkpoint.get('epoch', '??')}\")\n",
 "    print(f\"   Will resume at epoch (0-based): {start_epoch}\")\n",
 "    print(f\"   Best accuracy so far: {best_accuracy*100:.2f}%\")\n",
 "else:\n",
 "    print(\"✅ Starting fresh (no checkpoint loaded).\")"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "jU0e5Jg3Pd_O", "outputId": "69a3cc47-c246-4c9b-c94d-26da0273b36d" }, "outputs": [],
"source": [
 "\n",
 "# ============ CELL 12: Training Loop ============\n",
 "NUM_EPOCHS = 50\n",
 "\n",
 "# start_epoch / best_accuracy / history are set (or restored from a checkpoint) by the\n",
 "# resume cell above and must NOT be reset here, otherwise a resumed run forgets its best\n",
 "# score and its curves. If the optional resume cells were skipped, start from scratch.\n",
 "# To restart training from epoch 0 in the same kernel, re-run the resume cell first.\n",
 "start_epoch = globals().get('start_epoch', 0)\n",
 "best_accuracy = globals().get('best_accuracy', 0.0)\n",
 "history = globals().get('history') or {'train_loss': [], 'val_loss': [], 'val_acc': [], 'val_cer': []}\n",
 "\n",
 "print(\"=\" * 60)\n",
 "print(\"TRAINING WITH V47 PREPROCESSING\")\n",
 "print(\"=\" * 60)\n",
 "\n",
 "for epoch in range(start_epoch, NUM_EPOCHS):\n",
 "    model.train()\n",
 "    train_loss = 0\n",
 "\n",
 "    for batch in train_loader:\n",
 "        images = batch['images'].to(device)\n",
 "        labels = batch['labels']\n",
 "        label_lengths = batch['label_lengths']\n",
 "\n",
 "        optimizer.zero_grad()\n",
 "\n",
 "        output = model(images)\n",
 "        output = output.log_softmax(2)\n",
 "\n",
 "        # Every sample uses the full output sequence length as its CTC input length.\n",
 "        seq_len = output.size(0)\n",
 "        input_lengths = torch.full((images.size(0),), seq_len, dtype=torch.long)\n",
 "\n",
 "        # Strip the padding again: CTC targets are passed as one concatenated 1-D tensor.\n",
 "        flat_labels = []\n",
 "        for i in range(labels.size(0)):\n",
 "            flat_labels.extend(labels[i, :label_lengths[i]].tolist())\n",
 "        flat_labels = torch.tensor(flat_labels, dtype=torch.long)\n",
 "\n",
 "        loss = criterion(output, flat_labels, input_lengths, label_lengths)\n",
 "\n",
 "        # Skip the update for a non-finite loss instead of corrupting the weights.\n",
 "        if torch.isfinite(loss):\n",
 "            loss.backward()\n",
 "            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5)\n",
 "            optimizer.step()\n",
 "            train_loss += loss.item()\n",
 "\n",
 "    train_loss /= len(train_loader)\n",
 "\n",
 "    # Validation\n",
 "    model.eval()\n",
 "    val_loss = 0\n",
 "    all_preds, all_targets = [], []\n",
 "\n",
 "    with torch.no_grad():\n",
 "        for batch in val_loader:\n",
 "            images = batch['images'].to(device)\n",
 "            labels = batch['labels']\n",
 "            label_lengths = batch['label_lengths']\n",
 "\n",
 "            output = model(images)\n",
 "            output = output.log_softmax(2)\n",
 "\n",
 "            seq_len = output.size(0)\n",
 "            input_lengths = torch.full((images.size(0),), seq_len, dtype=torch.long)\n",
 "\n",
 "            flat_labels = []\n",
 "            for i in range(labels.size(0)):\n",
 "                flat_labels.extend(labels[i, :label_lengths[i]].tolist())\n",
 "            flat_labels = torch.tensor(flat_labels, dtype=torch.long)\n",
 "\n",
 "            loss = criterion(output, flat_labels, input_lengths, label_lengths)\n",
 "            if torch.isfinite(loss):\n",
 "                val_loss += loss.item()\n",
 "\n",
 "            preds = decode_predictions(output)\n",
 "            all_preds.extend(preds)\n",
 "            all_targets.extend(batch['label_strs'])\n",
 "\n",
 "    val_loss /= len(val_loader)\n",
 "    val_acc = calculate_accuracy(all_preds, all_targets)\n",
 "    val_cer = calculate_cer(all_preds, all_targets)\n",
 "\n",
 "    scheduler.step(val_loss)\n",
 "\n",
 "    history['train_loss'].append(train_loss)\n",
 "    history['val_loss'].append(val_loss)\n",
 "    history['val_acc'].append(val_acc)\n",
 "    history['val_cer'].append(val_cer)\n",
 "\n",
 "    print(f\"Epoch {epoch+1:2d}/{NUM_EPOCHS} | \"\n",
 "          f\"Train: {train_loss:.4f} | Val: {val_loss:.4f} | \"\n",
 "          f\"Acc: {val_acc*100:.1f}% | CER: {val_cer*100:.1f}%\")\n",
 "\n",
 "    if val_acc > best_accuracy:\n",
 "        best_accuracy = val_acc\n",
 "        # Store everything the resume cell knows how to restore, so that a resumed\n",
 "        # run continues with the same LR schedule, best score and curves.\n",
 "        torch.save({\n",
 "            'epoch': epoch,\n",
 "            'model_state_dict': model.state_dict(),\n",
 "            'optimizer_state_dict': optimizer.state_dict(),\n",
 "            'scheduler_state_dict': scheduler.state_dict(),\n",
 "            'accuracy': val_acc,\n",
 "            'best_accuracy': best_accuracy,\n",
 "            'history': history,\n",
 "        }, 'best_chess_ocr_v47.pth')\n",
 "        print(f\"  ✓ New best! Accuracy: {val_acc*100:.1f}%\")\n",
 "\n",
 "    if (epoch + 1) % 10 == 0:\n",
 "        print(\"  Samples:\")\n",
 "        for i in range(min(5, len(all_preds))):\n",
 "            match = \"✓\" if all_preds[i] == all_targets[i] else \"✗\"\n",
 "            print(f\"    {match} '{all_targets[i]}' → '{all_preds[i]}'\")\n",
 "\n",
 "    # Re-running this cell after an interruption continues with the next epoch\n",
 "    # instead of repeating finished ones and duplicating history entries.\n",
 "    start_epoch = epoch + 1\n",
 "\n",
 "print(\"=\" * 60)\n",
 "print(f\"Training complete! Best accuracy: {best_accuracy*100:.1f}%\")\n",
 "\n"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "iXqmQywNnpuC", "outputId": "de62ca39-7bb7-4e96-b763-1a1017e996e0" }, "outputs": [],
"source": [
 "from google.colab import drive\n",
 "drive.mount('/content/drive')\n",
 "!cp best_chess_ocr_v47.pth /content/drive/MyDrive/"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 17 }, "id": "LRr-pbSGnpdt", "outputId": "0453b887-eee6-4e11-b693-d2a7ec7d8ce8" }, "outputs": [],
"source": [
 "from google.colab import files\n",
 "files.download('best_chess_ocr_v47.pth')"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "id": "YKXS_SnFPa9H", "outputId": "ba99d698-8758-4e5c-a2f3-73cc95c5ae19" }, "outputs": [],
"source": [
 "\n",
 "# ============ CELL 13: Save Final Model ============\n",
 "torch.save({\n",
 "    'model_state_dict': model.state_dict(),\n",
 "    'char_to_idx': char_to_idx,\n",
 "    'idx_to_char': idx_to_char,\n",
 "    'img_height': IMG_HEIGHT,\n",
 "    'img_width': IMG_WIDTH,\n",
 "}, 'chess_ocr_v47_complete.pth')\n",
 "\n",
 "# Download (uncomment in Colab)\n",
 "from google.colab import files\n",
 "files.download('chess_ocr_v47_complete.pth')\n",
 "files.download('best_chess_ocr_v47.pth')\n",
 "print(\"✓ Models saved!\")\n",
 "\n"
] },
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 338 }, "id": "SkMI8U0IPYsC", "outputId": "1a326ade-11bf-4510-bc09-f00fbf31dfe9" }, "outputs": [],
"source": [
 "\n",
 "# ============ CELL 14: Plot Results ============\n",
 "fig, axes = plt.subplots(1, 3, figsize=(15, 4))\n",
 "\n",
 "axes[0].plot(history['train_loss'], label='Train')\n",
 "axes[0].plot(history['val_loss'], label='Val')\n",
 "axes[0].set_xlabel('Epoch')\n",
 "axes[0].set_ylabel('Loss')\n",
"axes[0].set_title('Loss')\n",
"axes[0].legend()\n",
"\n",
"# Middle panel: exact-match accuracy per epoch, shown as a percentage.\n",
"axes[1].plot([a*100 for a in history['val_acc']])\n",
"axes[1].set_xlabel('Epoch')\n",
"axes[1].set_ylabel('Accuracy (%)')\n",
"axes[1].set_title('Exact Match Accuracy')\n",
"\n",
"# Right panel: character error rate per epoch, shown as a percentage.\n",
"axes[2].plot([c*100 for c in history['val_cer']])\n",
"axes[2].set_xlabel('Epoch')\n",
"axes[2].set_ylabel('CER (%)')\n",
"axes[2].set_title('Character Error Rate')\n",
"\n",
"plt.tight_layout()\n",
"# The figure name carries the same v47 tag as the checkpoints saved by this notebook.\n",
"plt.savefig('training_v47_results.png', dpi=150)\n",
"plt.show()"
] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "S3zgKgeyjZbr" }, "outputs": [], "source": [] } ], "metadata": { "accelerator": "GPU", "colab": { "gpuType": "T4", "provenance": [] }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.5" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "01dcb79b363c4820865a19a8d1434818": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_353fef0714774e69a728279d189467a2", "placeholder": "\u200b", "style": "IPY_MODEL_182a19bb9326402f90a208ec5e9e1829", "value": " 1.61k/? 
[00:00<00:00, 132kB/s]" } }, "01e88a4eeb3240e196911fffb04e488c": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "065d75a276d84f8aa5df0bc3ad7e5cd8": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8fee67d5099f4989b4d308841def4165", "placeholder": "​", "style": "IPY_MODEL_f5e1abdbbb7a463985a8d0a0cb7b60a4", "value": "data/train-00001-of-00002.parquet: 100%" } }, "0740aca90d474885b2fda2618574eb11": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], 
"_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_1d6ce483f4584940a2650b4df3e99f92", "max": 13731, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_7517d295cd454dd4a2d3d5b45905ffac", "value": 13731 } }, "098db5b7992c42f6b464016bd2e3d064": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "0bae02fbd7f84bf18ecaaccdf7e745a7": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_1234354e5f3c4fc2b337425bd88e485b", "max": 1, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_987ed53098134ee18f5be552a0b69446", "value": 1 } }, "0f344c3d95e649db94ad43f9b963f048": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", 
"_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "1234354e5f3c4fc2b337425bd88e485b": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": "20px" } }, "182a19bb9326402f90a208ec5e9e1829": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "1d6ce483f4584940a2650b4df3e99f92": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", 
"_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "287bd2d39ea441ada2598dc4f0be10e2": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3331033115044ea2985e9f86f5cde8d0": { 
"model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "349ecf89ae344283bc40831ceb91c334": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_065d75a276d84f8aa5df0bc3ad7e5cd8", "IPY_MODEL_4e01a5a3ef8241aa9eb31cf2bf363be7", "IPY_MODEL_68995937d21d42e0838a788f33c71027" ], "layout": "IPY_MODEL_5c13c9124a234b0b85b52c68088832ee" } }, "353fef0714774e69a728279d189467a2": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", 
"_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3b4a3ca4516347d6bc11b98cd7fd0d07": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8a70a2f077f74897aca169301bbb3c98", "placeholder": "​", "style": "IPY_MODEL_a75fdd66b0564876980ef05e0d11b9f5", "value": "README.md: " } }, "4e01a5a3ef8241aa9eb31cf2bf363be7": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, 
"layout": "IPY_MODEL_3331033115044ea2985e9f86f5cde8d0", "max": 348274364, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_098db5b7992c42f6b464016bd2e3d064", "value": 348274364 } }, "561cb931f06d467785b0f562bbf2b8d1": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8682424db93c4a8fa697131aac827229", "max": 294278288, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_a60bf51a42d142dbacea601ac78238a1", "value": 294278288 } }, "589f54fe217c402eb435de5b3197391e": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": 
null, "width": null } }, "5bcd47c7aaa84635adc6acdc9009715f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "5c13c9124a234b0b85b52c68088832ee": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5cd0d2061b024f44bee56945983208a4": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": 
"HBoxView", "box_style": "", "children": [ "IPY_MODEL_b54c0a7d46754e2d968f1018bc10397b", "IPY_MODEL_0740aca90d474885b2fda2618574eb11", "IPY_MODEL_ac429c6b9e214a96b67eafbcf003afae" ], "layout": "IPY_MODEL_287bd2d39ea441ada2598dc4f0be10e2" } }, "68995937d21d42e0838a788f33c71027": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_589f54fe217c402eb435de5b3197391e", "placeholder": "​", "style": "IPY_MODEL_d1002f54bc1f45568abe534a6f402210", "value": " 348M/348M [00:08<00:00, 57.7MB/s]" } }, "7517d295cd454dd4a2d3d5b45905ffac": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "83c0281c4c2f4abe99d61fdd8716195c": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "8682424db93c4a8fa697131aac827229": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", 
"_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "868bbc2e21904e909fcd681a36653970": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "896b6a8247ab48e18b7e728f56e4ea45": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_3b4a3ca4516347d6bc11b98cd7fd0d07", "IPY_MODEL_0bae02fbd7f84bf18ecaaccdf7e745a7", "IPY_MODEL_01dcb79b363c4820865a19a8d1434818" ], "layout": "IPY_MODEL_b14930674d26476ead6b0cb45cc02472" } }, 
"8a70a2f077f74897aca169301bbb3c98": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8fee67d5099f4989b4d308841def4165": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, 
"max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9586da4581e94706b5365b4e554152be": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "987ed53098134ee18f5be552a0b69446": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "a60bf51a42d142dbacea601ac78238a1": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": 
"ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "a75fdd66b0564876980ef05e0d11b9f5": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ac429c6b9e214a96b67eafbcf003afae": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_01e88a4eeb3240e196911fffb04e488c", "placeholder": "​", "style": "IPY_MODEL_868bbc2e21904e909fcd681a36653970", "value": " 13731/13731 [00:03<00:00, 4712.65 examples/s]" } }, "b14930674d26476ead6b0cb45cc02472": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": 
null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b54c0a7d46754e2d968f1018bc10397b": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9586da4581e94706b5365b4e554152be", "placeholder": "​", "style": "IPY_MODEL_83c0281c4c2f4abe99d61fdd8716195c", "value": "Generating train split: 100%" } }, "c080622acd36403e8d9aabbee227030b": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, 
"justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d0517fc29e4440f2937de01e04faa940": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d1002f54bc1f45568abe534a6f402210": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "d2095c811a634a90ad2ed01c3db9892d": { "model_module": "@jupyter-widgets/controls", 
"model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c080622acd36403e8d9aabbee227030b", "placeholder": "​", "style": "IPY_MODEL_5bcd47c7aaa84635adc6acdc9009715f", "value": " 294M/294M [00:10<00:00, 25.5MB/s]" } }, "d4536bb4c52447948f21dd333efb0051": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_e32f0946287b4110bba44ea1e0fa5acf", "IPY_MODEL_561cb931f06d467785b0f562bbf2b8d1", "IPY_MODEL_d2095c811a634a90ad2ed01c3db9892d" ], "layout": "IPY_MODEL_fe0b285b43174e3b8c8bf089f99aca6a" } }, "e32f0946287b4110bba44ea1e0fa5acf": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d0517fc29e4440f2937de01e04faa940", "placeholder": "​", "style": "IPY_MODEL_0f344c3d95e649db94ad43f9b963f048", "value": "data/train-00000-of-00002.parquet: 100%" } }, "f5e1abdbbb7a463985a8d0a0cb7b60a4": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { 
"_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "fe0b285b43174e3b8c8bf089f99aca6a": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } } } } }, "nbformat": 4, "nbformat_minor": 4 }