Vladyslav Humennyy committed on
Commit
74dc47f
·
1 Parent(s): edb7715

Fix Gradio display compatibility while preserving PIL Images for processor

Browse files

Store images as temporary files for Gradio display (the `path` field) while keeping the PIL Image objects in the `_pil_image` metadata field for the processor. This fixes the ValidationError raised by Gradio's Chatbot component, which cannot display PIL objects directly.

Files changed (1) hide show
  1. app.py +30 -11
app.py CHANGED
@@ -83,23 +83,32 @@ def user(user_message, image_data, history: list):
83
  stripped_message = user_message.strip()
84
  image_obj = _ensure_image_object(image_data)
85
 
 
 
 
 
 
 
 
 
 
86
  # If we have both text and image, combine them in a single message
87
- if stripped_message and image_obj is not None:
88
  updated_history.append({
89
  "role": "user",
90
  "content": [
91
  {"type": "text", "text": stripped_message},
92
- {"type": "image", "image": image_obj}
93
  ]
94
  })
95
  has_content = True
96
  elif stripped_message:
97
  updated_history.append({"role": "user", "content": stripped_message})
98
  has_content = True
99
- elif image_obj is not None:
100
  updated_history.append({
101
  "role": "user",
102
- "content": [{"type": "image", "image": image_obj}]
103
  })
104
  has_content = True
105
 
@@ -221,17 +230,27 @@ def _prepare_processor_history(history: list[dict[str, Any]]) -> list[dict[str,
221
  if item_type == "text":
222
  formatted_content.append({"type": "text", "text": item.get("text", "")})
223
  elif item_type == "image":
224
- # Include the PIL Image directly
225
- formatted_content.append({"type": "image", "image": item.get("image")})
 
 
 
 
 
226
  if formatted_content:
227
  processor_history.append({"role": role, "content": formatted_content})
228
  elif isinstance(content, dict):
229
  # Legacy format or single image
230
- if "image" in content:
231
- processor_history.append({
232
- "role": role,
233
- "content": [{"type": "image", "image": content["image"]}]
234
- })
 
 
 
 
 
235
  else:
236
  # Try to extract text
237
  text = _content_to_text(content)
 
83
  stripped_message = user_message.strip()
84
  image_obj = _ensure_image_object(image_data)
85
 
86
+ # Store image as temp file for Gradio display, but keep PIL object in metadata
87
+ if image_obj is not None:
88
+ import tempfile
89
+ fd, tmp_path = tempfile.mkstemp(suffix=".png")
90
+ os.close(fd)
91
+ image_obj.save(tmp_path, format="PNG")
92
+ else:
93
+ tmp_path = None
94
+
95
  # If we have both text and image, combine them in a single message
96
+ if stripped_message and tmp_path is not None:
97
  updated_history.append({
98
  "role": "user",
99
  "content": [
100
  {"type": "text", "text": stripped_message},
101
+ {"type": "image", "path": tmp_path, "_pil_image": image_obj}
102
  ]
103
  })
104
  has_content = True
105
  elif stripped_message:
106
  updated_history.append({"role": "user", "content": stripped_message})
107
  has_content = True
108
+ elif tmp_path is not None:
109
  updated_history.append({
110
  "role": "user",
111
+ "content": [{"type": "image", "path": tmp_path, "_pil_image": image_obj}]
112
  })
113
  has_content = True
114
 
 
230
  if item_type == "text":
231
  formatted_content.append({"type": "text", "text": item.get("text", "")})
232
  elif item_type == "image":
233
+ # Extract PIL Image from _pil_image field or load from path
234
+ pil_image = item.get("_pil_image")
235
+ if pil_image is None and "path" in item:
236
+ from PIL import Image
237
+ pil_image = Image.open(item["path"])
238
+ if pil_image is not None:
239
+ formatted_content.append({"type": "image", "image": pil_image})
240
  if formatted_content:
241
  processor_history.append({"role": role, "content": formatted_content})
242
  elif isinstance(content, dict):
243
  # Legacy format or single image
244
+ if "image" in content or "_pil_image" in content:
245
+ pil_image = content.get("_pil_image") or content.get("image")
246
+ if pil_image is None and "path" in content:
247
+ from PIL import Image
248
+ pil_image = Image.open(content["path"])
249
+ if pil_image is not None:
250
+ processor_history.append({
251
+ "role": role,
252
+ "content": [{"type": "image", "image": pil_image}]
253
+ })
254
  else:
255
  # Try to extract text
256
  text = _content_to_text(content)