File size: 20,860 Bytes
130e53d
 
a9065c7
e0ce993
2536b39
e0ce993
f52933f
130e53d
0e29f16
c5d24cb
c6ac4a0
c5d24cb
 
a9065c7
27ebbf9
ef7ad3a
a9065c7
d03d3f9
7719ac7
130e53d
e6380a7
919bf29
3b045dd
 
b7b5970
ab2add0
 
27ebbf9
3da0193
27ebbf9
 
 
d5dc5cf
 
a9065c7
5725e7b
d5dc5cf
a9065c7
 
 
 
 
 
d5dc5cf
 
0ad02a2
5725e7b
27ebbf9
5725e7b
0e29f16
a9065c7
d5dc5cf
5725e7b
0e29f16
a9065c7
 
 
edb7715
 
a9065c7
 
 
 
 
 
 
 
edb7715
a9065c7
edb7715
 
 
 
 
a9065c7
 
 
 
 
 
 
 
 
d5dc5cf
a9065c7
edb7715
 
74dc47f
 
 
 
 
 
 
 
 
edb7715
74dc47f
edb7715
 
 
 
6587188
edb7715
 
 
 
a9065c7
 
74dc47f
edb7715
 
6587188
edb7715
a9065c7
 
 
 
 
 
 
5725e7b
 
 
 
 
 
 
 
 
 
 
 
a9065c7
 
 
edb7715
a9065c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
edb7715
 
a9065c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
edb7715
a9065c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
edb7715
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74dc47f
 
 
 
 
 
 
edb7715
 
 
 
74dc47f
 
 
 
 
 
 
 
 
 
edb7715
 
 
 
 
 
 
 
 
f5ccf3a
 
 
 
 
 
 
 
 
 
 
 
 
 
e6d0602
 
 
 
 
 
 
 
f5ccf3a
 
 
 
 
e6d0602
 
 
 
 
 
 
 
 
f5ccf3a
 
 
 
 
 
 
 
c6ac4a0
5725e7b
a9065c7
5725e7b
 
 
4363542
a9065c7
 
 
 
 
 
0e29f16
f52933f
5725e7b
 
 
4363542
a9065c7
 
e6d0602
 
 
 
 
 
 
 
 
 
 
 
a9065c7
5725e7b
a9065c7
 
e6d0602
 
 
 
a9065c7
 
edb7715
a9065c7
edb7715
a9065c7
 
 
 
 
 
 
 
 
 
 
 
f52933f
 
5725e7b
f52933f
 
 
 
a9065c7
919bf29
4363542
 
5725e7b
919bf29
5725e7b
f52933f
919bf29
f52933f
 
 
5725e7b
f52933f
 
5725e7b
f5ccf3a
5725e7b
27ebbf9
a9065c7
27ebbf9
4363542
756e900
 
5725e7b
756e900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9065c7
756e900
 
27ebbf9
 
 
756e900
 
 
 
 
 
 
 
5725e7b
4363542
756e900
 
 
 
 
 
 
 
 
 
 
 
e676b08
756e900
 
 
 
 
 
 
 
 
 
 
 
 
a9065c7
 
 
 
 
 
 
 
 
756e900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2536b39
756e900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9065c7
756e900
 
 
 
 
 
a9065c7
 
756e900
 
 
 
 
 
a9065c7
756e900
 
 
 
 
 
5725e7b
 
756e900
 
 
5725e7b
756e900
a9065c7
756e900
 
 
5725e7b
756e900
 
 
 
 
 
 
 
 
 
5725e7b
 
756e900
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
import os
import subprocess
import tempfile

# subprocess.run('pip install flash-attn==2.8.0 --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)

import threading

# subprocess.check_call([os.sys.executable, "-m", "pip", "install", "-r", "requirements.txt"])

import spaces
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoProcessor, AutoTokenizer, TextIteratorStreamer
from analytics import AnalyticsLogger
from kernels import get_kernel
from typing import Any

#vllm_flash_attn3 = get_kernel("kernels-community/vllm-flash-attn3")

#torch._dynamo.config.disable = True

# Hugging Face checkpoint served by this app.
# Previously tried checkpoints, kept for reference (none of them is active):
#   "le-llm/lapa-v0.1-reasoning-only-32768"
#   "le-llm/lapa-v0.1-instruct"
#   "le-llm/lapa-v0.1-matt-instruction-5e06"
MODEL_ID = "le-llm/lapa-v0.1-reprojected"

# Module-level analytics logger; _begin_analytics_session() and bot() report through it.
logger = AnalyticsLogger()

def _begin_analytics_session():
    """Start an analytics session for ``MODEL_ID``.

    Wired to the app's load event; the value returned by
    ``logger.start_session`` is intentionally discarded.
    """
    logger.start_session(MODEL_ID)

def load_model():
    """Load the tokenizer, the optional processor and the causal LM for ``MODEL_ID``.

    Returns:
        A ``(model, tokenizer, processor, device)`` tuple. ``processor`` is
        ``None`` when ``AutoProcessor`` cannot be loaded for the checkpoint;
        ``device`` is the fixed label ``"cuda"`` (model placement itself is
        delegated to ``device_map="auto"``).
    """
    target_device = "cuda"
    loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

    # The processor is optional: any failure is reported and callers fall
    # back to the tokenizer-only path.
    try:
        loaded_processor = AutoProcessor.from_pretrained(MODEL_ID)
    except Exception as err:  # pragma: no cover - informative fallback
        loaded_processor = None
        print(f"Warning: AutoProcessor not available ({err}). Falling back to tokenizer.")

    model_options = {
        "dtype": torch.bfloat16,
        "device_map": "auto",
        "attn_implementation": "flash_attention_2",
    }
    loaded_model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **model_options)

    print("Selected device:", target_device)
    return loaded_model, loaded_tokenizer, loaded_processor, target_device


# Load model, tokenizer and processor once, at import time. This is a
# module-level statement, so it does not run per request; bot() reads these
# globals on every call.
model, tokenizer, processor, device = load_model()


def _ensure_image_object(image_data: Any) -> Any | None:
    """Return a PIL Image object for the provided image data."""
    if image_data is None:
        return None

    try:
        from PIL import Image
    except ImportError:  # pragma: no cover - PIL is bundled with Gradio's image component
        return None

    # Already a PIL Image
    if isinstance(image_data, Image.Image):
        return image_data

    # Load from path
    if isinstance(image_data, str) and os.path.exists(image_data):
        return Image.open(image_data)

    return None


def user(user_message, image_data, history: list):
    """Record the submitted text and/or image as a new user turn.

    Args:
        user_message: Raw textbox value; ``None`` is treated as ``""``.
        image_data: Value of the image input, normalised through
            ``_ensure_image_object``.
        history: Current chat history; it is copied, never mutated.

    Returns:
        ``("", None, new_history)`` when there was something to submit, which
        clears both inputs. Otherwise ``(user_message, image_data, history)``
        so the inputs stay as they were.
    """
    user_message = user_message or ""
    updated_history = list(history)

    text = user_message.strip()
    image_obj = _ensure_image_object(image_data)

    if image_obj is None:
        image_part = None
    else:
        # Gradio displays the image from a temp PNG on disk; the in-memory
        # PIL object travels alongside it under the private "_pil_image" key.
        fd, tmp_path = tempfile.mkstemp(suffix=".png")
        os.close(fd)
        image_obj.save(tmp_path, format="PNG")
        image_part = {
            "type": "image",
            "path": tmp_path,
            "alt_text": "uploaded image",
            "_pil_image": image_obj,
        }

    if not text and image_part is None:
        # Nothing to submit yet; keep inputs unchanged
        return user_message, image_data, history

    if image_part is None:
        content = text
    elif text:
        content = [{"type": "text", "text": text}, image_part]
    else:
        content = [image_part]

    updated_history.append({"role": "user", "content": content})
    return "", None, updated_history


def append_example_message(x: gr.SelectData, history):
    """Append the text of the selected example to ``history`` as a user turn.

    The selection event, its value and the example text are printed for
    debugging. ``history`` is mutated in place and returned; nothing is
    appended when the example's ``"text"`` entry is ``None``.
    """
    print(x)
    print(x.value)
    example_text = x.value["text"]
    print(example_text)

    if example_text is not None:
        history.append({"role": "user", "content": example_text})
    return history


def _message_contains_image(message: dict[str, Any]) -> bool:
    content = message.get("content")
    if isinstance(content, dict):
        if "path" in content or "image" in content:
            return True
        if content.get("type") in {"image", "image_url"}:
            return True
    if isinstance(content, list):
        for item in content:
            if isinstance(item, dict) and item.get("type") in {"image", "image_url"}:
                return True
    return False


def _content_to_text(content: Any) -> str:
    if isinstance(content, dict):
        if "text" in content:
            return content.get("text", "")
        if "path" in content:
            alt_text = content.get("alt_text")
            placeholder = alt_text or os.path.basename(content["path"]) or "image"
            return f"[image: {placeholder}]"
        if "image" in content:
            return "[image]"
        if content.get("type") == "image_url":
            image_url = content.get("image_url")
            if isinstance(image_url, dict):
                image_url = image_url.get("url", "")
            return f"[image: {image_url}]"
        if content.get("type") == "text":
            return content.get("text", "")
        return str(content)
    if isinstance(content, list):
        text_parts: list[str] = []
        for item in content:
            if isinstance(item, dict):
                item_type = item.get("type")
                if item_type == "text":
                    text_parts.append(item.get("text", ""))
                elif item_type == "image":
                    text_parts.append("[image]")
                elif item_type == "image_url":
                    image_url = item.get("image_url")
                    if isinstance(image_url, dict):
                        image_url = image_url.get("url", "")
                    text_parts.append(f"[image: {image_url}]")
                else:
                    text_parts.append(str(item))
            else:
                text_parts.append(str(item))
        filtered = [part for part in text_parts if part]
        return "\n".join(filtered) if filtered else "[image]"
    return str(content)


def _collect_recent_user_contents(history: list[dict[str, Any]]) -> list[Any]:
    """Collect the trailing sequence of user messages prior to the assistant reply."""
    chunks: list[Any] = []
    for message in reversed(history):
        if message.get("role") != "user":
            break
        chunks.append(message.get("content"))
    chunks.reverse()
    return chunks


def _prepare_text_history(history: list[dict[str, Any]]) -> list[dict[str, str]]:
    """Convert ``history`` into text-only ``{"role", "content"}`` messages.

    Each payload is flattened with ``_content_to_text``; messages that flatten
    to nothing are dropped, and consecutive messages from the same role are
    merged into one, separated by a newline. A missing role defaults to
    ``"user"``.
    """
    merged: list[dict[str, str]] = []
    for entry in history:
        speaker = entry.get("role", "user")
        text = _content_to_text(entry.get("content"))
        if not text:
            continue

        previous = merged[-1] if merged else None
        if previous is not None and previous["role"] == speaker:
            previous["content"] = previous["content"] + "\n" + text
        else:
            merged.append({"role": speaker, "content": text})
    return merged


def _prepare_processor_history(history: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Prepare history for processor with proper image format."""
    processor_history = []

    for message in history:
        role = message.get("role", "user")
        content = message.get("content")

        # Handle different content formats
        if isinstance(content, str):
            # Simple text message
            processor_history.append({"role": role, "content": content})
        elif isinstance(content, list):
            # Multi-modal content (text + images)
            formatted_content = []
            for item in content:
                if isinstance(item, dict):
                    item_type = item.get("type")
                    if item_type == "text":
                        formatted_content.append({"type": "text", "text": item.get("text", "")})
                    elif item_type == "image":
                        # Extract PIL Image from _pil_image field or load from path
                        pil_image = item.get("_pil_image")
                        if pil_image is None and "path" in item:
                            from PIL import Image
                            pil_image = Image.open(item["path"])
                        if pil_image is not None:
                            formatted_content.append({"type": "image", "image": pil_image})
            if formatted_content:
                processor_history.append({"role": role, "content": formatted_content})
        elif isinstance(content, dict):
            # Legacy format or single image
            if "image" in content or "_pil_image" in content:
                pil_image = content.get("_pil_image") or content.get("image")
                if pil_image is None and "path" in content:
                    from PIL import Image
                    pil_image = Image.open(content["path"])
                if pil_image is not None:
                    processor_history.append({
                        "role": role,
                        "content": [{"type": "image", "image": pil_image}]
                    })
            else:
                # Try to extract text
                text = _content_to_text(content)
                if text:
                    processor_history.append({"role": role, "content": text})

    return processor_history


def _clean_history_for_display(history: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Remove internal metadata fields like _pil_image before displaying in Gradio."""
    cleaned = []

    for message in history:
        cleaned_message = {"role": message.get("role", "user")}
        content = message.get("content")

        if isinstance(content, str):
            cleaned_message["content"] = content
        elif isinstance(content, list):
            cleaned_content = []
            for item in content:
                if isinstance(item, dict):
                    # Remove _pil_image and ensure alt_text is string or absent
                    cleaned_item = {}
                    for k, v in item.items():
                        if k == "_pil_image":
                            continue
                        if k == "alt_text" and not isinstance(v, str):
                            continue
                        cleaned_item[k] = v
                    cleaned_content.append(cleaned_item)
                else:
                    cleaned_content.append(item)
            cleaned_message["content"] = cleaned_content
        elif isinstance(content, dict):
            # Remove _pil_image and ensure alt_text is string or absent
            cleaned_item = {}
            for k, v in content.items():
                if k == "_pil_image":
                    continue
                if k == "alt_text" and not isinstance(v, str):
                    continue
                cleaned_item[k] = v
            cleaned_message["content"] = cleaned_item
        else:
            cleaned_message["content"] = content

        cleaned.append(cleaned_message)

    return cleaned


@spaces.GPU
def bot(
    history: list[dict[str, Any]]
    # max_tokens,
    # temperature,
    # top_p,
):
    """Generate the assistant reply for ``history`` and stream it back.

    This is a generator. It appends an empty assistant message to ``history``
    (mutating the list in place), then yields a display-cleaned copy of the
    whole history each time the streamer produces new text. It returns without
    yielding anything when the history has neither text nor an image.

    Sampling settings are fixed inside the function (``max_new_tokens=4096``,
    ``temperature=0.7``, ``top_p=0.95``, ``top_k=64``). Once streaming has
    finished, the user text and the full answer are passed to
    ``logger.log_interaction``.

    Args:
        history: Chat messages as dicts with ``"role"`` and ``"content"``.

    Yields:
        The history with internal fields removed, as produced by
        ``_clean_history_for_display``.
    """
    # Text of the trailing user turn(s); used for the debug print and the analytics log.
    user_chunks = _collect_recent_user_contents(history)
    if not user_chunks:
        user_message_text = ""
    else:
        user_message_text = "\n".join(filter(None, (_content_to_text(chunk) for chunk in user_chunks)))
    print('User message:', user_message_text)
    # [{"role": "system", "content": system_message}] +
    # Build conversation
    max_tokens = 4096
    temperature = 0.7
    top_p = 0.95

    text_history = _prepare_text_history(history)

    # Handle empty history case
    if not text_history:
        input_text = ""
    else:
        input_text: str = tokenizer.apply_chat_template(
            text_history,
            tokenize=False,
            add_generation_prompt=True,
            # enable_thinking=True,
        )

    # Drop the first BOS token from the rendered prompt.
    # NOTE(review): presumably because the tokenizer call below adds its own BOS — confirm.
    if input_text and tokenizer.bos_token:
        input_text = input_text.replace(tokenizer.bos_token, "", 1)
    print(input_text)
    model_inputs = None

    # Early return if no input
    if not input_text and not any(_message_contains_image(msg) for msg in history):
        return

    # Multimodal path: only attempted when a processor was loaded and the
    # history contains an image. Any failure is printed and the text-only
    # tokenizer path below is used instead.
    if processor is not None and any(_message_contains_image(msg) for msg in history):
        try:
            processor_history = _prepare_processor_history(history)
            model_inputs = processor(
                messages=processor_history,
                return_tensors="pt",
                add_generation_prompt=True,
            ).to(model.device)
        except Exception as exc:  # pragma: no cover - diagnostic logging
            print(f"Processor failed, using tokenizer pipeline instead: {exc}")

    if model_inputs is None:
        model_inputs = tokenizer(input_text, return_tensors="pt").to(model.device)  # .to(device)

    # Debug output: the decoded prompt, then one {token_id: decoded_token} dict per input token.
    decoded_input = tokenizer.decode(model_inputs["input_ids"][0])
    print("Decoded input:", decoded_input)
    print([{int(token_id.item()): tokenizer.decode([int(token_id.item())])} for token_id in model_inputs["input_ids"][0]])
    # Streamer setup (skip_special_tokens is not set here)
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True  # skip_special_tokens=True  # ,
    )

    # Run model.generate in background thread
    generation_kwargs = dict(
        **model_inputs,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=64,
        do_sample=True,
        # eos_token_id=tokenizer.eos_token_id,
        streamer=streamer,
    )
    # NOTE(review): the thread is never joined and the streamer has no timeout;
    # presumably a failure inside model.generate leaves the loop below waiting — confirm.
    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    # Placeholder assistant turn that the loop below fills in incrementally.
    history.append({"role": "assistant", "content": ""})
    # Yield tokens as they come in
    for new_text in streamer:
        history[-1]["content"] += new_text
        yield _clean_history_for_display(history)

    assistant_message = history[-1]["content"]
    logger.log_interaction(user=user_message_text, answer=assistant_message)


# --- drop-in UI compatible with older Gradio versions ---
import os, tempfile, time
import gradio as gr

# Ukrainian-inspired Gradio theme (deep, muted colors reflecting an unbeatable
# spirit); passed to gr.Blocks(theme=...) below.
THEME = gr.themes.Soft(
    primary_hue="blue",      # Deep blue representing Ukrainian sky and resolve
    secondary_hue="amber",   # Warm amber representing golden fields and determination
    neutral_hue="stone",     # Earthy stone representing strength and foundation
)

# Load CSS from external file
def load_css():
    """Return the text of ``static/style.css``, or ``""`` when the file is missing.

    The path is relative to the current working directory. A missing file is
    reported with a printed warning rather than raised.
    """
    try:
        stylesheet = open("static/style.css", "r", encoding="utf-8")
    except FileNotFoundError:
        print("Warning: static/style.css not found")
        return ""
    with stylesheet:
        return stylesheet.read()


# Stylesheet text handed to gr.Blocks(css=...).
CSS = load_css()

def _clear_chat():
    return "", None, []

# Build the UI: header, chat pane, image attachment, input row and control
# buttons, followed by the event wiring and the injected JavaScript.
with gr.Blocks(theme=THEME, css=CSS, fill_height=True) as demo:
    # Start an analytics session whenever the app is loaded.
    demo.load(fn=_begin_analytics_session, inputs=None, outputs=None)


    # Header (no gr.Box to avoid version issues)
    gr.HTML(
        """
        <div id="app-header">
          <div class="app-title">✨ LAPA</div>
          <div class="app-subtitle">LLM for Ukrainian Language</div>
        </div>
        """
    )

    with gr.Row(equal_height=True):
        # Left side: Chat
        with gr.Column(scale=7, elem_id="left-pane"):
            with gr.Column(elem_id="chat-card"):
                # Chat transcript in "messages" format; the examples below are
                # handled by the example_select event further down.
                chatbot = gr.Chatbot(
                    type="messages",
                    height=560,
                    render_markdown=True,
                    show_copy_button=True,
                    show_label=False,
                    # likeable=True,
                    allow_tags=["think"],
                    elem_id="chatbot",
                    examples=[
                        {"text": i}
                        for i in [
                            "хто тримає цей район?",
                            "Напиши історію про Івасика-Телесика",
                            "Яка найвища гора в Україні?",
                            "Як звали батька Тараса Григоровича Шевченка?",
                            "Яка з цих гір не знаходиться у Європі? Говерла, Монблан, Гран-Парадізо, Еверест",
                            "Дай відповідь на питання\nЧому у качки жовті ноги?",
                        ]
                    ],
                )

            # Optional image attachment; its value is passed to user() as image_data.
            image_input = gr.Image(
                label="Attach image (optional)",
                type="pil",
                sources=["upload", "clipboard"],
                height=200,
                interactive=True,
                elem_id="image-input",
            )

            # ChatGPT-style input box with stop button
            with gr.Row(elem_id="chat-input-row"):
                msg = gr.Textbox(
                    label=None,
                    placeholder="Message… (Press Enter to send)",
                    autofocus=True,
                    lines=1,
                    max_lines=6,
                    container=False,
                    show_label=False,
                    elem_id="chat-input",
                    elem_classes=["chat-input-box"]
                )
                # Starts hidden; shown and hidden by the event chains below.
                stop_btn_visible = gr.Button(
                    "⏹️", 
                    variant="secondary", 
                    elem_id="stop-btn-visible",
                    elem_classes=["stop-btn-chat"],
                    visible=False,
                    size="sm"
                )
            
            # Functional Send / Stop / Clear buttons.
            # NOTE(review): the row is created with visible=True; presumably it is
            # hidden via CSS on #hidden-buttons — confirm in static/style.css.
            with gr.Row(visible=True, elem_id="hidden-buttons"):
                send_btn = gr.Button("Send", variant="primary", elem_id="send-btn")
                stop_btn = gr.Button("Stop", variant="secondary", elem_id="stop-btn")
                clear_btn = gr.Button("Clear", variant="secondary", elem_id="clear-btn")

            # export_btn = gr.Button("Export chat (.md)", variant="secondary", elem_classes=["rounded-btn","secondary-btn"])
            # exported_file = gr.File(label="", interactive=False, visible=True)
            gr.HTML('<div class="footer-tip">Shortcuts: Enter to send • Shift+Enter for new line</div>')

    # Helper functions for managing UI state
    def show_stop_button():
        """Return an update that makes the target component visible."""
        return gr.update(visible=True)
    
    def hide_stop_button():
        """Return an update that hides the target component."""
        return gr.update(visible=False)

    # Event chains. Each trigger first records the user turn, then shows the
    # stop button, streams the bot reply into the chatbot, and finally hides
    # the stop button again.
    # e1: Enter pressed in the textbox.
    e1 = msg.submit(fn=user, inputs=[msg, image_input, chatbot], outputs=[msg, image_input, chatbot], queue=True).then(
        fn=show_stop_button, inputs=None, outputs=stop_btn_visible
    ).then(
        fn=bot, inputs=chatbot, outputs=chatbot
    ).then(
        fn=hide_stop_button, inputs=None, outputs=stop_btn_visible
    )

    # e2: Send button clicked (same chain as e1).
    e2 = send_btn.click(fn=user, inputs=[msg, image_input, chatbot], outputs=[msg, image_input, chatbot], queue=True).then(
        fn=show_stop_button, inputs=None, outputs=stop_btn_visible
    ).then(
        fn=bot, inputs=chatbot, outputs=chatbot
    ).then(
        fn=hide_stop_button, inputs=None, outputs=stop_btn_visible
    )

    # e3: an example was selected in the chatbot; its text is appended as a user turn first.
    e3 = chatbot.example_select(fn=append_example_message, inputs=[chatbot], outputs=[chatbot], queue=True).then(
        fn=show_stop_button, inputs=None, outputs=stop_btn_visible
    ).then(
        fn=bot, inputs=chatbot, outputs=chatbot
    ).then(
        fn=hide_stop_button, inputs=None, outputs=stop_btn_visible
    )

    # Stop cancels running events (both buttons work)
    stop_btn.click(fn=hide_stop_button, inputs=None, outputs=stop_btn_visible, cancels=[e1, e2, e3], queue=True)
    stop_btn_visible.click(fn=hide_stop_button, inputs=None, outputs=stop_btn_visible, cancels=[e1, e2, e3], queue=True)

    # Clear chat + input
    clear_btn.click(fn=_clear_chat, inputs=None, outputs=[msg, image_input, chatbot])

    # Export markdown
    # export_btn.click(fn=_export_markdown, inputs=chatbot, outputs=exported_file)

    # Load and inject external JavaScript
    def load_javascript():
        """Return ``static/script.js`` wrapped in a ``<script>`` tag, or ``""`` if the file is missing."""
        try:
            with open("static/script.js", "r", encoding="utf-8") as f:
                return f"<script>{f.read()}</script>"
        except FileNotFoundError:
            print("Warning: static/script.js not found")
            return ""
    
    # NOTE(review): whether a <script> tag injected through gr.HTML actually
    # executes presumably depends on the Gradio version — confirm.
    gr.HTML(load_javascript())

# Script entry point: enable the request queue on the app, then launch it.
if __name__ == "__main__":
    demo.queue().launch()