| import gradio as gr |
| import requests |
| from PIL import Image |
| from transformers import BlipProcessor, BlipForConditionalGeneration |
| import time |
|
|
# Hugging Face checkpoint shared by the processor and the model below.
_CHECKPOINT = "Salesforce/blip-image-captioning-large"

# Setting this to None switches off Pillow's decompression-bomb size check.
# NOTE(review): this app opens user-uploaded files, so confirm that removing
# the limit is intentional.
Image.MAX_IMAGE_PIXELS = None

# Loaded once at import time and reused by every captioning request.
processor = BlipProcessor.from_pretrained(_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_CHECKPOINT)
|
|
def caption(img, min_new, max_new):
    """Generate a caption for an image with the module-level BLIP model.

    Args:
        img: Anything ``PIL.Image.open`` accepts (the UI passes a file path).
        min_new: Lower bound on the number of newly generated tokens.
        max_new: Upper bound on the number of newly generated tokens.

    Returns:
        The decoded caption with special tokens stripped.

    Raises:
        Whatever ``PIL.Image.open``, the processor or ``model.generate``
        raise; nothing is caught here.
    """
    # UI sliders may deliver floats, while generation works in whole tokens.
    max_new = int(max_new)
    # A minimum above the maximum can never be satisfied, so cap it.
    min_new = min(int(min_new), max_new)

    # ``convert`` loads the pixel data and returns a new image, so the
    # underlying file can be closed as soon as the conversion is done.
    with Image.open(img) as source:
        raw_image = source.convert('RGB')

    # Shrink in place (aspect ratio preserved) before preprocessing.
    raw_image.thumbnail((1024, 1024))

    inputs = processor(raw_image, return_tensors="pt")
    out = model.generate(
        **inputs,
        min_new_tokens=min_new,
        max_new_tokens=max_new,
    )
    return processor.decode(out[0], skip_special_tokens=True)
|
|
def greet(img, min_new, max_new):
    """Caption *img* and report how long it took, as a single display string.

    Args:
        img: Image reference forwarded to ``caption``; ``None`` means the
            user has not supplied an image.
        min_new: Minimum number of new tokens, forwarded to ``caption``.
        max_new: Maximum number of new tokens, forwarded to ``caption``.

    Returns:
        A prompt to upload an image when *img* is ``None``; an error message
        when captioning raises; otherwise the caption followed by the
        elapsed time in seconds.
    """
    if img is None:
        return "❌ Please upload an image."

    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    try:
        result = caption(img, min_new, max_new)
    except Exception as e:
        # Deliberately broad: this is the UI boundary, so any failure is
        # shown to the user as text instead of propagating.
        return f"⚠️ Error: {e}"
    elapsed = time.perf_counter() - start
    return f"{result}\n⏱ Took {elapsed:.2f} seconds"
|
|
# Web UI: one image input plus two token-count sliders, wired to ``greet``.
iface = gr.Interface(
    fn=greet,
    title='BLIP Image Captioning (large)',
    description="Uses Salesforce/blip-image-captioning-large on CPU.",
    inputs=[
        # type='filepath' hands the callback a path string rather than pixels.
        gr.Image(type='filepath', label='Image'),
        # step=1 keeps the sliders on whole numbers; token counts are integers.
        gr.Slider(label='Min New Tokens', minimum=1, maximum=50, value=5, step=1),
        gr.Slider(label='Max New Tokens', minimum=1, maximum=100, value=20, step=1),
    ],
    outputs=gr.Textbox(label='Caption'),
    theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"),
)
# Starts the server as soon as the module is executed.
iface.launch()
|
|