Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,6 +7,7 @@ import uuid
|
|
| 7 |
from gtts import gTTS
|
| 8 |
import cv2
|
| 9 |
import numpy as np
|
|
|
|
| 10 |
|
| 11 |
# --- Configuration ---
|
| 12 |
API_KEY = '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'
|
|
@@ -18,16 +19,18 @@ client = OpenAI(
|
|
| 18 |
|
| 19 |
# --- Helper Functions ---
|
| 20 |
|
| 21 |
-
def describe_image(
|
|
|
|
|
|
|
|
|
|
| 22 |
response = client.chat.completions.create(
|
| 23 |
-
|
| 24 |
model="opengvlab/internvl3-14b:free",
|
| 25 |
messages=[
|
| 26 |
{
|
| 27 |
"role": "user",
|
| 28 |
"content": [
|
| 29 |
{"type": "text", "text": "Describe this image clearly, including objects, scene, and any visible text. Also warn about potential hazards like wet floors, stairs, obstacles."},
|
| 30 |
-
{"type": "image_url", "image_url": {"url":
|
| 31 |
]
|
| 32 |
}
|
| 33 |
]
|
|
@@ -41,15 +44,6 @@ def speak(text, filename=None):
|
|
| 41 |
tts.save(filename)
|
| 42 |
return filename
|
| 43 |
|
| 44 |
-
def image_to_array(uploaded_image):
|
| 45 |
-
img = Image.open(uploaded_image)
|
| 46 |
-
img = img.convert('RGB') # Ensure 3 channels
|
| 47 |
-
return np.array(img)
|
| 48 |
-
|
| 49 |
-
def array_to_base64(img_array):
|
| 50 |
-
_, buffer = cv2.imencode('.jpg', img_array)
|
| 51 |
-
return "data:image/jpeg;base64," + buffer.tobytes().hex()
|
| 52 |
-
|
| 53 |
# --- Streamlit UI ---
|
| 54 |
|
| 55 |
st.set_page_config(page_title="AI Visual Assistant for the Blind", layout="centered")
|
|
@@ -63,15 +57,10 @@ if camera_image is not None:
|
|
| 63 |
st.image(camera_image, caption="Captured Frame", use_column_width=True)
|
| 64 |
|
| 65 |
with st.spinner("Analyzing the scene..."):
|
| 66 |
-
#
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
# Simulate URL (in production, you'd upload to cloud storage)
|
| 72 |
-
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
|
| 73 |
-
|
| 74 |
-
description = describe_image(image_url)
|
| 75 |
|
| 76 |
st.subheader("📝 Description")
|
| 77 |
st.write(description)
|
|
@@ -82,7 +71,6 @@ if camera_image is not None:
|
|
| 82 |
st.audio(audio_bytes, format='audio/mp3')
|
| 83 |
|
| 84 |
# Cleanup
|
| 85 |
-
os.remove(temp_path)
|
| 86 |
os.remove(audio_file)
|
| 87 |
|
| 88 |
st.markdown("---")
|
|
|
|
| 7 |
from gtts import gTTS
|
| 8 |
import cv2
|
| 9 |
import numpy as np
|
| 10 |
+
import base64
|
| 11 |
|
| 12 |
# --- Configuration ---
|
| 13 |
API_KEY = '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'
|
|
|
|
| 19 |
|
| 20 |
# --- Helper Functions ---
|
| 21 |
|
| 22 |
+
def describe_image(image_bytes):
|
| 23 |
+
# Convert to base64
|
| 24 |
+
base64_image = base64.b64encode(image_bytes).decode('utf-8')
|
| 25 |
+
|
| 26 |
response = client.chat.completions.create(
|
|
|
|
| 27 |
model="opengvlab/internvl3-14b:free",
|
| 28 |
messages=[
|
| 29 |
{
|
| 30 |
"role": "user",
|
| 31 |
"content": [
|
| 32 |
{"type": "text", "text": "Describe this image clearly, including objects, scene, and any visible text. Also warn about potential hazards like wet floors, stairs, obstacles."},
|
| 33 |
+
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
|
| 34 |
]
|
| 35 |
}
|
| 36 |
]
|
|
|
|
| 44 |
tts.save(filename)
|
| 45 |
return filename
|
| 46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
# --- Streamlit UI ---
|
| 48 |
|
| 49 |
st.set_page_config(page_title="AI Visual Assistant for the Blind", layout="centered")
|
|
|
|
| 57 |
st.image(camera_image, caption="Captured Frame", use_column_width=True)
|
| 58 |
|
| 59 |
with st.spinner("Analyzing the scene..."):
|
| 60 |
+
# Read the image bytes directly
|
| 61 |
+
image_bytes = camera_image.getvalue()
|
| 62 |
+
|
| 63 |
+
description = describe_image(image_bytes)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
st.subheader("📝 Description")
|
| 66 |
st.write(description)
|
|
|
|
| 71 |
st.audio(audio_bytes, format='audio/mp3')
|
| 72 |
|
| 73 |
# Cleanup
|
|
|
|
| 74 |
os.remove(audio_file)
|
| 75 |
|
| 76 |
st.markdown("---")
|