Gemini-Image-Edit

Sleeping

App Files Community

ameerazam08 committed on Mar 17

Commit

e60b597

verified ·

1 Parent(s): fec7900

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -65

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import os
 import time
 import uuid
 import tempfile
-from PIL import Image
 import gradio as gr
 import base64
 import mimetypes
@@ -20,10 +20,8 @@ def generate(text, file_name, api_key, model="gemini-2.0-flash-exp"):
     client = genai.Client(api_key=(api_key.strip() if api_key and api_key.strip() != ""
                                      else os.environ.get("GEMINI_API_KEY")))
-    files = [
-        client.files.upload(file=file_name),
-    ]
     contents = [
         types.Content(
             role="user",
@@ -41,13 +39,13 @@ def generate(text, file_name, api_key, model="gemini-2.0-flash-exp"):
         top_p=0.95,
         top_k=40,
         max_output_tokens=8192,
-        response_modalities=[
-            "image",
-            "text",
-        ],
         response_mime_type="text/plain",
     )
     with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
         temp_path = tmp.name
         for chunk in client.models.generate_content_stream(
@@ -57,19 +55,20 @@ def generate(text, file_name, api_key, model="gemini-2.0-flash-exp"):
         ):
             if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
                 continue
-            inline_data = chunk.candidates[0].content.parts[0].inline_data
-            if inline_data:
-                save_binary_file(temp_path, inline_data.data)
-                print(
-                    "File of mime type "
-                    f"{inline_data.mime_type} saved to: {temp_path} and prompt input :{text}"
-                )
             else:
-                print(chunk.text)
     del files
-    return temp_path
 def process_image_and_prompt(composite_pil, prompt, gemini_api_key):
     try:
@@ -77,61 +76,59 @@ def process_image_and_prompt(composite_pil, prompt, gemini_api_key):
         with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
             composite_path = tmp.name
             composite_pil.save(composite_path)
         file_name = composite_path
         input_text = prompt
         model = "gemini-2.0-flash-exp"
-        gemma_edited_image_path = generate(text=input_text, file_name=file_name, api_key=gemini_api_key, model=model)
-        print("image_path ", gemma_edited_image_path)
-        result_img = Image.open(gemma_edited_image_path)
-        if result_img.mode == "RGBA":
-            result_img = result_img.convert("RGB")
-        return [result_img]
     except Exception as e:
         raise gr.Error(f"Error Getting {e}", duration=5)
-# Build a Blocks-based interface to include the custom HTML header.
 with gr.Blocks() as demo:
-    # HTML Header for the application.
     gr.HTML(
     """
     <div style='display: flex; align-items: center; justify-content: center; gap: 20px'>
-    <div style="background-color: var(--block-background-fill); border-radius: 8px">
-        <img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" style="width: 100px; height: 100px;">
-    </div>
-    <div>
-        <h1>Gen AI Image Editing</h1>
-        <p>Gemini using for Image Editing</p>
-        <p>Powered by <a href="https://gradio.app/">Gradio</a> ⚡️</p>
-        <p> Duplicate Repo <a href="https://huggingface.co/spaces/ameerazam08/Gemini-Image-Edit?duplicate=true">Duplicate</a></p>
-        <p>Get an API Key <a href="https://aistudio.google.com/apikey">here</a></p>
-        <p>Follow me on Twitter: <a href="https://x.com/Ameerazam18">Ameerazam18</a></p>
-    </div>
     </div>
     """
     )
-    # Title and description.
-    # Define examples to be shown within the Gradio interface
-    examples = [
-        # Each example is a list corresponding to the inputs:
-        # [Input Image, Prompt, Guidance Scale, Number of Steps, LoRA Name]
-        ["data/1.webp", 'change text to "AMEER"'],
-        ["data/2.webp", "remove the spoon from  hand only"],
-        ["data/3.webp", 'change text to "Make it "'],
-        ["data/1.jpg", "add  joker style only on face"],
-         ["data/1777043.jpg", "add  joker style only on face"],
-         ["data/2807615.jpg","add lipstick on lip only "],
-         ["data/76860.jpg", "add lipstick on lip only "],
-         ["data/2807615.jpg", "make it happy looking face only"],
-    ]
-    gr.Markdown("Upload an image and enter a prompt to generate outputs in the gallery. Do not Use NFSW Images")
     with gr.Row():
         with gr.Column():
@@ -143,7 +140,7 @@ with gr.Blocks() as demo:
             gemini_api_key = gr.Textbox(
                 lines=1,
                 placeholder="Enter Gemini API Key (optional)",
-                label="Gemini API Key (optional) Generate and fill here"
             )
             prompt_input = gr.Textbox(
                 lines=2,
@@ -153,18 +150,30 @@ with gr.Blocks() as demo:
             submit_btn = gr.Button("Generate")
         with gr.Column():
             output_gallery = gr.Gallery(label="Generated Outputs")
-    # Set up the interaction.
     submit_btn.click(
         fn=process_image_and_prompt,
         inputs=[image_input, prompt_input, gemini_api_key],
-        outputs=output_gallery,
     )
     gr.Examples(
         examples=examples,
         inputs=[image_input, prompt_input, gemini_api_key],
         label="Try these examples"
     )
-demo.queue(max_size=500).launch()

 import time
 import uuid
 import tempfile
+from PIL import Image, ImageDraw, ImageFont
 import gradio as gr
 import base64
 import mimetypes
     client = genai.Client(api_key=(api_key.strip() if api_key and api_key.strip() != ""
                                      else os.environ.get("GEMINI_API_KEY")))
+    files = [ client.files.upload(file=file_name) ]
     contents = [
         types.Content(
             role="user",
         top_p=0.95,
         top_k=40,
         max_output_tokens=8192,
+        response_modalities=["image", "text"],
         response_mime_type="text/plain",
     )
+    text_response = ""
+    image_path = None
+    # Create a temporary file to potentially store image data.
     with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
         temp_path = tmp.name
         for chunk in client.models.generate_content_stream(
         ):
             if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
                 continue
+            candidate = chunk.candidates[0].content.parts[0]
+            # Check for inline image data
+            if candidate.inline_data:
+                save_binary_file(temp_path, candidate.inline_data.data)
+                print(f"File of mime type {candidate.inline_data.mime_type} saved to: {temp_path} and prompt input: {text}")
+                image_path = temp_path
+                # If an image is found, we assume that is the desired output.
+                break
             else:
+                # Accumulate text response if no inline_data is present.
+                text_response += chunk.text + "\n"
     del files
+    return image_path, text_response
 def process_image_and_prompt(composite_pil, prompt, gemini_api_key):
     try:
         with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
             composite_path = tmp.name
             composite_pil.save(composite_path)
         file_name = composite_path
         input_text = prompt
         model = "gemini-2.0-flash-exp"
+        image_path, text_response = generate(text=input_text, file_name=file_name, api_key=gemini_api_key, model=model)
+        if image_path:
+            # Load and convert the image if needed.
+            result_img = Image.open(image_path)
+            if result_img.mode == "RGBA":
+                result_img = result_img.convert("RGB")
+            return [result_img], ""  # Return image in gallery and empty text output.
+        else:
+            # Return no image and the text response.
+            return None, text_response
     except Exception as e:
         raise gr.Error(f"Error Getting {e}", duration=5)
+# Build a Blocks-based interface with a custom HTML header.
 with gr.Blocks() as demo:
     gr.HTML(
     """
     <div style='display: flex; align-items: center; justify-content: center; gap: 20px'>
+      <div style="background-color: var(--block-background-fill); border-radius: 8px">
+          <img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" style="width: 100px; height: 100px;">
+      </div>
+      <div>
+          <h1>Gen AI Image Editing</h1>
+          <p>Gemini for Image Editing</p>
+          <p>Powered by <a href="https://gradio.app/">Gradio</a> ⚡️</p>
+          <p>Duplicate Repo <a href="https://huggingface.co/spaces/ameerazam08/Gemini-Image-Edit?duplicate=true">Duplicate</a></p>
+          <p>Get an API Key <a href="https://aistudio.google.com/apikey">here</a></p>
+          <p>Follow me on Twitter: <a href="https://x.com/Ameerazam18">Ameerazam18</a></p>
+      </div>
     </div>
     """
     )
+    gr.Markdown("""
+## ⚠️ API Configuration ⚠️
+- **Issue:** ❗ Sometimes the model returns text instead of an image, causing failures when saving as an image.
+### 🔧 Steps to Address:
+1. **🛠️ Duplicate the Repository**
+   - Create a separate copy for modifications.
+2. **🔑 Use Your Own Gemini API Key**
+   - You **must** configure your own Gemini key for generation!
+---
+### 📌 Usage
+- Upload an image and enter a prompt to generate outputs.
+- If text is returned instead of an image, it will appear in the text output.
+- ❌ **Do not use NSFW images!**
+""")
     with gr.Row():
         with gr.Column():
             gemini_api_key = gr.Textbox(
                 lines=1,
                 placeholder="Enter Gemini API Key (optional)",
+                label="Gemini API Key (optional)"
             )
             prompt_input = gr.Textbox(
                 lines=2,
             submit_btn = gr.Button("Generate")
         with gr.Column():
             output_gallery = gr.Gallery(label="Generated Outputs")
+            output_text = gr.Textbox(label="Gemini Output", placeholder="Text response will appear here if no image is generated.")
+    # Set up the interaction with two outputs.
     submit_btn.click(
         fn=process_image_and_prompt,
         inputs=[image_input, prompt_input, gemini_api_key],
+        outputs=[output_gallery, output_text],
     )
+    examples = [
+        ["data/1.webp", 'change text to "AMEER"', ""],
+        ["data/2.webp", "remove the spoon from hand only", ""],
+        ["data/3.webp", 'change text to "Make it "', ""],
+        ["data/1.jpg", "add joker style only on face", ""],
+        ["data/1777043.jpg", "add joker style only on face", ""],
+        ["data/2807615.jpg", "add lipstick on lip only", ""],
+        ["data/76860.jpg", "add lipstick on lip only", ""],
+        ["data/2807615.jpg", "make it happy looking face only", ""],
+    ]
     gr.Examples(
         examples=examples,
         inputs=[image_input, prompt_input, gemini_api_key],
         label="Try these examples"
     )
+demo.queue(max_size=500).launch()