File size: 5,949 Bytes
7691c26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import spaces
import gradio as gr
from huggingface_hub import HfApi
from transformers.image_transforms import pad
import numpy as np
import torch
from PIL import Image
from transformers import CLIPImageProcessor, CLIPVisionModel
# Load the CLIP vision backbone and its image processor at import time.
# NOTE(review): neither is used by the live code path — they are only
# referenced by the commented-out pooler-output comparison inside `infer`;
# confirm whether that comparison should be re-enabled or these removed.
model = CLIPVisionModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")

def _expand_for_data_format(values, input_data_format):
    """

    Convert values to be in the format expected by np.pad based on the data format.

    """
    if isinstance(values, (int, float)):
        values = ((values, values), (values, values))
    elif isinstance(values, tuple) and len(values) == 1:
        values = ((values[0], values[0]), (values[0], values[0]))
    elif isinstance(values, tuple) and len(values) == 2 and isinstance(values[0], int):
        values = (values, values)
    elif isinstance(values, tuple) and len(values) == 2 and isinstance(values[0], tuple):
        values = values
    else:
        raise ValueError(f"Unsupported format: {values}")

    # add 0 for channel dimension
    #values = ((0, 0), *values) if input_data_format == ChannelDimension.FIRST else (*values, (0, 0))
    values = ((0, 0), *values) if input_data_format == "channels_first" else (*values, (0, 0))

    # Add additional padding if there's a batch dimension
    #values = (0, *values) if image.ndim == 4 else values
    return values

#@spaces.GPU
def infer(height: int, width: int, channels: int, input_data_format: str, mode: str,

          is_numpy: bool=True, is_mul: bool=True, is_int: bool=True, is_abs: bool=True):
    """Generate a random image, pad it two ways, and return all three as PIL images.

    Debug harness comparing ``np.pad`` against ``transformers.image_transforms.pad``
    on the same random array, so their padding behavior can be inspected
    side-by-side in the Gradio UI.

    Args:
        height: Image height in pixels.
        width: Image width in pixels.
        channels: Number of channels for the random array. NOTE(review): the
            'RGB' conversions below effectively require 3 — confirm intent of
            the UI allowing 2-5.
        input_data_format: "None", "channels_first", or "channels_last"; when
            not "None" it is forwarded to transformers' ``pad`` together with
            ``data_format="channels_last"``.
        mode: Padding mode name forwarded to transformers' ``pad``.
        is_numpy: If True, pad with ``np.pad``; otherwise use transformers' ``pad``.
        is_mul: If True, scale the random [0, 1) floats by 255.
        is_int: If True, cast the image to uint8.
        is_abs: If True, take ``np.abs`` (a no-op here since ``np.random.rand``
            is non-negative; presumably kept as an experiment toggle).

    Returns:
        Tuple of three ``PIL.Image``: (original, padded, padded-via-PIL-roundtrip).

    Raises:
        gr.Error: Wrapping any exception raised during processing.
    """
    try:
        # Options forwarded only to transformers' pad (the non-numpy path).
        pad_kwargs = {}
        pad_kwargs["mode"] = mode
        if input_data_format != "None":
            pad_kwargs["input_data_format"] = input_data_format
            pad_kwargs["data_format"] = "channels_last"

        # Example image as a NumPy array
        image = np.random.rand(height, width, channels)  # Height x Width x Channels
        # NOTE(review): Image.fromarray on float64 data with an explicit 'RGB'
        # mode reinterprets the raw buffer rather than converting values —
        # presumably deliberate here to reproduce a padding/dtype issue; confirm.
        image_pil = np.array(Image.fromarray(image, 'RGB')) # Open with PIL and save
        if is_mul: image = image * 255
        if is_int: image = image.astype(np.uint8)
        if is_abs: image = np.abs(image)
        # Debug output: inspect raw values and dtypes of both variants.
        print(image)
        print(image.dtype)
        print(image_pil)
        print(image_pil.dtype)

        # Define padding: ((before_height, after_height), (before_width, after_width))
        padding = ((0, 0), (112, 112))  # Pads width to make it 448

        # Apply padding
        if is_numpy:
            # np.pad path: expand the spec to include the channel axis first.
            padded_image = np.pad(image, _expand_for_data_format(padding, input_data_format), mode="constant",
                                constant_values=_expand_for_data_format(0.0, input_data_format))
            padded_image_pil = np.pad(image_pil, _expand_for_data_format(padding, input_data_format), mode="constant",
                                    constant_values=_expand_for_data_format(0.0, input_data_format))
        else:
            # transformers' pad path; pad_kwargs only applied to the PIL variant.
            # NOTE(review): asymmetry (kwargs on one call but not the other)
            # looks intentional for A/B comparison — confirm.
            padded_image = pad(image, padding=padding)
            padded_image_pil = pad(image_pil, padding=padding, **pad_kwargs)

        print("Original Image Shape:", image.shape)
        print("Padded Image Shape:", padded_image.shape)
        print("Padded Image Shape (PIL):", padded_image_pil.shape)

        # HWC -> 1CHW tensors, matching the layout the CLIP model expects.
        image_torch = torch.tensor(image).permute(2, 0, 1).unsqueeze(0)
        padded_image_torch = torch.tensor(padded_image).permute(2, 0, 1).unsqueeze(0)
        padded_image_pil_torch = torch.tensor(padded_image_pil).permute(2, 0, 1).unsqueeze(0)

        print("Original Image Shape (Torch):", image_torch.shape)
        print("Padded Image Shape (Torch):", padded_image_torch.shape)
        print("Padded Image Shape (PIL) (Torch):", padded_image_pil_torch.shape)
        # Step 5: Pass the padded image through the model
        #outputs_padded = model(pixel_values=padded_image_torch, interpolate_pos_encoding=True)
        #outputs_original = model(pixel_values=image_torch)
        # Step 6: Extract the results for comparison
        #original = outputs_original.pooler_output
        #padded  = outputs_padded.pooler_output

        #print(torch.mean(original - padded))

        # Save images
        # NOTE(review): fromarray with mode 'RGB' requires uint8 HxWx3 data —
        # this raises when is_int is unchecked (float arrays); confirm whether
        # that failure mode is part of what the demo is meant to expose.
        original_im = Image.fromarray(image, 'RGB')
        padded_im = Image.fromarray(padded_image, 'RGB')
        padded_im_pil = Image.fromarray(padded_image_pil, 'RGB')
        #original_im.save("_pad_original.png")
        #padded_im.save("_pad_padded.png")
        #padded_im_pil.save("_pad_padded_pil.png")
        return original_im, padded_im, padded_im_pil
    except Exception as e:
        # Surface any failure in the Gradio UI instead of crashing the worker.
        raise gr.Error(e)

# --- Gradio UI ---------------------------------------------------------------
# Declaration order inside each Row fixes the on-screen layout; do not reorder.
with gr.Blocks() as demo:
    with gr.Row(equal_height=True):
        # Image geometry controls. 224x224 matches the CLIP ViT input size.
        width = gr.Number(label="Width", value=224, minimum=1, maximum=4096, step=1)
        height = gr.Number(label="Height", value=224, minimum=1, maximum=4096, step=1)
        # NOTE(review): minimum=2/maximum=5 although `infer` converts with
        # mode 'RGB' (3 channels) — presumably to probe failure cases; confirm.
        channels = gr.Number(label="Channels", value=3, minimum=2, maximum=5, step=1)
        input_df = gr.Radio(label="Input data format", choices=["None", "channels_first", "channels_last"], value="None")
        mode = gr.Radio(label="Mode", choices=["constant", "reflect", "replicate", "symmetric"], value="constant")
        # NOTE(review): these UI defaults (False) differ from the True defaults
        # in `infer`'s signature; the UI values win on click — confirm intended.
        is_mul = gr.Checkbox(label="Multiply by 255", value=False)
        is_int = gr.Checkbox(label="Cast to uint8", value=False)
        is_abs = gr.Checkbox(label="Absolute value", value=False)
        is_numpy = gr.Checkbox(label="Pad by numpy", value=False)
    run_button = gr.Button("Run", variant="primary")
    with gr.Row(equal_height=True):
        # Output panes: original image plus both padding variants.
        output_image1 = gr.Image(label="Original")
        output_image2 = gr.Image(label="Padded")
        output_image3 = gr.Image(label="Padded (with PIL)")

    # Wire the button; argument order here must match `infer`'s signature.
    run_button.click(infer, [height, width, channels, input_df, mode, is_numpy, is_mul, is_int, is_abs],
                     [output_image1, output_image2, output_image3])

demo.launch()