| import easyocr |
| import numpy as np |
| from PIL import Image |
| from transformers import pipeline |
| import gradio as gr |
| import pdf2image |
| import PyPDF2 |
| import io |
| import pandas as pd |
| import logging |
| from datetime import datetime |
| import os |
| import torch |
|
|
| |
| os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512' |
| torch.backends.cudnn.benchmark = True |
|
|
| |
| |
| |
| |
|
|
| |
| logging.basicConfig(level=logging.INFO) |
| logger = logging.getLogger(__name__) |
|
|
| |
| device = 0 if torch.cuda.is_available() else -1 |
| logger.info(f"Using device: {'CUDA' if device == 0 else 'CPU'}") |
|
|
| |
| def init_models(): |
| try: |
| |
| reader = easyocr.Reader(['en'], gpu=bool(device == 0), |
| model_storage_directory='./models', |
| download_enabled=True) |
| |
| |
| text_classifier = pipeline( |
| "text-classification", |
| model="distilbert-base-uncased-finetuned-sst-2-english", |
| device=device, |
| model_kwargs={"low_cpu_mem_usage": True} |
| ) |
| |
| |
| doc_classifier = pipeline( |
| "image-classification", |
| model="microsoft/dit-base-finetuned-rvlcdip", |
| device=device, |
| model_kwargs={"low_cpu_mem_usage": True} |
| ) |
| |
| return reader, text_classifier, doc_classifier |
| except Exception as e: |
| logger.error(f"Error initializing models: {str(e)}") |
| raise |
|
|
| try: |
| logger.info("Initializing models...") |
| reader, text_classifier, doc_classifier = init_models() |
| logger.info("Models initialized successfully") |
| except Exception as e: |
| logger.error(f"Failed to initialize models: {str(e)}") |
| raise |
|
|
| def validate_insurance_claim(text): |
| """Validate if the text contains insurance claim related content""" |
| keywords = ['claim', 'policy', 'insurance', 'damage', 'loss', 'accident', 'coverage'] |
| return any(keyword in text.lower() for keyword in keywords) |
|
|
| def process_document(file): |
| try: |
| if file is None: |
| return "Please upload an insurance claim document", None, None |
|
|
| |
| file_extension = os.path.splitext(file.name)[1].lower() |
|
|
| |
| if file_extension == '.pdf': |
| try: |
| images = pdf2image.convert_from_bytes(file.read(), first_page=1, last_page=1) |
| if not images: |
| return "Failed to process insurance claim PDF", None, None |
| image = images[0] |
| except Exception as e: |
| logger.error(f"PDF processing error: {str(e)}") |
| return "Error processing PDF file", None, None |
|
|
| |
| elif file_extension in ('.png', '.jpg', '.jpeg'): |
| try: |
| image = Image.open(file) |
| except Exception as e: |
| logger.error(f"Image processing error: {str(e)}") |
| return "Error processing image file", None, None |
| else: |
| return "Unsupported file format. Please upload PDF or image files.", None, None |
|
|
| |
| if image.mode != 'RGB': |
| image = image.convert('RGB') |
|
|
| |
| try: |
| result = reader.readtext(np.array(image)) |
| text = ' '.join([t[1] for t in result]) |
| except Exception as e: |
| logger.error(f"Text extraction error: {str(e)}") |
| return "Error extracting text from document", None, None |
|
|
| |
| formatted_text = format_insurance_claim(text) |
|
|
| |
| if not validate_insurance_claim(text): |
| return "Document does not appear to be an insurance claim", None, None |
|
|
| |
| try: |
| text_analysis = text_classifier(text[:512])[0] |
| except Exception as e: |
| logger.error(f"Text classification error: {str(e)}") |
| text_analysis = {'score': 0.5} |
|
|
| |
| try: |
| doc_analysis = doc_classifier(image)[0] |
| except Exception as e: |
| logger.error(f"Document classification error: {str(e)}") |
| doc_analysis = {'score': 0.5} |
|
|
| |
| validation_result = analyze_claim_validity(text_analysis['score']) |
| |
| return ( |
| formatted_text, |
| f"Claim Status: {validation_result['status']}\n" + |
| f"Confidence Score: {text_analysis['score']:.2f}\n" + |
| f"Validation Notes: {validation_result['notes']}", |
| f"Document Type: Insurance Claim Form\n" + |
| f"Form Type: NUCC Health Insurance Claim\n" + |
| f"Confidence: {doc_analysis['score']:.2f}" |
| ) |
|
|
| except Exception as e: |
| logger.error(f"General processing error: {str(e)}") |
| return "Error processing document", None, None |
|
|
| def format_insurance_claim(text): |
| """Format the extracted text in a more readable way""" |
| |
| lines = text.split('\n') |
| formatted_lines = [] |
| |
| key_fields = { |
| 'Insured Name': '', |
| 'Policy Number': '', |
| 'Provider': '', |
| 'Date of Service': '', |
| 'Claim Details': '' |
| } |
| |
| |
| for line in lines: |
| if 'HEALTH INSURANCE CLAIM FORM' in line: |
| formatted_lines.append(f"Document Type: {line.strip()}") |
| elif any(field in line for field in ['Name:', 'Policy', 'Provider', 'Date']): |
| formatted_lines.append(line.strip()) |
| |
| return '\n'.join(formatted_lines) |
|
|
| def analyze_claim_validity(score): |
| """Provide more detailed validation analysis""" |
| if score > 0.9: |
| return { |
| 'status': 'VALID', |
| 'notes': 'High confidence in claim validity. All required fields present.' |
| } |
| elif score > 0.7: |
| return { |
| 'status': 'VALID - REVIEW RECOMMENDED', |
| 'notes': 'Claim appears valid but manual review suggested.' |
| } |
| else: |
| return { |
| 'status': 'NEEDS REVIEW', |
| 'notes': 'Low confidence score. Please review manually.' |
| } |
|
|
| |
| custom_css = """ |
| .gradio-container { |
| max-width: 900px !important; |
| margin: auto; |
| padding-top: 1.5rem; |
| padding-bottom: 1.5rem; |
| } |
| |
| .main-div { |
| display: flex; |
| flex-direction: column; |
| gap: 20px; |
| } |
| |
| .container { |
| border-radius: 10px; |
| background-color: #ffffff; |
| box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); |
| margin-bottom: 20px; |
| padding: 20px; |
| } |
| |
| .output-div { |
| min-height: 100px; |
| margin-bottom: 10px; |
| } |
| |
| h1 { |
| color: #2a4365; |
| text-align: center; |
| font-size: 2.5rem; |
| margin-bottom: 1rem; |
| font-weight: bold; |
| } |
| |
| .description { |
| text-align: center; |
| color: #4a5568; |
| margin-bottom: 2rem; |
| } |
| |
| .file-upload { |
| border: 2px dashed #cbd5e0; |
| border-radius: 8px; |
| padding: 20px; |
| text-align: center; |
| transition: all 0.3s ease; |
| } |
| |
| .file-upload:hover { |
| border-color: #4299e1; |
| } |
| |
| .output-label { |
| font-weight: bold; |
| color: #2d3748; |
| margin-bottom: 0.5rem; |
| } |
| |
| .output-text { |
| background-color: #f7fafc; |
| border-radius: 6px; |
| padding: 12px; |
| } |
| """ |
|
|
| |
| with gr.Blocks(css=custom_css) as iface: |
| gr.HTML("<h1>🔍 Automated Insurance Claim Validation System</h1>") |
| gr.HTML(""" |
| <div class="description"> |
| Upload insurance claim documents (PDF or image) for automated validation and analysis. |
| Our AI system will process and validate your claims instantly. |
| </div> |
| """) |
| |
| with gr.Row(): |
| with gr.Column(): |
| file_input = gr.File( |
| label="Upload Insurance Claim Document", |
| file_types=[".pdf", ".png", ".jpg", ".jpeg"], |
| elem_classes="file-upload" |
| ) |
| |
| with gr.Row(): |
| with gr.Column(): |
| text_output = gr.Textbox( |
| label="Extracted Claim Details", |
| elem_classes="output-div", |
| lines=5 |
| ) |
| validation_output = gr.Textbox( |
| label="Claim Validation Results", |
| elem_classes="output-div" |
| ) |
| classification_output = gr.Textbox( |
| label="Document Classification", |
| elem_classes="output-div" |
| ) |
| |
| file_input.change( |
| fn=process_document, |
| inputs=[file_input], |
| outputs=[text_output, validation_output, classification_output] |
| ) |
|
|
| if __name__ == "__main__": |
| iface.launch(server_name="0.0.0.0", server_port=7860) |
|
|