"""FreeBibTec2 - bibliographic reference collection app.

Extracts identifiers (DOI, arXiv, ISBN, URL, PMID) from free text, verifies
DOIs and arXiv IDs against public APIs, downloads the papers that are
available, optionally asks an AI provider for an analysis, and exposes the
whole workflow through a Gradio interface.

NOTE(review): the text that used to precede the imports was web-page residue
(status text, file size and a column of commit hashes), not Python source.
"""
import os
import re
import json
import logging
import zipfile
import asyncio
from typing import Dict, List, Optional, Any
from datetime import datetime
import gradio as gr
from enum import Enum
import hashlib
import aiohttp

# Logging configuration: INFO level, each record shows time, logger name and level
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger used by the classes and functions defined in this file
logger = logging.getLogger(__name__)

# ========== CONFIGURACIÓN DE APIs ==========

class APIProvider:
    """Registry of AI chat-completion providers plus a small async client.

    Attributes:
        available_apis: provider key -> display name, base URL, known model
            names and default HTTP headers.
        custom_models: model alias -> routing information for models that
            need special handling (currently only the Kimi alias).
    """

    # Model alias that is routed to Moonshot's endpoint by ``call_api``.
    KIMI_MODEL_ALIAS = "moonshotai/Kimi-K2-Instruct"

    # Total time budget, in seconds, for a single chat-completion request.
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self):
        self.available_apis = {
            "nebius": {
                "name": "Nebius AI",
                "base_url": "https://api.nebius.ai/v1",
                "models": ["neural-chat-7b-v3-1", "llama-2-70b-chat", "mistral-7b-instruct"],
                "headers": {"Content-Type": "application/json"}
            },
            "moonshot": {
                "name": "Moonshot AI",
                "base_url": "https://api.moonshot.cn/v1",
                "models": ["moonshot-v1-8k", "moonshot-v1-32k", "moonshot-v1-128k"],
                "headers": {"Content-Type": "application/json"}
            },
            "openai": {
                "name": "OpenAI",
                "base_url": "https://api.openai.com/v1",
                "models": ["gpt-4", "gpt-3.5-turbo", "gpt-4-turbo"],
                "headers": {"Content-Type": "application/json"}
            },
            # NOTE(review): every provider is called OpenAI-style
            # (Bearer token, /chat/completions) -- confirm this matches the
            # endpoint Anthropic expects for these credentials.
            "anthropic": {
                "name": "Anthropic",
                "base_url": "https://api.anthropic.com/v1",
                "models": ["claude-3-opus-20240229", "claude-3-sonnet-20240229", "claude-3-haiku-20240307"],
                "headers": {"Content-Type": "application/json", "anthropic-version": "2023-06-01"}
            },
            "deepseek": {
                "name": "DeepSeek",
                "base_url": "https://api.deepseek.com/v1",
                "models": ["deepseek-chat", "deepseek-coder"],
                "headers": {"Content-Type": "application/json"}
            }
        }

        # Kimi is addressed through a dedicated alias -> provider/model mapping
        self.custom_models = {
            self.KIMI_MODEL_ALIAS: {
                "provider": "moonshot",
                "model_id": "moonshot-v1-8k",  # assumed to be compatible
                "requires_special_handling": True
            }
        }

    def _resolve_model(self, provider: str, model: Optional[str]) -> str:
        """Return ``model``, or the provider's first listed model when blank.

        Callers may pass an empty model to mean "use the default"; sending an
        empty string to the API would only produce an error response.
        Providers without their own entry (e.g. "custom") use Moonshot's
        list, mirroring the routing in ``call_api``.
        """
        if model and model.strip():
            return model.strip()
        config = self.available_apis.get(provider) or self.available_apis["moonshot"]
        return config["models"][0]

    @staticmethod
    def _build_payload(model: str, messages: List[Dict], max_tokens: int) -> Dict[str, Any]:
        """Build the chat-completions request body shared by all providers."""
        return {
            "model": model,
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": 0.7,
            "top_p": 0.95
        }

    async def _post_chat_completion(self, url: str, headers: Dict[str, str],
                                    payload: Dict[str, Any],
                                    error_label: str) -> Optional[str]:
        """POST ``payload`` to ``url`` and return the first choice's content.

        Returns None (after logging with ``error_label``) on a non-200
        status, and an empty string when the response carries no choices.
        """
        timeout = aiohttp.ClientTimeout(total=self.REQUEST_TIMEOUT_SECONDS)
        async with aiohttp.ClientSession() as session:
            async with session.post(
                url,
                headers=headers,
                json=payload,
                timeout=timeout
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    logger.error(f"{error_label} {response.status}: {error_text}")
                    return None
                data = await response.json()

        # Tolerate a missing or empty "choices" list instead of raising IndexError
        choices = data.get("choices") or [{}]
        message = choices[0].get("message") or {}
        return message.get("content", "")

    async def call_api(self, provider: str, api_key: str, model: str, 
                      messages: List[Dict], max_tokens: int = 1000) -> Optional[str]:
        """Call the selected provider's chat-completions endpoint.

        Args:
            provider: key of ``available_apis``, or "custom" (routed to Moonshot).
            api_key: bearer token sent in the Authorization header.
            model: model name; blank selects the provider's first listed model.
            messages: chat messages in role/content form.
            max_tokens: completion length limit forwarded to the API.

        Returns:
            The content of the first completion, or None on any failure
            (unsupported provider, HTTP error, network error).
        """
        if provider not in self.available_apis and provider not in ["custom", "moonshot"]:
            logger.error(f"Proveedor no soportado: {provider}")
            return None

        try:
            model = self._resolve_model(provider, model)

            # Special handling for the Kimi alias
            if model == self.KIMI_MODEL_ALIAS:
                return await self._call_moonshot_kimi(api_key, messages, max_tokens)

            # Provider-specific endpoint and headers
            if provider in ["moonshot", "custom"]:
                base_url = self.available_apis["moonshot"]["base_url"]
                headers = {
                    "Authorization": f"Bearer {api_key}",
                    "Content-Type": "application/json"
                }
            else:
                api_config = self.available_apis[provider]
                base_url = api_config["base_url"]
                headers = {**api_config["headers"], "Authorization": f"Bearer {api_key}"}

            return await self._post_chat_completion(
                f"{base_url}/chat/completions",
                headers,
                self._build_payload(model, messages, max_tokens),
                "API Error"
            )

        except Exception as e:
            logger.error(f"Error calling API {provider}: {e}")
            return None

    async def _call_moonshot_kimi(self, api_key: str, messages: List[Dict], max_tokens: int) -> Optional[str]:
        """Call Moonshot's endpoint with the base model mapped to the Kimi alias.

        Returns the completion text, or None on any failure.
        """
        try:
            url = "https://api.moonshot.cn/v1/chat/completions"
            headers = {
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json"
            }
            # Base model used for Kimi, taken from the alias table
            model_id = self.custom_models[self.KIMI_MODEL_ALIAS]["model_id"]

            return await self._post_chat_completion(
                url,
                headers,
                self._build_payload(model_id, messages, max_tokens),
                "Kimi API Error"
            )

        except Exception as e:
            logger.error(f"Error calling Kimi API: {e}")
            return None

# ========== EXTRACTOR DE REFERENCIAS ==========

class ReferenceExtractor:
    """Extracts bibliographic identifiers (DOI, arXiv, ISBN, URL, PMID) from text."""

    def __init__(self):
        # Convention: when a pattern has a capturing group, group 1 is the
        # identifier itself; a pattern without groups matches the identifier
        # as a whole. Optional parts use non-capturing groups so that they
        # are never mistaken for the identifier.
        self.patterns = {
            "doi": [
                # The trailing \b drops sentence punctuation after the DOI;
                # a "doi:" prefix is not needed to find it.
                r'\b10\.\d{4,9}/[-._;()/:A-Z0-9]+\b'
            ],
            "arxiv": [
                r'arXiv:\s*(\d{4}\.\d{4,5}(?:v\d+)?)',
                r'arxiv:\s*([a-z\-]+/\d{7})',
                r'\b\d{4}\.\d{4,5}(?:v\d+)?\b'
            ],
            "isbn": [
                r'ISBN(?:-1[03])?:?\s*((?:97[89][- ]?)?[0-9]{1,5}[- ]?[0-9]+[- ]?[0-9]+[- ]?[0-9X])\b',
                r'\b(?:97[89][- ]?)?[0-9]{1,5}[- ]?[0-9]+[- ]?[0-9]+[- ]?[0-9X]\b'
            ],
            "url": [
                r'https?://[^\s<>"]+|www\.[^\s<>"]+'
            ],
            "pmid": [
                r'PMID:\s*(\d+)',
                r'PubMed ID:\s*(\d+)'
            ]
        }

    def extract_from_text(self, text: str) -> Dict[str, List[str]]:
        """Extract every identifier found in ``text``.

        Returns:
            Mapping of reference type to the cleaned identifiers found, in
            order of first appearance per pattern and without duplicates.
            Types with no match are omitted; empty or None text gives ``{}``.
        """
        results = {}
        if not text:
            return results

        for ref_type, patterns in self.patterns.items():
            matches = []
            for pattern in patterns:
                for found in re.finditer(pattern, text, re.IGNORECASE):
                    # finditer (unlike findall) always gives access to the
                    # full match, so optional groups cannot hide the identifier
                    raw = found.group(1) if found.re.groups else found.group(0)
                    if not raw:
                        continue
                    cleaned = self._clean_identifier(raw, ref_type)
                    if not cleaned or cleaned in matches:
                        continue
                    # The ISBN patterns match any run of 4+ digits (years,
                    # page ranges, DOI fragments); keep real ISBNs only
                    if ref_type == "isbn" and not self._is_valid_isbn(cleaned):
                        continue
                    matches.append(cleaned)

            if matches:
                results[ref_type] = matches

        return results

    @staticmethod
    def _is_valid_isbn(candidate: str) -> bool:
        """Return True when ``candidate`` is an ISBN-10 or ISBN-13 with a valid check digit."""
        digits = re.sub(r'[- ]', '', candidate).upper()

        if len(digits) == 13:
            if not digits.isdigit() or not digits.startswith(('978', '979')):
                return False
            # ISBN-13: weights alternate 1, 3 and the sum is a multiple of 10
            total = sum(int(ch) * (3 if index % 2 else 1) for index, ch in enumerate(digits))
            return total % 10 == 0

        if len(digits) == 10:
            body, check = digits[:9], digits[9]
            if not body.isdigit() or not (check.isdigit() or check == 'X'):
                return False
            # ISBN-10: weights run 10..1, "X" stands for 10, sum is a multiple of 11
            values = [int(ch) for ch in body] + [10 if check == 'X' else int(check)]
            total = sum((10 - index) * value for index, value in enumerate(values))
            return total % 11 == 0

        return False

    def _clean_identifier(self, identifier: str, ref_type: str) -> str:
        """Strip prefixes, surrounding quotes/brackets and whitespace from an identifier.

        URLs without a scheme get ``https://`` prepended.
        """
        identifier = identifier.strip()

        # Remove known prefixes
        prefixes = ['doi:', 'DOI:', 'arxiv:', 'arXiv:', 'isbn:', 'ISBN:', 'pmid:', 'PMID:']
        for prefix in prefixes:
            if identifier.startswith(prefix):
                identifier = identifier[len(prefix):].strip()

        # Remove surrounding quotes and brackets
        identifier = identifier.strip('"\'<>()[]{}')

        # URLs must carry a protocol
        if ref_type == "url" and not identifier.startswith(('http://', 'https://')):
            identifier = f"https://{identifier}"

        return identifier

# ========== VERIFICADOR DE REFERENCIAS ==========

class ReferenceVerifier:
    """Verifies references against public APIs and downloads papers.

    All network access uses the blocking ``requests`` library; the async
    methods run those calls in the default executor so the event loop (and
    any concurrent download) is not stalled while a request is in flight.
    """

    # Environment variable holding the contact e-mail sent to Unpaywall.
    # The Unpaywall lookup is skipped when it is not set.
    UNPAYWALL_EMAIL_ENV = "UNPAYWALL_EMAIL"

    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }

    @staticmethod
    async def _run_blocking(func, *args, **kwargs):
        """Run a blocking callable in the default executor and return its result."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, lambda: func(*args, **kwargs))

    @staticmethod
    def _doi_path(doi: str) -> str:
        """Percent-encode a DOI for use as a URL path segment ("/" is kept)."""
        from urllib.parse import quote
        return quote(doi, safe='/')

    @staticmethod
    def _first(values, default=''):
        """Return the first element of ``values`` or ``default`` when missing/empty."""
        return values[0] if values else default

    @classmethod
    def _crossref_metadata(cls, work: Dict[str, Any]) -> Dict[str, Any]:
        """Extract title, authors, journal, year and URL from a Crossref work record.

        Missing or empty fields yield '' (title, journal), [] (authors) or
        None (year, url) instead of raising.
        """
        date_parts = (work.get('published') or {}).get('date-parts') or [[None]]
        first_date = date_parts[0] or [None]
        return {
            "title": cls._first(work.get('title')),
            "authors": work.get('author', []),
            "journal": cls._first(work.get('container-title')),
            "year": first_date[0],
            "url": work.get('URL')
        }

    @staticmethod
    def _crossref_pdf_link(work: Dict[str, Any]) -> Optional[str]:
        """Return the first link of a Crossref work whose content type is PDF."""
        for link in work.get('link') or []:
            if link.get('content-type') == 'application/pdf':
                return link.get('URL')
        return None

    async def _unpaywall_url(self, doi: str) -> Optional[str]:
        """Ask Unpaywall for an open-access location of ``doi``.

        Returns None when no contact e-mail is configured, the lookup fails,
        or the work is not open access. Failures are logged, never raised,
        so they cannot invalidate an already successful Crossref result.
        """
        import requests

        email = os.environ.get(self.UNPAYWALL_EMAIL_ENV, "").strip()
        if not email:
            logger.info(f"{self.UNPAYWALL_EMAIL_ENV} not set; skipping Unpaywall lookup")
            return None

        try:
            response = await self._run_blocking(
                requests.get,
                f"https://api.unpaywall.org/v2/{self._doi_path(doi)}",
                params={"email": email},
                timeout=10
            )
            if response.status_code != 200:
                return None
            data = response.json()
            if not data.get('is_oa'):
                return None
            # best_oa_location may be null in the response
            return (data.get('best_oa_location') or {}).get('url')
        except Exception as e:
            logger.warning(f"Unpaywall lookup failed for {doi}: {e}")
            return None

    async def verify_doi(self, doi: str) -> Dict[str, Any]:
        """Verify a DOI against Crossref and collect its metadata.

        Returns:
            Dict with keys identifier, type ("doi"), verified, metadata,
            download_url and error. ``verified`` is True when Crossref
            returned the work; ``download_url`` comes from a Crossref PDF
            link or, failing that, from Unpaywall.
        """
        import requests

        result = {
            "identifier": doi,
            "type": "doi",
            "verified": False,
            "metadata": {},
            "download_url": None,
            "error": None
        }

        try:
            # Try Crossref first
            url = f"https://api.crossref.org/works/{self._doi_path(doi)}"
            response = await self._run_blocking(
                requests.get, url, headers=self.headers, timeout=10
            )

            if response.status_code != 200:
                result["error"] = f"Crossref API returned {response.status_code}"
                return result

            work = response.json().get('message', {})

            result["verified"] = True
            result["metadata"] = self._crossref_metadata(work)

            # Look for a PDF; fall back to Unpaywall when Crossref has none
            result["download_url"] = self._crossref_pdf_link(work)
            if not result["download_url"]:
                result["download_url"] = await self._unpaywall_url(doi)

        except Exception as e:
            result["error"] = str(e)

        return result

    @staticmethod
    def _parse_arxiv_feed(xml_text: str) -> Optional[Dict[str, Any]]:
        """Parse an arXiv Atom feed and return the first entry's metadata.

        Returns None when the feed has no entry, or when the entry is titled
        "Error" (presumably how the API reports a problem -- confirm against
        the arXiv API manual).
        """
        import xml.etree.ElementTree as ET

        root = ET.fromstring(xml_text)
        ns = {'atom': 'http://www.w3.org/2005/Atom'}

        entry = root.find('.//atom:entry', ns)
        if entry is None:
            return None

        metadata = {}
        title = entry.find('atom:title', ns)
        if title is not None:
            if (title.text or '').strip() == 'Error':
                return None
            metadata["title"] = title.text

        summary = entry.find('atom:summary', ns)
        if summary is not None:
            metadata["abstract"] = summary.text

        return metadata

    async def verify_arxiv(self, arxiv_id: str) -> Dict[str, Any]:
        """Verify an arXiv ID through the arXiv export API.

        Returns:
            Dict with keys identifier, type ("arxiv"), verified, metadata,
            download_url and error. ``verified`` is True only when the feed
            actually contains an entry for the identifier.
        """
        import requests

        result = {
            "identifier": arxiv_id,
            "type": "arxiv",
            "verified": False,
            "metadata": {},
            "download_url": None,
            "error": None
        }

        try:
            # Strip an "arXiv:" prefix if present
            if 'arxiv:' in arxiv_id.lower():
                arxiv_id = arxiv_id.split(':')[-1].strip()

            # Fetch metadata
            api_url = f"https://export.arxiv.org/api/query?id_list={arxiv_id}"
            response = await self._run_blocking(
                requests.get, api_url, headers=self.headers, timeout=10
            )

            if response.status_code != 200:
                result["error"] = f"arXiv API returned {response.status_code}"
                return result

            metadata = self._parse_arxiv_feed(response.text)
            if metadata is None:
                # HTTP 200 alone does not prove the paper exists
                result["error"] = "arXiv API returned no entry for this identifier"
                return result

            result["verified"] = True
            result["download_url"] = f"https://arxiv.org/pdf/{arxiv_id}.pdf"
            result["metadata"] = metadata

        except Exception as e:
            result["error"] = str(e)

        return result

    def _download_to_disk(self, url: str, filename: str) -> Optional[str]:
        """Blocking download of ``url`` into downloads/<filename><ext>.

        Returns the written path, or None when the server does not answer
        200 or answers with an HTML page (a landing page, not a paper).
        """
        import requests

        with requests.get(url, headers=self.headers, stream=True, timeout=30) as response:
            if response.status_code != 200:
                logger.warning(f"Download of {url} returned {response.status_code}")
                return None

            # Decide the extension from the content type
            content_type = response.headers.get('content-type', '')
            if 'text/html' in content_type:
                logger.warning(f"Skipping {url}: got an HTML page instead of a document")
                return None
            ext = '.epub' if 'application/epub' in content_type else '.pdf'  # PDF by default

            # Create the download directory if needed
            os.makedirs("downloads", exist_ok=True)
            filepath = os.path.join("downloads", f"{filename}{ext}")

            try:
                with open(filepath, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)
            except Exception:
                # Do not leave a truncated file behind
                if os.path.exists(filepath):
                    os.remove(filepath)
                raise

            return filepath

    async def download_paper(self, url: str, filename: str) -> Optional[str]:
        """Download a paper from ``url``.

        Args:
            url: location of the document.
            filename: base name (without extension) of the file to create
                inside the ``downloads`` directory.

        Returns:
            Path of the saved file, or None on any failure (logged).
        """
        try:
            return await self._run_blocking(self._download_to_disk, url, filename)
        except Exception as e:
            logger.error(f"Error downloading {url}: {e}")

        return None

# ========== SISTEMA PRINCIPAL ==========

class BibliographySystem:
    """Main bibliographic processing pipeline.

    Combines extraction, verification, optional AI analysis, downloading and
    report/ZIP generation. Creates the ``downloads`` and ``reports``
    directories on construction.
    """

    def __init__(self):
        self.extractor = ReferenceExtractor()
        self.verifier = ReferenceVerifier()
        self.api_provider = APIProvider()

        # Output directories
        os.makedirs("downloads", exist_ok=True)
        os.makedirs("reports", exist_ok=True)

    async def process_document(self, text: str, use_ai: bool = False, 
                              api_provider: str = "openai", api_key: str = "",
                              api_model: str = "") -> Dict[str, Any]:
        """Process a document: extract, verify, analyse, download and report.

        Args:
            text: document text to scan for references.
            use_ai: request an AI analysis (needs ``api_key`` and ``api_provider``).
            api_provider: provider key forwarded to the API client.
            api_key: credential forwarded to the API client.
            api_model: model name forwarded to the API client.

        Returns:
            Dict with ``success`` (always True here), the full ``report``,
            the ``zip_path`` of the bundled results and a short ``summary``.
        """
        start_time = datetime.now()

        # 1. Extract references
        logger.info("Extracting references...")
        references = self.extractor.extract_from_text(text)

        total_refs = sum(len(v) for v in references.values())
        logger.info(f"Found {total_refs} references")

        # 2. Verify references (only DOIs and arXiv IDs have a verifier)
        logger.info("Verifying references...")
        verified_refs = []
        # (download_url, filename) pairs; the coroutines are created only
        # when they are about to be awaited
        pending_downloads = []

        verification_plan = [
            (self.verifier.verify_doi, references.get("doi", [])),
            (self.verifier.verify_arxiv, references.get("arxiv", [])),
        ]
        for verify, identifiers in verification_plan:
            for identifier in identifiers:
                result = await verify(identifier)
                if not result["verified"]:
                    continue
                verified_refs.append(result)
                if result["download_url"]:
                    # Short, filesystem-safe name derived from the identifier
                    filename = hashlib.md5(identifier.encode()).hexdigest()[:8]
                    pending_downloads.append((result["download_url"], filename))

        # 3. Optional AI analysis
        ai_analysis = None
        if use_ai and api_key and api_provider:
            logger.info("Using AI for analysis...")
            ai_analysis = await self._analyze_with_ai(
                text, references, verified_refs, 
                api_provider, api_key, api_model
            )

        # 4. Download files
        logger.info("Downloading files...")
        downloaded_files = []
        if pending_downloads:
            download_results = await asyncio.gather(*(
                self.verifier.download_paper(url, filename)
                for url, filename in pending_downloads
            ))
            downloaded_files = [r for r in download_results if r]

        # 5. Build the report
        processing_time = (datetime.now() - start_time).total_seconds()

        report = {
            "timestamp": datetime.now().isoformat(),
            "processing_time": processing_time,
            "total_references_found": total_refs,
            "references_by_type": references,
            "verified_references": len(verified_refs),
            "verification_details": verified_refs,
            "downloaded_files": downloaded_files,
            "ai_analysis": ai_analysis,
            # NOTE(review): total_refs also counts ISBNs, URLs and PMIDs,
            # which are never verified, so verification_rate is a lower bound
            "statistics": {
                "verification_rate": len(verified_refs) / max(1, total_refs),
                "download_rate": len(downloaded_files) / max(1, len(verified_refs))
            }
        }

        # 6. Save the report
        report_filename = f"report_{hashlib.md5(text.encode()).hexdigest()[:8]}.json"
        report_path = os.path.join("reports", report_filename)

        with open(report_path, 'w', encoding='utf-8') as f:
            json.dump(report, f, indent=2, ensure_ascii=False)

        # 7. Create the ZIP
        zip_path = self._create_zip(report, downloaded_files)

        return {
            "success": True,
            "report": report,
            "zip_path": zip_path,
            "summary": {
                "found": total_refs,
                "verified": len(verified_refs),
                "downloaded": len(downloaded_files),
                "time": f"{processing_time:.2f}s"
            }
        }

    async def _analyze_with_ai(self, text: str, references: Dict, 
                              verified_refs: List, api_provider: str, 
                              api_key: str, api_model: str) -> Optional[Dict]:
        """Ask the AI provider for an analysis of the document and its references.

        Returns:
            The JSON object found in the answer; ``{"raw_analysis": <text>}``
            when the answer holds no parseable JSON; None when the call
            fails or returns nothing.
        """
        try:
            # Build the prompt
            prompt = f"""Analiza el siguiente documento académico y sus referencias:

Documento (primeros 2000 caracteres):
{text[:2000]}...

Referencias encontradas:
{json.dumps(references, indent=2, ensure_ascii=False)}

Referencias verificadas: {len(verified_refs)}

Proporciona un análisis que incluya:
1. Temas principales del documento
2. Calidad de las referencias (relevancia, actualidad)
3. Sugerencias de referencias faltantes
4. Evaluación general de la solidez bibliográfica

Responde en formato JSON con las siguientes claves:
- main_topics (lista de temas)
- reference_quality (score 1-10)
- missing_references (sugerencias)
- overall_assessment (texto)
- recommendations (lista)"""

            messages = [
                {"role": "system", "content": "Eres un experto en análisis bibliográfico académico."},
                {"role": "user", "content": prompt}
            ]

            # Call the API
            analysis_text = await self.api_provider.call_api(
                api_provider, api_key, api_model, messages, max_tokens=1500
            )

            if not analysis_text:
                return None

            # Look for a JSON object embedded in the answer
            json_match = re.search(r'\{.*\}', analysis_text, re.DOTALL)
            if json_match:
                try:
                    return json.loads(json_match.group())
                except json.JSONDecodeError:
                    # Not valid JSON after all: fall through to the raw text
                    pass
            return {"raw_analysis": analysis_text}

        except Exception as e:
            logger.error(f"AI analysis error: {e}")

        return None

    def _create_zip(self, report: Dict, downloaded_files: List[str]) -> str:
        """Create a ZIP archive with the report, the downloads and a text summary.

        The archive is written to the current working directory and contains
        ``report.json``, ``summary.txt`` and ``downloads/<file>`` for every
        downloaded file that still exists.

        Returns:
            The file name of the created archive.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        zip_filename = f"bibliography_results_{timestamp}.zip"

        with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
            # JSON report, written straight into the archive (no extra copy
            # is left in the reports directory)
            zipf.writestr("report.json", json.dumps(report, indent=2, ensure_ascii=False))

            # Downloaded files
            for file_path in downloaded_files:
                if os.path.exists(file_path):
                    zipf.write(file_path, f"downloads/{os.path.basename(file_path)}")

            # Plain-text summary
            summary = self._generate_summary_text(report)
            zipf.writestr("summary.txt", summary)

        return zip_filename

    def _generate_summary_text(self, report: Dict) -> str:
        """Render a human-readable text summary of ``report``."""
        return f"""
        RESUMEN DE PROCESAMIENTO BIBLIOGRÁFICO
        ======================================
        
        Fecha: {report.get('timestamp', 'N/A')}
        Tiempo de procesamiento: {report.get('processing_time', 0):.2f} segundos
        
        ESTADÍSTICAS:
        ------------
        • Referencias encontradas: {report.get('total_references_found', 0)}
        • Referencias verificadas: {report.get('verified_references', 0)}
        • Archivos descargados: {len(report.get('downloaded_files', []))}
        • Tasa de verificación: {report.get('statistics', {}).get('verification_rate', 0) * 100:.1f}%
        • Tasa de descarga: {report.get('statistics', {}).get('download_rate', 0) * 100:.1f}%
        
        REFERENCIAS POR TIPO:
        ---------------------
        {json.dumps(report.get('references_by_type', {}), indent=2, ensure_ascii=False)}
        
        Para más detalles, consulte el reporte JSON incluido.
        """

# ========== INTERFAZ GRADIO SIMPLIFICADA ==========

def create_simple_interface():
    """Build the Gradio interface for the bibliography system.

    Returns:
        The ``gr.Blocks`` application, ready to be launched.
    """

    system = BibliographySystem()

    def error_outputs(message):
        """Outputs for a failed run: no file, the message, and empty views.

        The JSON view is a ``gr.Code`` component, so it receives an empty
        string rather than a dict.
        """
        return None, message, "", "", ""

    async def process_text(text_input, use_ai, api_provider, api_key, api_model):
        """Process the submitted text and return the five UI outputs.

        Returns a tuple (zip path, status markdown, HTML view, text summary,
        report JSON string).
        """
        # The textbox value may be None as well as blank
        if not text_input or not text_input.strip():
            return error_outputs("❌ Error: No se ingresó texto")

        try:
            result = await system.process_document(
                text_input, use_ai, api_provider, api_key, api_model
            )

            if not result["success"]:
                return error_outputs(f"❌ Error: {result.get('error', 'Error desconocido')}")

            summary = result["summary"]

            # HTML view
            html_view = f"""
                <div style="font-family: Arial, sans-serif; padding: 20px;">
                    <h2 style="color: #2c3e50;">📊 Resultados del Procesamiento</h2>
                    
                    <div style="background: #ecf0f1; padding: 15px; border-radius: 10px; margin: 15px 0;">
                        <h3 style="color: #34495e;">📈 Estadísticas</h3>
                        <div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 10px;">
                            <div style="background: white; padding: 10px; border-radius: 5px;">
                                <strong>Referencias Encontradas</strong><br>
                                <span style="font-size: 24px; color: #3498db;">{summary['found']}</span>
                            </div>
                            <div style="background: white; padding: 10px; border-radius: 5px;">
                                <strong>Verificadas</strong><br>
                                <span style="font-size: 24px; color: #2ecc71;">{summary['verified']}</span>
                            </div>
                            <div style="background: white; padding: 10px; border-radius: 5px;">
                                <strong>Descargadas</strong><br>
                                <span style="font-size: 24px; color: #9b59b6;">{summary['downloaded']}</span>
                            </div>
                            <div style="background: white; padding: 10px; border-radius: 5px;">
                                <strong>Tiempo</strong><br>
                                <span style="font-size: 24px; color: #e74c3c;">{summary['time']}</span>
                            </div>
                        </div>
                    </div>
                </div>
                """

            # Plain-text view
            text_view = f"""
                Procesamiento completado exitosamente.
                
                • Referencias encontradas: {summary['found']}
                • Referencias verificadas: {summary['verified']}
                • Archivos descargados: {summary['downloaded']}
                • Tiempo de procesamiento: {summary['time']}
                
                El archivo ZIP con los resultados está listo para descargar.
                """

            # Report JSON, truncated to keep the UI responsive
            report_json = json.dumps(result["report"], indent=2, ensure_ascii=False)
            if len(report_json) > 5000:
                report_json = report_json[:5000] + "\n... (reporte truncado por tamaño)"

            return result["zip_path"], "✅ Procesamiento completado", html_view, text_view, report_json

        except Exception as e:
            logger.error(f"Processing error: {e}")
            return error_outputs(f"❌ Error: {str(e)}")

    # Build the interface
    with gr.Blocks(title="Sistema de Recopilación Bibliográfica", theme=gr.themes.Soft()) as interface:
        gr.Markdown("# 📚 Sistema de Recopilación Bibliográfica")
        gr.Markdown("Extrae, verifica y descarga referencias académicas de textos")

        with gr.Row():
            # Left column: AI configuration
            with gr.Column(scale=1):
                gr.Markdown("### ⚙️ Configuración")

                use_ai = gr.Checkbox(
                    label="Usar IA para análisis avanzado",
                    value=False
                )

                api_provider = gr.Dropdown(
                    choices=["openai", "moonshot", "nebius", "anthropic", "deepseek"],
                    label="Proveedor de IA",
                    value="moonshot"
                )

                api_key = gr.Textbox(
                    label="API Key",
                    type="password",
                    placeholder="Ingresa tu API key"
                )

                api_model = gr.Textbox(
                    label="Modelo (opcional)",
                    value="moonshotai/Kimi-K2-Instruct",
                    placeholder="Deja vacío para usar el modelo por defecto"
                )

                gr.Markdown("""
                ### 🔑 APIs Soportadas
                - **Moonshot**: moonshotai/Kimi-K2-Instruct
                - **Nebius**: neural-chat-7b-v3-1
                - **OpenAI**: gpt-4, gpt-3.5-turbo
                - **Anthropic**: Claude 3
                - **DeepSeek**: deepseek-chat
                """)

            # Right column: input text and results
            with gr.Column(scale=2):
                gr.Markdown("### 📄 Ingresar Texto")

                text_input = gr.Textbox(
                    label="Texto con referencias bibliográficas",
                    placeholder="Pega aquí tu texto con referencias académicas...",
                    lines=15,
                    max_lines=50
                )

                process_btn = gr.Button("🔍 Procesar Texto", variant="primary")

                gr.Markdown("### 📦 Resultados")

                result_file = gr.File(label="Descargar Resultados (ZIP)")
                result_status = gr.Markdown()

                with gr.Tabs():
                    with gr.TabItem("📋 Vista HTML"):
                        html_output = gr.HTML(label="Resultados Visuales")

                    with gr.TabItem("📝 Texto"):
                        text_output = gr.Textbox(
                            label="Resumen",
                            lines=10,
                            max_lines=20
                        )

                    with gr.TabItem("🔧 JSON"):
                        json_output = gr.Code(
                            label="Datos del Reporte",
                            language="json",
                            lines=15
                        )

        # Wire events
        process_btn.click(
            process_text,
            inputs=[text_input, use_ai, api_provider, api_key, api_model],
            outputs=[result_file, result_status, html_output, text_output, json_output]
        )

        # Examples
        gr.Markdown("### 📖 Ejemplo de Texto")
        gr.Examples(
            examples=[["""Este es un ejemplo de texto con referencias académicas.

1. El paper seminal de AlexNet (Krizhevsky et al., 2012) tiene DOI: 10.1145/3065386

2. El trabajo sobre Transformers está en arXiv: arXiv:1706.03762

3. El libro de Deep Learning tiene ISBN: 978-0262035613

4. Más referencias:
   - DOI: 10.1038/nature14539
   - DOI: 10.1109/CVPR.2016.90
   - arXiv: 1506.02640

URLs académicas:
- https://arxiv.org/abs/1706.03762
- https://doi.org/10.1145/3065386"""]],
            inputs=[text_input],
            label="Ejemplo básico"
        )

    return interface

# ========== EJECUCIÓN PRINCIPAL ==========

def main():
    """Build the Gradio interface and start serving it."""
    # Launch settings for Hugging Face Spaces: listen on all interfaces,
    # port 7860, with share links and debug mode switched off
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": False,
    }

    app = create_simple_interface()
    app.launch(**launch_options)

# Script entry point: start the app only when this file is executed directly
if __name__ == "__main__":
    main()