satyakimitra committed on
Commit
522f7a0
·
1 Parent(s): 4bf5411

Final Report Updated

Browse files
.dockerignore ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Files and directories excluded from the Docker build context
2
+ __pycache__
3
+ *.pyc
4
+ *.pyo
5
+ *.pyd
6
+ .Python
7
+ env
8
+ pip-log.txt
9
+ pip-delete-this-directory.txt
10
+ .tox
11
+ .coverage
12
+ .coverage.*
13
+ .cache
14
+ nosetests.xml
15
+ coverage.xml
16
+ *.cover
17
+ *.log
18
+ .git
19
+ .mypy_cache
20
+ .pytest_cache
21
+ .hypothesis
22
+ .DS_Store
23
+ # Secrets and local environment files must never be copied into the image
24
+ .env
25
+ .env.*
26
+ .envrc
27
+ # Local virtual environments; the image installs its own dependencies
28
+ venv
29
+ .venv
30
+ ENV
31
+ # The patterns above only match at the context root; also cover nested paths
32
+ **/__pycache__
33
+ **/*.pyc
.gitignore CHANGED
@@ -1,23 +1,10 @@
1
- # Model files (too large for GitHub)
2
- models/embeddings/model.safetensors
3
- models/legal-bert/model.safetensors
4
- models/
5
-
6
- # Sample data directories (very large)
7
- data/sample_data/
8
- basic_streamlit/
9
-
10
- # Database files
11
- *.db
12
- *.sqlite3
13
- *.sqlite
14
- basic_streamlit/legal_market_terms.db
15
-
16
- # Python
17
  __pycache__/
18
  *.py[cod]
19
  *$py.class
20
  *.so
 
 
21
  .Python
22
  build/
23
  develop-eggs/
@@ -31,83 +18,59 @@ parts/
31
  sdist/
32
  var/
33
  wheels/
34
- share/python-wheels/
35
  *.egg-info/
36
  .installed.cfg
37
  *.egg
38
- MANIFEST
39
 
40
  # Virtual environments
41
- env/
42
  venv/
43
- .venv/
44
- .env
45
  ENV/
46
 
47
- # IDE
48
  .vscode/
49
  .idea/
50
  *.swp
51
  *.swo
52
- *~
53
 
54
  # OS
55
  .DS_Store
56
- .DS_Store?
57
- ._*
58
- .Spotlight-V100
59
- .Trashes
60
- ehthumbs.db
61
  Thumbs.db
62
 
63
- # Jupyter
64
- .ipynb_checkpoints/
65
-
66
  # Logs
67
- *.log
68
  logs/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
  # Cache directories
71
  .cache/
72
- cache/
73
-
74
- # Streamlit
75
- .streamlit/
76
 
77
- # Temporary files
78
- *.tmp
79
- *.temp
80
 
81
- # Documentation
82
- _site/
83
- .sass-cache/
84
- .jekyll-cache/
85
 
86
  # Coverage reports
87
- htmlcov/
88
  .coverage
89
- .coverage.*
90
- coverage.xml
91
- *.cover
92
- *.py,cover
93
- .hypothesis/
94
- .pytest_cache/
95
-
96
- # MyPy
97
- .mypy_cache/
98
- .dmypy.json
99
- dmypy.json
100
 
101
- # Pyre
102
- .pyre/
103
 
104
- # Environment variables
105
- .env
106
- .env.local
107
- .env.production
108
-
109
- # Large file types that might be generated
110
- *.pdf
111
- *.docx
112
- *.zip
113
- *.tar.gz
 
1
+ # Byte-compiled / optimized / DLL files
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  __pycache__/
3
  *.py[cod]
4
  *$py.class
5
  *.so
6
+
7
+ # Distribution / packaging
8
  .Python
9
  build/
10
  develop-eggs/
 
18
  sdist/
19
  var/
20
  wheels/
 
21
  *.egg-info/
22
  .installed.cfg
23
  *.egg
 
24
 
25
  # Virtual environments
 
26
  venv/
27
+ env/
 
28
  ENV/
29
 
30
+ # IDEs
31
  .vscode/
32
  .idea/
33
  *.swp
34
  *.swo
 
35
 
36
  # OS
37
  .DS_Store
 
 
 
 
 
38
  Thumbs.db
39
 
 
 
 
40
  # Logs
 
41
  logs/
42
+ *.log
43
+
44
+ # Environment variables
45
+ .env
46
+ .env.local
47
+ .env.dev
48
+ .env.prod
49
+
50
+ # Model downloads (if stored locally)
51
+ models/
52
+ cache/models/
53
+
54
+ # Uploaded files (if stored temporarily)
55
+ uploads/
56
 
57
  # Cache directories
58
  .cache/
59
+ __pycache__/
 
 
 
60
 
61
+ # Reports generated
62
+ *.pdf
63
+ reports/
64
 
65
+ # Jupyter notebooks checkpoints
66
+ .ipynb_checkpoints/
 
 
67
 
68
  # Coverage reports
 
69
  .coverage
70
+ htmlcov/
 
 
 
 
 
 
 
 
 
 
71
 
72
+ # Environment-specific files
73
+ .envrc
74
 
75
+ # Special Case
76
+ basic_streamlit/
 
 
 
 
 
 
 
 
Dockerfile ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use Python 3.11 for better performance and compatibility
2
+ FROM python:3.11-slim
3
+
4
+ # Set working directory
5
+ WORKDIR /app
6
+
7
+ # Install system dependencies
8
+ RUN apt-get update && apt-get install -y \
9
+ curl \
10
+ wget \
11
+ git \
12
+ build-essential \
13
+ && rm -rf /var/lib/apt/lists/*
14
+
15
+ # Copy requirements first for better caching
16
+ COPY requirements.txt .
17
+
18
+ # Install Python dependencies
19
+ RUN pip install --no-cache-dir -r requirements.txt
20
+
21
+ # Download spaCy model
22
+ RUN python -m spacy download en_core_web_sm
23
+
24
+ # Install Ollama
25
+ RUN curl -fsSL https://ollama.ai/install.sh | sh
26
+
27
+ # Copy application code
28
+ COPY . .
29
+
30
+ # Create necessary directories
31
+ RUN mkdir -p uploads cache logs cache/models models
32
+
33
+ # Download pre-trained models in advance (best-effort).
34
+ # A RUN instruction is one logical line, so the Python snippet is written as
35
+ # semicolon-separated statements and every continued line ends with a backslash.
36
+ # If the download fails, Python prints the error, the shell fallback prints a
37
+ # warning, and the build continues; the models are then downloaded on first use.
38
+ RUN python -c "import logging; \
39
+ logging.basicConfig(level=logging.INFO); \
40
+ from model_manager.model_loader import ModelLoader; \
41
+ print('Pre-downloading AI models...'); \
42
+ ModelLoader().ensure_models_downloaded(); \
43
+ print('All models downloaded successfully!')" \
44
+ || echo 'Model download warning: models will be downloaded on first use...'
45
+
46
+ # Pull the Ollama model at build time (best-effort).
47
+ # Background processes do not survive the RUN step that started them, so the
48
+ # server start and the pull must share a single RUN instruction.
49
+ # The start command below pulls the model again, so a failure here is not fatal.
50
+ RUN ollama serve & \
51
+ sleep 10 && \
52
+ (ollama pull llama3:8b || echo 'Ollama pull warning: model will be pulled at startup...')
53
+
54
+ # Expose port (required for Hugging Face Spaces)
55
+ EXPOSE 7860
56
+
57
+ # Health check. The start period is longer than the 15-second Ollama wait in
58
+ # the start command, so probes during warm-up do not count as failures.
59
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
60
+ CMD curl -f http://localhost:7860/docs || exit 1
61
+
62
+ # Start the application.
63
+ # Shell form is used because a JSON-array CMD cannot contain raw line breaks;
64
+ # every continued line ends with a backslash so the script is one instruction.
65
+ # Steps:
66
+ # 1. Start the Ollama server in the background.
67
+ # 2. Wait for Ollama to start.
68
+ # 3. Pull the model in the background so the API does not wait for it.
69
+ # 4. Start the FastAPI application; exec makes uvicorn replace the shell so it
70
+ # receives container stop signals directly.
71
+ # --reload is intentionally omitted: it is a development-only file watcher and
72
+ # is not needed when the image contents never change at runtime.
73
+ CMD echo 'Starting Ollama server...'; \
74
+ ollama serve & \
75
+ echo 'Waiting for Ollama to start...'; \
76
+ sleep 15; \
77
+ echo 'Checking for Ollama model...'; \
78
+ ollama pull llama3:8b & \
79
+ echo 'Starting AI Contract Risk Analyzer...'; \
80
+ exec uvicorn main:app --host 0.0.0.0 --port 7860
README.md CHANGED
@@ -1,3 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
1
  <div align="center">
2
 
3
  # AI Contract Risk Analyzer 🤖⚖️
@@ -5,25 +16,33 @@
5
  [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
6
  [![FastAPI](https://img.shields.io/badge/FastAPI-0.104+-green.svg)](https://fastapi.tiangolo.com/)
7
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 
 
 
 
 
 
 
 
8
 
9
  > **Democratizing Legal Intelligence Through AI**
10
- > Comprehensive contract risk analysis using Legal-BERT, multi-model NLP, and LLM integration
11
 
12
  </div>
13
 
14
-
15
  ## 🎯 Overview
16
 
17
- The AI Contract Risk Analyzer is a production-grade legal document analysis platform that leverages state-of-the-art NLP and machine learning to provide instant, comprehensive contract risk assessment. Built with Legal-BERT for clause understanding, semantic embeddings for similarity matching, and LLM integration for natural language explanations.
18
 
19
  ### Key Features
20
 
21
- - 📄 **Multi-Format Support**: PDF, DOCX document processing
22
- - 🔍 **12+ Contract Categories**: Employment, NDA, Lease, Service agreements, etc.
23
- - ⚡ **Sub-30s Analysis**: Real-time risk scoring and clause extraction
24
  - 🔒 **Privacy-First**: Ephemeral processing, zero data retention
25
- - 🌐 **LLM Integration**: Ollama, OpenAI, Anthropic support
26
- - 📊 **Comprehensive Reports**: Executive summaries, negotiation points, market comparisons
 
27
 
28
  ---
29
 
@@ -37,6 +56,7 @@ The AI Contract Risk Analyzer is a production-grade legal document analysis plat
37
  - [Configuration](#-configuration)
38
  - [Development](#-development)
39
  - [Performance](#-performance)
 
40
  - [License](#-license)
41
 
42
  ---
@@ -45,6 +65,8 @@ The AI Contract Risk Analyzer is a production-grade legal document analysis plat
45
 
46
  ### System Overview
47
 
 
 
48
  ```
49
  ┌─────────────────────────────────────────────────────────────┐
50
  │ Client Layer │
@@ -109,113 +131,114 @@ The AI Contract Risk Analyzer is a production-grade legal document analysis plat
109
  └─────────────────────────────────────────────────────────────┘
110
  ```
111
 
112
- ### Analysis Pipeline Flowchart
 
113
 
114
  ```mermaid
115
  graph TB
116
- Start[User Uploads Contract PDF/DOCX] --> Read[Document Reader]
117
  Read --> Validate{Contract Validator}
118
  Validate -->|Invalid| Error[Return Error]
119
  Validate -->|Valid| Classify[Contract Classifier]
120
 
121
- Classify --> Extract[Clause Extractor]
 
 
 
122
 
123
- Extract --> Risk[Risk Analyzer]
124
- Extract --> Terms[Term Analyzer]
125
- Extract --> Protect[Protection Checker]
126
- Extract --> Market[Market Comparator]
127
 
128
- Risk --> Aggregate[Result Aggregator]
129
- Terms --> Aggregate
130
- Protect --> Aggregate
131
- Market --> Aggregate
132
-
133
- Extract --> LLM[LLM Interpreter]
134
- LLM --> Aggregate
135
-
136
- Risk --> Negotiate[Negotiation Engine]
137
- Terms --> Negotiate
138
- Protect --> Negotiate
139
- Negotiate --> Aggregate
140
-
141
- Aggregate --> Report[Executive Summary]
142
- Report --> End[JSON Response + PDF Report]
143
 
144
  style Start fill:#e1f5e1
145
  style End fill:#e1f5e1
146
  style Error fill:#ffe1e1
147
  style Classify fill:#e1e5ff
148
  style Extract fill:#e1e5ff
149
- style Risk fill:#ffe5e1
150
- style LLM fill:#fff5e1
151
- style Negotiate fill:#f5e1ff
152
  ```
153
 
154
  ### Component Diagram
155
 
156
  ```mermaid
157
  graph LR
158
- subgraph "Frontend"
159
- UI[HTML/CSS/JS]
160
  end
161
-
162
- subgraph "Backend (FastAPI)"
163
- API[REST API]
164
- Jobs[Job Queue]
165
  end
166
-
167
- subgraph "Services"
168
- C[Classifier]
169
- E[Extractor]
170
- R[Risk]
171
- T[Terms]
172
- P[Protection]
173
- L[LLM]
174
- N[Negotiation]
175
- M[Market]
 
176
  end
177
-
178
- subgraph "Model Manager"
179
- Reg[Registry]
180
- Load[Loader]
181
- Cache[Cache]
182
- LLMMgr[LLM Manager]
183
  end
184
-
185
  subgraph "AI Models"
186
- BERT[Legal-BERT]
187
- SBERT[Sentence-BERT]
188
- Ollama[Ollama]
189
- OpenAI[OpenAI]
190
- Claude[Anthropic]
191
  end
192
-
193
- UI -->|HTTP| API
194
- API --> Jobs
195
- Jobs --> C
196
- C --> E
197
- E --> R
198
- E --> T
199
- E --> P
200
- E --> M
201
- E --> L
202
- L --> N
203
-
204
- C --> Reg
205
- E --> Reg
206
- R --> Reg
207
- M --> Reg
208
- L --> LLMMgr
209
- N --> LLMMgr
210
-
211
- Reg --> Load
212
- Load --> Cache
213
- Cache --> BERT
214
- Cache --> SBERT
215
-
216
- LLMMgr --> Ollama
217
- LLMMgr --> OpenAI
218
- LLMMgr --> Claude
 
 
 
 
 
 
 
 
 
219
  ```
220
 
221
  ---
@@ -232,11 +255,11 @@ Storage: 10GB for models
232
  GPU: Optional (3x speedup with NVIDIA GPU + CUDA 11.8+)
233
  ```
234
 
235
- ### Option 1: Quick Install (Recommended)
236
 
237
  ```bash
238
  # Clone repository
239
- git clone https://github.com/yourusername/contract-guard-ai.git
240
  cd contract-guard-ai
241
 
242
  # Create virtual environment
@@ -249,41 +272,10 @@ pip install -r requirements.txt
249
  # Download spaCy model (optional, for advanced text processing)
250
  python -m spacy download en_core_web_sm
251
 
252
- # Download AI models (automatic on first run, or manual)
253
  python -c "from model_manager.model_loader import ModelLoader; ModelLoader()"
254
  ```
255
 
256
- ### Option 2: Docker Installation
257
-
258
- ```bash
259
- # Build Docker image
260
- docker build -t contract-analyzer .
261
-
262
- # Run container
263
- docker run -p 8000:8000 -v ./models:/app/models contract-analyzer
264
-
265
- # With GPU support
266
- docker run --gpus all -p 8000:8000 -v ./models:/app/models contract-analyzer
267
- ```
268
-
269
- ### Option 3: Development Setup
270
-
271
- ```bash
272
- # Install in editable mode with dev dependencies
273
- pip install -e ".[dev]"
274
-
275
- # Install pre-commit hooks
276
- pre-commit install
277
-
278
- # Run tests
279
- pytest tests/
280
-
281
- # Run linting
282
- flake8 .
283
- black .
284
- mypy .
285
- ```
286
-
287
  ---
288
 
289
  ## ⚡ Quick Start
@@ -291,7 +283,7 @@ mypy .
291
  ### 1. Start Required Services
292
 
293
  ```bash
294
- # Terminal 1: Start Ollama (for LLM features)
295
  ollama serve
296
 
297
  # Pull LLM model
@@ -332,370 +324,13 @@ MIN_CONTRACT_LENGTH=300
332
  ### 3. Launch Application
333
 
334
  ```bash
335
- # Option A: Use launch script (starts API + Frontend)
336
- python launch.py
337
-
338
- # Option B: Start API only
339
  python app.py
340
 
341
- # Option C: Use Uvicorn directly
342
  uvicorn app:app --reload --host 0.0.0.0 --port 8000
343
  ```
344
 
345
- ### 4. Access Services
346
-
347
- - **API**: http://localhost:8000
348
- - **Interactive Docs**: http://localhost:8000/api/docs
349
- - **Health Check**: http://localhost:8000/api/v1/health
350
-
351
- ### 5. Analyze Your First Contract
352
-
353
- ```bash
354
- # Using cURL
355
- curl -X POST "http://localhost:8000/api/v1/analyze" \
356
- -F "file=@/path/to/contract.pdf" \
357
- -F "max_clauses=15" \
358
- -F "interpret_clauses=true" \
359
- -F "llm_provider=ollama"
360
-
361
- # Response (job created)
362
- {
363
- "job_id": "abc-123-def-456",
364
- "status": "pending",
365
- "progress": 0,
366
- "message": "Analysis queued"
367
- }
368
-
369
- # Check status
370
- curl "http://localhost:8000/api/v1/jobs/abc-123-def-456"
371
-
372
- # Response (completed)
373
- {
374
- "job_id": "abc-123-def-456",
375
- "status": "completed",
376
- "progress": 100,
377
- "result": {
378
- "overall_score": 78,
379
- "risk_level": "HIGH",
380
- "clauses": [...],
381
- "unfavorable_terms": [...],
382
- ...
383
- }
384
- }
385
- ```
386
-
387
- ---
388
-
389
- ## 📚 API Documentation
390
-
391
- ### Core Endpoints
392
-
393
- #### 1. Analyze Contract (Async)
394
-
395
- ```http
396
- POST /api/v1/analyze
397
- Content-Type: multipart/form-data
398
-
399
- Parameters:
400
- - file: File (required) - PDF or DOCX contract
401
- - max_clauses: int (default: 15) - Max clauses to extract
402
- - interpret_clauses: bool (default: true) - Generate plain-English explanations
403
- - generate_negotiation_points: bool (default: true) - Create negotiation strategy
404
- - compare_to_market: bool (default: true) - Compare to market standards
405
- - llm_provider: str (default: "ollama") - LLM provider: ollama/openai/anthropic
406
-
407
- Response: 202 Accepted
408
- {
409
- "job_id": "uuid",
410
- "status": "pending",
411
- "progress": 0,
412
- "message": "Analysis queued",
413
- "created_at": "ISO-8601 timestamp"
414
- }
415
- ```
416
-
417
- #### 2. Get Job Status
418
-
419
- ```http
420
- GET /api/v1/jobs/{job_id}
421
-
422
- Response: 200 OK
423
- {
424
- "job_id": "uuid",
425
- "status": "completed", // pending/processing/completed/failed
426
- "progress": 100,
427
- "message": "Analysis complete",
428
- "result": {
429
- "analysis_id": "uuid",
430
- "timestamp": "ISO-8601",
431
- "classification": {...},
432
- "clauses": [...],
433
- "risk_analysis": {...},
434
- "unfavorable_terms": [...],
435
- "missing_protections": [...],
436
- "clause_interpretations": [...],
437
- "negotiation_points": [...],
438
- "market_comparisons": [...],
439
- "executive_summary": "text",
440
- "metadata": {...}
441
- }
442
- }
443
- ```
444
-
445
- #### 3. Health Check
446
-
447
- ```http
448
- GET /api/v1/health
449
-
450
- Response: 200 OK
451
- {
452
- "status": "healthy",
453
- "version": "1.0.0",
454
- "timestamp": "ISO-8601",
455
- "models_loaded": 2,
456
- "gpu_available": true
457
- }
458
- ```
459
-
460
- #### 4. Quick Validation
461
-
462
- ```http
463
- POST /api/v1/validate
464
- Content-Type: multipart/form-data
465
-
466
- Parameters:
467
- - file: File (required)
468
-
469
- Response: 200 OK
470
- {
471
- "is_valid": true,
472
- "validation_type": "high_confidence",
473
- "message": "Strong contract indicators (score: 45)",
474
- "scores": {
475
- "total": 45,
476
- "indicators": 30,
477
- "structural": 15
478
- },
479
- "features": {
480
- "has_signature_block": true,
481
- "has_effective_date": true,
482
- "has_party_identification": true
483
- }
484
- }
485
- ```
486
-
487
- #### 5. List Jobs
488
-
489
- ```http
490
- GET /api/v1/jobs?limit=10
491
-
492
- Response: 200 OK
493
- [
494
- {
495
- "job_id": "uuid",
496
- "status": "completed",
497
- "created_at": "ISO-8601",
498
- ...
499
- },
500
- ...
501
- ]
502
- ```
503
-
504
- #### 6. Delete Job
505
-
506
- ```http
507
- DELETE /api/v1/jobs/{job_id}
508
-
509
- Response: 200 OK
510
- {
511
- "message": "Job deleted successfully",
512
- "job_id": "uuid"
513
- }
514
- ```
515
-
516
- #### 7. Get Contract Categories
517
-
518
- ```http
519
- GET /api/v1/categories
520
-
521
- Response: 200 OK
522
- [
523
- "employment",
524
- "consulting",
525
- "nda",
526
- "technology",
527
- "intellectual_property",
528
- "real_estate",
529
- "financial",
530
- "business",
531
- "sales",
532
- "service_agreement",
533
- "vendor",
534
- "agency"
535
- ]
536
- ```
537
-
538
- #### 8. Get Market Standards
539
-
540
- ```http
541
- GET /api/v1/market-standards/{category}
542
-
543
- Response: 200 OK
544
- {
545
- "reasonable": "Market-standard reasonable clause text...",
546
- "standard": "Typical market standard clause text...",
547
- "aggressive": "Aggressive/unfavorable clause text..."
548
- }
549
- ```
550
-
551
- ### Response Schemas
552
-
553
- <details>
554
- <summary><b>Complete Analysis Result Schema</b></summary>
555
-
556
- ```json
557
- {
558
- "analysis_id": "uuid",
559
- "timestamp": "2025-01-15T10:30:00.000Z",
560
-
561
- "classification": {
562
- "category": "employment",
563
- "subcategory": "full_time",
564
- "confidence": 0.89,
565
- "reasoning": ["Strong keyword match", "Semantic similarity 0.87"],
566
- "detected_keywords": ["employee", "salary", "benefits"],
567
- "alternative_categories": [
568
- {"category": "consulting", "confidence": 0.43}
569
- ]
570
- },
571
-
572
- "clauses": [
573
- {
574
- "text": "Employee shall not engage in competitive business...",
575
- "reference": "Section 8.2",
576
- "category": "non_compete",
577
- "confidence": 0.92,
578
- "start_pos": 5432,
579
- "end_pos": 5680,
580
- "extraction_method": "structural",
581
- "risk_indicators": ["non-compete", "competitive"],
582
- "subclauses": [],
583
- "legal_bert_score": 0.88
584
- }
585
- ],
586
-
587
- "risk_analysis": {
588
- "overall_score": 78,
589
- "risk_level": "HIGH",
590
- "category_scores": {
591
- "restrictive_covenants": 85,
592
- "termination_rights": 72,
593
- "penalties_liability": 68
594
- },
595
- "risk_factors": ["restrictive_covenants"],
596
- "detailed_findings": {
597
- "restrictive_covenants": [
598
- "Duration of 24 months is excessive"
599
- ]
600
- },
601
- "benchmark_comparison": {
602
- "non_compete_duration": "✗ Exceeds market standards"
603
- },
604
- "risk_breakdown": [
605
- {
606
- "category": "Restrictive Covenants",
607
- "score": 85,
608
- "summary": "Analysis of non-compete clauses",
609
- "findings": ["Duration 24 months excessive"]
610
- }
611
- ]
612
- },
613
-
614
- "unfavorable_terms": [
615
- {
616
- "term": "Non Compete Overly Broad",
617
- "category": "non_compete",
618
- "severity": "critical",
619
- "explanation": "Restricts ability to earn living",
620
- "clause_reference": "Section 8.2",
621
- "suggested_fix": "Limit to 6-12 months, direct competitors only"
622
- }
623
- ],
624
-
625
- "missing_protections": [
626
- {
627
- "protection": "'For Cause' Definition",
628
- "importance": "critical",
629
- "explanation": "Termination grounds are ambiguous",
630
- "recommendation": "Add clear 'for cause' definition",
631
- "category": "termination",
632
- "examples": ["For Cause means: gross negligence, breach..."]
633
- }
634
- ],
635
-
636
- "clause_interpretations": [
637
- {
638
- "clause_reference": "Section 8.2",
639
- "original_text": "Employee shall not engage...",
640
- "plain_english_summary": "Prevents working for competitors",
641
- "key_points": [
642
- "Restricts future employment",
643
- "Duration and scope are key factors"
644
- ],
645
- "potential_risks": [
646
- "Could prevent earning a living in your field",
647
- "24-month duration is excessive"
648
- ],
649
- "favorability": "unfavorable",
650
- "confidence": 0.85
651
- }
652
- ],
653
-
654
- "negotiation_points": [
655
- {
656
- "priority": 1,
657
- "category": "non_compete",
658
- "issue": "Non Compete Overly Broad",
659
- "current_language": "Employee shall not engage in any competitive business for 24 months globally...",
660
- "proposed_language": "Employee agrees not to work for direct competitors in software industry within 50 miles for 6 months...",
661
- "rationale": "Current terms unreasonably restrict ability to earn living",
662
- "fallback_position": "If 6 months refused, negotiate to 12 months maximum",
663
- "estimated_difficulty": "hard"
664
- }
665
- ],
666
-
667
- "market_comparisons": [
668
- {
669
- "clause_category": "non_compete",
670
- "user_clause": "Employee shall not engage in any competitive business for 24 months...",
671
- "market_standard": "Employee shall not engage in competitive activities for 12 months within geographic area...",
672
- "similarity_score": 0.68,
673
- "assessment": "unfavorable",
674
- "explanation": "More aggressive than market standards",
675
- "recommendation": "Strongly recommend negotiating to align with market norms"
676
- }
677
- ],
678
-
679
- "executive_summary": "This employment contract has been analyzed and assigned an overall risk score of 78/100, classified as HIGH risk. SIGNIFICANT CONCERNS: This contract has several unfavorable terms that should be negotiated before execution...",
680
-
681
- "metadata": {
682
- "text_length": 15432,
683
- "word_count": 2876,
684
- "num_clauses": 12,
685
- "contract_type": "employment",
686
- "options": {
687
- "max_clauses": 15,
688
- "interpret_clauses": true,
689
- "generate_negotiation_points": true,
690
- "compare_to_market": true,
691
- "llm_provider": "ollama"
692
- }
693
- }
694
- }
695
- ```
696
-
697
- </details>
698
-
699
  ---
700
 
701
  ## 🔧 Technical Details
@@ -767,7 +402,6 @@ contract-guard-ai/
767
  │ ├── protection_checker.py # Missing protections checker
768
  │ ├── llm_interpreter.py # LLM-powered clause interpretation
769
  │ ├── negotiation_engine.py # Negotiation points generation
770
- │ └── market_comparator.py # Market standards comparison
771
 
772
  ├── utils/ # Utility functions
773
  │ ├── __init__.py
@@ -794,6 +428,7 @@ contract-guard-ai/
794
  ├── uploads/ # Temporary upload storage
795
 
796
  └── docs/ # Documentation
 
797
  └── BLOGPOST.md
798
  ```
799
 
@@ -848,20 +483,6 @@ Where:
848
  score = raw model confidence
849
  ```
850
 
851
- ### Performance Characteristics
852
-
853
- #### Latency Benchmarks
854
-
855
- | Operation | p50 | p95 | p99 |
856
- |-----------|-----|-----|-----|
857
- | Document Upload | 120ms | 250ms | 380ms |
858
- | Contract Classification | 180ms | 320ms | 450ms |
859
- | Clause Extraction | 2.1s | 4.8s | 7.2s |
860
- | Risk Analysis | 1.8s | 3.2s | 4.5s |
861
- | LLM Interpretation (10 clauses) | 8.5s | 15.2s | 22.1s |
862
- | **Full Pipeline** | **22.3s** | **38.7s** | **52.4s** |
863
-
864
-
865
  #### Memory Usage
866
 
867
  ```
@@ -874,325 +495,10 @@ Total (Peak): ~1.2GB
874
 
875
  ---
876
 
877
- ## ⚙️ Configuration
878
-
879
- ### Application Settings (config/settings.py)
880
-
881
- ```python
882
- from pydantic_settings import BaseSettings
883
-
884
- class Settings(BaseSettings):
885
- # Application
886
- APP_NAME: str = "AI Contract Risk Analyzer"
887
- APP_VERSION: str = "1.0.0"
888
- HOST: str = "0.0.0.0"
889
- PORT: int = 8000
890
- RELOAD: bool = False # Set to True for development
891
- WORKERS: int = 4
892
- LOG_LEVEL: str = "INFO"
893
-
894
- # CORS
895
- CORS_ORIGINS: list = ["*"]
896
- CORS_ALLOW_CREDENTIALS: bool = True
897
- CORS_ALLOW_METHODS: list = ["*"]
898
- CORS_ALLOW_HEADERS: list = ["*"]
899
-
900
- # File Upload
901
- MAX_UPLOAD_SIZE: int = 10 * 1024 * 1024 # 10MB
902
- ALLOWED_EXTENSIONS: list = [".pdf", ".docx"]
903
-
904
- # Analysis
905
- MIN_CONTRACT_LENGTH: int = 300
906
- MAX_CONTRACT_LENGTH: int = 500000
907
- MAX_CLAUSES_TO_ANALYZE: int = 15
908
-
909
- # Ollama
910
- OLLAMA_BASE_URL: str = "http://localhost:11434"
911
- OLLAMA_MODEL: str = "llama3:8b"
912
- OLLAMA_TIMEOUT: int = 120
913
-
914
- # OpenAI (optional)
915
- OPENAI_API_KEY: str = ""
916
-
917
- # Anthropic (optional)
918
- ANTHROPIC_API_KEY: str = ""
919
-
920
- # Cache
921
- ENABLE_CACHE: bool = True
922
- CACHE_TTL: int = 3600
923
-
924
- class Config:
925
- env_file = ".env"
926
- ```
927
-
928
- ### Model Configuration (config/model_config.py)
929
-
930
- ```python
931
- from pathlib import Path
932
-
933
- class ModelConfig:
934
- BASE_DIR = Path(__file__).parent.parent
935
- MODEL_DIR = BASE_DIR / "models"
936
- CACHE_DIR = BASE_DIR / "cache" / "models"
937
-
938
- # Legal-BERT Configuration
939
- LEGAL_BERT = {
940
- "model_name": "nlpaueb/legal-bert-base-uncased",
941
- "local_path": MODEL_DIR / "legal-bert",
942
- "dimension": 768,
943
- "max_length": 512
944
- }
945
-
946
- # Sentence Transformer Configuration
947
- EMBEDDING_MODEL = {
948
- "model_name": "sentence-transformers/all-MiniLM-L6-v2",
949
- "local_path": MODEL_DIR / "embeddings",
950
- "dimension": 384,
951
- "max_length": 256
952
- }
953
-
954
- # LLM Configuration
955
- LLM_CONFIG = {
956
- "base_url": "http://localhost:11434",
957
- "model": "llama3:8b",
958
- "timeout": 120
959
- }
960
- ```
961
-
962
- ### Risk Rules (config/risk_rules.py)
963
-
964
- ```python
965
- from enum import Enum
966
-
967
- class ContractType(Enum):
968
- EMPLOYMENT = "employment"
969
- CONSULTING = "consulting"
970
- NDA = "nda"
971
- SOFTWARE = "software"
972
- SERVICE = "service"
973
- PARTNERSHIP = "partnership"
974
- LEASE = "lease"
975
- PURCHASE = "purchase"
976
- GENERAL = "general"
977
-
978
- class RiskRules:
979
- # Critical keywords (highest risk)
980
- CRITICAL_KEYWORDS = {
981
- 'unlimited liability': 15,
982
- 'perpetual': 12,
983
- 'irrevocable': 12,
984
- 'forfeit': 10,
985
- 'liquidated damages': 10,
986
- 'wage withholding': 15,
987
- 'joint and several': 8
988
- }
989
-
990
- # High-risk keywords
991
- HIGH_RISK_KEYWORDS = {
992
- 'non-compete': 8,
993
- 'non-solicit': 7,
994
- 'penalty': 6,
995
- 'without cause': 7,
996
- 'sole discretion': 8,
997
- 'immediate termination': 7,
998
- 'at-will': 6
999
- }
1000
-
1001
- # Risk thresholds
1002
- RISK_THRESHOLDS = {
1003
- "critical": 80,
1004
- "high": 60,
1005
- "medium": 40,
1006
- "low": 20
1007
- }
1008
-
1009
- # Contract type-specific weight adjustments
1010
- TYPE_WEIGHTS = {
1011
- ContractType.EMPLOYMENT: {
1012
- "restrictive_covenants": 1.3,
1013
- "termination_rights": 1.2,
1014
- "compensation_benefits": 1.1
1015
- },
1016
- ContractType.CONSULTING: {
1017
- "penalties_liability": 1.3,
1018
- "intellectual_property": 1.2
1019
- },
1020
- ContractType.NDA: {
1021
- "restrictive_covenants": 1.4
1022
- }
1023
- }
1024
- ```
1025
-
1026
- ---
1027
-
1028
- ## 💻 Development
1029
-
1030
- ### Setting Up Development Environment
1031
-
1032
- ```bash
1033
- # Clone repository
1034
- git clone https://github.com/yourusername/contract-guard-ai.git
1035
- cd contract-guard-ai
1036
-
1037
- # Create virtual environment
1038
- python -m venv venv
1039
- source venv/bin/activate # Windows: venv\Scripts\activate
1040
-
1041
- # Install in editable mode with dev dependencies
1042
- pip install -e ".[dev]"
1043
-
1044
- # Install pre-commit hooks
1045
- pre-commit install
1046
-
1047
- # Copy environment template
1048
- cp .env.example .env
1049
- ```
1050
-
1051
- ### Running Tests
1052
-
1053
- ```bash
1054
- # Run all tests
1055
- pytest
1056
-
1057
- # Run with coverage
1058
- pytest --cov=. --cov-report=html
1059
-
1060
- # Run specific test file
1061
- pytest tests/test_classifier.py
1062
-
1063
- # Run with verbose output
1064
- pytest -v -s
1065
-
1066
- # Run only fast tests (skip slow integration tests)
1067
- pytest -m "not slow"
1068
- ```
1069
-
1070
- ### Code Quality
1071
-
1072
- ```bash
1073
- # Format code with Black
1074
- black .
1075
-
1076
- # Sort imports
1077
- isort .
1078
-
1079
- # Lint with Flake8
1080
- flake8 .
1081
-
1082
- # Type checking with MyPy
1083
- mypy .
1084
-
1085
- # All checks (pre-commit)
1086
- pre-commit run --all-files
1087
- ```
1088
-
1089
- ### Adding New Services
1090
-
1091
- 1. **Create service file** in `services/`:
1092
- ```python
1093
- # services/my_new_service.py
1094
- from utils.logger import ContractAnalyzerLogger, log_info
1095
-
1096
- class MyNewService:
1097
- def __init__(self):
1098
- self.logger = ContractAnalyzerLogger.get_logger()
1099
- log_info("MyNewService initialized")
1100
-
1101
- @ContractAnalyzerLogger.log_execution_time("my_operation")
1102
- def my_operation(self, input_data):
1103
- """Your service logic here"""
1104
- log_info("Processing...", input_size=len(input_data))
1105
- result = self._process(input_data)
1106
- return result
1107
- ```
1108
-
1109
- 2. **Register in services/__init__.py**:
1110
- ```python
1111
- from .my_new_service import MyNewService
1112
-
1113
- __all__ = [
1114
- # ... existing services
1115
- 'MyNewService'
1116
- ]
1117
- ```
1118
-
1119
- 3. **Add tests** in `tests/`:
1120
- ```python
1121
- # tests/test_my_new_service.py
1122
- import pytest
1123
- from services.my_new_service import MyNewService
1124
-
1125
- def test_my_operation():
1126
- service = MyNewService()
1127
- result = service.my_operation("test data")
1128
- assert result is not None
1129
- ```
1130
-
1131
- 4. **Integrate in app.py**:
1132
- ```python
1133
- # Add to service initialization
1134
- services["my_new_service"] = MyNewService()
1135
-
1136
- # Use in analysis pipeline
1137
- my_result = services["my_new_service"].my_operation(data)
1138
- ```
1139
-
1140
- ### Debugging
1141
-
1142
- ```bash
1143
- # Run with debug logging
1144
- LOG_LEVEL=DEBUG python app.py
1145
-
1146
- # Enable API debug mode
1147
- uvicorn app:app --reload --log-level debug
1148
-
1149
- # Python debugger (pdb)
1150
- import pdb; pdb.set_trace()
1151
-
1152
- # VS Code launch.json
1153
- {
1154
- "version": "0.2.0",
1155
- "configurations": [
1156
- {
1157
- "name": "FastAPI",
1158
- "type": "python",
1159
- "request": "launch",
1160
- "module": "uvicorn",
1161
- "args": ["app:app", "--reload", "--port", "8000"],
1162
- "jinja": true
1163
- }
1164
- ]
1165
- }
1166
- ```
1167
-
1168
  ## 📝 License
1169
 
1170
  This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
1171
 
1172
- ```
1173
- MIT License
1174
-
1175
- Copyright (c) 2025 AI Contract Risk Analyzer Contributors
1176
-
1177
- Permission is hereby granted, free of charge, to any person obtaining a copy
1178
- of this software and associated documentation files (the "Software"), to deal
1179
- in the Software without restriction, including without limitation the rights
1180
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1181
- copies of the Software, and to permit persons to whom the Software is
1182
- furnished to do so, subject to the following conditions:
1183
-
1184
- The above copyright notice and this permission notice shall be included in all
1185
- copies or substantial portions of the Software.
1186
-
1187
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1188
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1189
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1190
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1191
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1192
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1193
- SOFTWARE.
1194
- ```
1195
-
1196
  ---
1197
 
1198
  ## 🙏 Acknowledgments
@@ -1225,19 +531,29 @@ SOFTWARE.
1225
  | Model Management | ✅ Stable | 88% |
1226
  | Services | ✅ Stable | 85% |
1227
  | Documentation | ✅ Complete | 100% |
1228
- | Tests | 🟡 In Progress | 67% |
1229
- | Frontend | 🟡 Beta | N/A |
1230
 
1231
  ---
1232
 
 
 
 
 
 
 
 
1233
  <div align="center">
1234
 
1235
- **Made with ❤️ by the Contract Guard AI Team**
1236
 
1237
- [Website](https://contractguardai.com) • [Documentation](https://docs.contractguardai.com) • [Blog](https://blog.contractguardai.com)
 
1238
 
1239
  </div>
1240
 
1241
  ---
1242
 
1243
- *© 2025 AI Contract Risk Analyzer. Making legal intelligence accessible to everyone.*
 
 
 
1
+ ---
2
+ title: AI Contract Risk Analyzer
3
+ emoji: 📝
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: docker
7
+ app_file: Dockerfile
8
+ pinned: false
9
+ license: mit
10
+ ---
11
+
12
  <div align="center">
13
 
14
  # AI Contract Risk Analyzer 🤖⚖️
 
16
  [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
17
  [![FastAPI](https://img.shields.io/badge/FastAPI-0.104+-green.svg)](https://fastapi.tiangolo.com/)
18
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
19
+ [![Hugging Face Spaces](https://img.shields.io/badge/🤗-Hugging%20Face%20Spaces-blue)](https://huggingface.co/spaces)
20
+ [![Transformers](https://img.shields.io/badge/🤗-Transformers-ffcc33)](https://huggingface.co/transformers/)
21
+ [![PyTorch](https://img.shields.io/badge/PyTorch-2.1+-ee4c2c)](https://pytorch.org/)
22
+ [![Legal-BERT](https://img.shields.io/badge/Legal--BERT-nlpaueb/legal--bert--base--uncased-orange)](https://huggingface.co/nlpaueb/legal-bert-base-uncased)
23
+ [![Sentence-BERT](https://img.shields.io/badge/Sentence--BERT-all--MiniLM--L6--v2-lightgrey)](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)
24
+ [![Ollama](https://img.shields.io/badge/Ollama-llama3:8b-7c3aed)](https://ollama.ai/)
25
+ [![Docker](https://img.shields.io/badge/Docker-Ready-2496ed)](https://docker.com/)
26
+ [![spaCy](https://img.shields.io/badge/spaCy-3.7+-09a3d5)](https://spacy.io/)
27
 
28
  > **Democratizing Legal Intelligence Through AI**
29
+ > Comprehensive contract risk analysis using an integrated pipeline with Legal-BERT, multi-model NLP, and LLM interpretation
30
 
31
  </div>
32
 
 
33
  ## 🎯 Overview
34
 
35
+ The AI Contract Risk Analyzer is a production-grade legal document analysis platform that leverages state-of-the-art NLP and machine learning to provide instant, comprehensive contract risk assessment. Built with a unified orchestration architecture, it integrates Legal-BERT for clause understanding, semantic embeddings for similarity matching, and LLMs for natural language explanations.
36
 
37
  ### Key Features
38
 
39
+ - 📄 **Multi-Format Support**: PDF, DOCX, TXT document processing
40
+ - 🔍 **9 Contract Categories**: Employment, NDA, Lease, Service agreements, etc.
41
+ - ⚡ **Sub-60s Analysis**: Real-time risk scoring and clause extraction via pre-loaded models
42
  - 🔒 **Privacy-First**: Ephemeral processing, zero data retention
43
+ - 🌐 **LLM Integration**: Ollama (local), OpenAI, Anthropic support with fallback
44
+ - 📊 **Comprehensive Reports**: Executive summaries, negotiation playbooks, market comparisons, and downloadable PDFs
45
+ - 🔄 **Integrated Pipeline**: A single orchestrator (`PreloadedAnalysisService`) ensures consistent context propagation from classification through to final reporting
46
 
47
  ---
48
 
 
56
  - [Configuration](#-configuration)
57
  - [Development](#-development)
58
  - [Performance](#-performance)
59
+ - [Documentation & Blog](#-documentation--blog)
60
  - [License](#-license)
61
 
62
  ---
 
65
 
66
  ### System Overview
67
 
68
+ This diagram illustrates the core components and their interactions, highlighting the unified orchestration and the flow of context (specifically the `ContractType`) through the system.
69
+
70
  ```
71
  ┌─────────────────────────────────────────────────────────────┐
72
  │ Client Layer │
 
131
  └─────────────────────────────────────────────────────────────┘
132
  ```
133
 
134
+
135
+ ### Integrated Analysis Pipeline Flowchart
136
 
137
  ```mermaid
138
  graph TB
139
+ Start[User Uploads Contract] --> Read[Document Reader]
140
  Read --> Validate{Contract Validator}
141
  Validate -->|Invalid| Error[Return Error]
142
  Validate -->|Valid| Classify[Contract Classifier]
143
 
144
+ Classify --> Extract[RiskClauseExtractor]
145
+ Extract --> Analyze[TermAnalyzer + ProtectionChecker]
146
+ Analyze --> Score[RiskAnalyzer]
147
+ Score --> Generate[Output Generators]
148
 
149
+ Generate --> Sum[SummaryGenerator]
150
+ Generate --> Interp[LLM Interpreter]
151
+ Generate --> Neg[Negotiation Engine]
152
+ Generate --> PDF[PDF Report Generator]
153
 
154
+ Sum --> End[JSON Response]
155
+ Interp --> End
156
+ Neg --> End
157
+ PDF --> End
 
 
 
 
 
 
 
 
 
 
 
158
 
159
  style Start fill:#e1f5e1
160
  style End fill:#e1f5e1
161
  style Error fill:#ffe1e1
162
  style Classify fill:#e1e5ff
163
  style Extract fill:#e1e5ff
164
+ style Score fill:#ffe5e1
165
+ style Generate fill:#fff5e1
 
166
  ```
167
 
168
  ### Component Diagram
169
 
170
  ```mermaid
171
  graph LR
172
+ subgraph "Client"
173
+ UI[Browser / API Client]
174
  end
175
+
176
+ subgraph "FastAPI Backend"
177
+ API[FastAPI Server]
178
+ PAS[PreloadedAnalysisService]
179
  end
180
+
181
+ subgraph "Core Services"
182
+ CC[Contract Classifier]
183
+ RCE[Risk Clause Extractor]
184
+ TA[Term Analyzer]
185
+ PC[Protection Checker]
186
+ RA[Comprehensive Risk Analyzer]
187
+ SG[Summary Generator]
188
+ LI[LLM Interpreter]
189
+ NE[Negotiation Engine]
190
+ PR[PDF Report Generator]
191
  end
192
+
193
+ subgraph "Model Management"
194
+ MM[Model Manager]
195
+ MR[Model Registry]
196
+ LM[LLM Manager]
 
197
  end
198
+
199
  subgraph "AI Models"
200
+ LB[Legal-BERT]
201
+ ST[Sentence-BERT]
202
+ OLM[Ollama]
203
+ OAI[OpenAI]
204
+ ANT[Anthropic]
205
  end
206
+
207
+ UI --> API
208
+ API --> PAS
209
+ PAS --> CC
210
+ PAS --> RCE
211
+ PAS --> TA
212
+ PAS --> PC
213
+ PAS --> RA
214
+ PAS --> SG
215
+ PAS --> LI
216
+ PAS --> NE
217
+ PAS --> PR
218
+
219
+ CC -.-> RCE
220
+ RCE --> TA
221
+ RCE --> PC
222
+ TA --> RA
223
+ PC --> RA
224
+ RCE --> RA
225
+
226
+ RA --> SG
227
+ RA --> LI
228
+ RA --> NE
229
+ SG --> PR
230
+ LI --> PR
231
+ NE --> PR
232
+
233
+ PAS --> MM
234
+ MM --> MR
235
+ MM --> LM
236
+
237
+ MR --> LB
238
+ MR --> ST
239
+ LM --> OLM
240
+ LM --> OAI
241
+ LM --> ANT
242
  ```
243
 
244
  ---
 
255
  GPU: Optional (3x speedup with NVIDIA GPU + CUDA 11.8+)
256
  ```
257
 
258
+ ### Quick Install
259
 
260
  ```bash
261
  # Clone repository
262
+ git clone https://github.com/satyaki-mitra/contract-guard-ai.git
263
  cd contract-guard-ai
264
 
265
  # Create virtual environment
 
272
  # Download spaCy model (optional, for advanced text processing)
273
  python -m spacy download en_core_web_sm
274
 
275
+ # Initialize models (on first run)
276
  python -c "from model_manager.model_loader import ModelLoader; ModelLoader()"
277
  ```
278
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
  ---
280
 
281
  ## ⚡ Quick Start
 
283
  ### 1. Start Required Services
284
 
285
  ```bash
286
+ # Start Ollama (for local LLM features)
287
  ollama serve
288
 
289
  # Pull LLM model
 
324
  ### 3. Launch Application
325
 
326
  ```bash
327
+ # Option A: Start API only
 
 
 
328
  python app.py
329
 
330
+ # Option B: Use Uvicorn directly
331
  uvicorn app:app --reload --host 0.0.0.0 --port 8000
332
  ```
333
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
  ---
335
 
336
  ## 🔧 Technical Details
 
402
  │ ├── protection_checker.py # Missing protections checker
403
  │ ├── llm_interpreter.py # LLM-powered clause interpretation
404
  │ ├── negotiation_engine.py # Negotiation points generation
 
405
 
406
  ├── utils/ # Utility functions
407
  │ ├── __init__.py
 
428
  ├── uploads/ # Temporary upload storage
429
 
430
  └── docs/ # Documentation
431
+ ├── API_DOCUMENTATION.md
432
  └── BLOGPOST.md
433
  ```
434
 
 
483
  score = raw model confidence
484
  ```
485
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
486
  #### Memory Usage
487
 
488
  ```
 
495
 
496
  ---
497
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
498
  ## 📝 License
499
 
500
  This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
501
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
502
  ---
503
 
504
  ## 🙏 Acknowledgments
 
531
  | Model Management | ✅ Stable | 88% |
532
  | Services | ✅ Stable | 85% |
533
  | Documentation | ✅ Complete | 100% |
534
+ | Frontend | ✅ Stable | 80% |
535
+ | Tests | 🟡 In Progress | 50% |
536
 
537
  ---
538
 
539
+ ## 📚 Documentation & Blog
540
+
541
+ - For detailed technical documentation, including API endpoints, request/response schemas, and error handling, see the [API_DOCUMENTATION.md](docs/API_DOCUMENTATION.md) file.
542
+
543
+ - To learn about the research behind the system and our vision for democratizing legal intelligence, read the full blog post in [BLOGPOST.md](docs/BLOGPOST.md).
544
+ ---
545
+
546
  <div align="center">
547
 
548
+ **Made with ❤️ by Itobuz Technologies Private Limited**
549
 
550
+ • [Documentation](docs/API_DOCUMENTATION.md)
551
+ • [Blog](docs/BLOGPOST.md)
552
 
553
  </div>
554
 
555
  ---
556
 
557
+ > *© 2025 AI Contract Risk Analyzer. Making legal intelligence accessible to everyone.*
558
+
559
+ ---
app.py CHANGED
@@ -1,509 +1,816 @@
1
- """
2
- FastAPI Application for AI Contract Risk Analyzer - UPDATED
3
- Complete integration with new services pipeline and frontend requirements
4
- """
5
- import signal
6
  import os
 
7
  import time
8
  import json
9
  import uuid
10
- from typing import Any, List, Dict, Optional
 
 
 
 
 
11
  from pathlib import Path
 
 
 
 
 
 
12
  from datetime import datetime
 
 
 
 
 
 
13
  from contextlib import asynccontextmanager
14
-
15
- import uvicorn
16
- import numpy as np
17
- from fastapi import FastAPI, File, UploadFile, HTTPException, Form, Request
18
- from fastapi.responses import JSONResponse, FileResponse, Response
19
- from fastapi.middleware.cors import CORSMiddleware
20
  from fastapi.staticfiles import StaticFiles
21
- from pydantic import BaseModel, Field
22
- import sys
23
 
24
  # Add parent directory to path
25
  sys.path.append(str(Path(__file__).parent))
26
 
27
- # Import all services - UPDATED WITH NEW SERVICES
 
28
  from config.settings import settings
29
  from config.risk_rules import ContractType
30
- from model_manager.model_loader import ModelLoader
31
- from model_manager.llm_manager import LLMManager, LLMProvider
32
- from utils.document_reader import DocumentReader
33
  from utils.validators import ContractValidator
34
  from utils.text_processor import TextProcessor
35
- from utils.logger import ContractAnalyzerLogger, log_info, log_error
36
-
37
- # UPDATED SERVICE IMPORTS
38
- from services.contract_classifier import ContractClassifier, ContractCategory
39
- from services.clause_extractor import ComprehensiveClauseExtractor, RiskClauseExtractor, ExtractedClause
40
- from services.risk_analyzer import RiskAnalyzer, RiskScore
41
- from services.term_analyzer import TermAnalyzer, UnfavorableTerm
42
- from services.protection_checker import ProtectionChecker, MissingProtection
43
- from services.llm_interpreter import LLMClauseInterpreter, ClauseInterpretation, RiskInterpretation
44
- from services.negotiation_engine import NegotiationEngine, NegotiationPlaybook, NegotiationPoint
 
 
 
 
 
 
 
 
45
  from services.summary_generator import SummaryGenerator
 
 
 
 
 
 
46
 
47
- # Import PDF generator
48
- from reporter.pdf_generator import generate_pdf_report
49
 
50
  # ============================================================================
51
- # CUSTOM SERIALIZATION (UNCHANGED)
52
  # ============================================================================
53
-
54
  class NumpyJSONEncoder(json.JSONEncoder):
55
  def default(self, obj: Any) -> Any:
56
  if isinstance(obj, (np.float32, np.float64)):
57
  return float(obj)
 
58
  elif isinstance(obj, (np.int32, np.int64, np.int8, np.uint8)):
59
  return int(obj)
 
60
  elif isinstance(obj, np.ndarray):
61
  return obj.tolist()
 
62
  elif isinstance(obj, np.bool_):
63
  return bool(obj)
 
64
  elif hasattr(obj, 'item'):
65
  return obj.item()
 
66
  elif hasattr(obj, 'to_dict'):
67
  return obj.to_dict()
 
68
  elif hasattr(obj, 'dict'):
69
  return obj.dict()
 
70
  elif isinstance(obj, (set, tuple)):
71
  return list(obj)
 
72
  return super().default(obj)
73
 
 
74
  class NumpyJSONResponse(JSONResponse):
75
  def render(self, content: Any) -> bytes:
76
- return json.dumps(
77
- content,
78
- ensure_ascii=False,
79
- allow_nan=False,
80
- indent=None,
81
- separators=(",", ":"),
82
- cls=NumpyJSONEncoder,
83
- ).encode("utf-8")
 
84
 
85
  def convert_numpy_types(obj: Any) -> Any:
86
  if obj is None:
87
  return None
 
88
  if isinstance(obj, dict):
89
  return {key: convert_numpy_types(value) for key, value in obj.items()}
 
90
  elif isinstance(obj, (list, tuple, set)):
91
  return [convert_numpy_types(item) for item in obj]
 
92
  elif isinstance(obj, (np.float32, np.float64)):
93
  return float(obj)
 
94
  elif isinstance(obj, (np.int32, np.int64, np.int8, np.uint8)):
95
  return int(obj)
 
96
  elif isinstance(obj, np.ndarray):
97
  return obj.tolist()
 
98
  elif isinstance(obj, np.bool_):
99
  return bool(obj)
 
100
  elif hasattr(obj, 'item'):
101
  return obj.item()
 
102
  elif hasattr(obj, 'to_dict'):
103
  return convert_numpy_types(obj.to_dict())
 
104
  elif hasattr(obj, 'dict'):
105
  return convert_numpy_types(obj.dict())
 
106
  else:
107
  return obj
108
 
 
109
  def safe_serialize_response(data: Any) -> Any:
110
  return convert_numpy_types(data)
111
 
112
- # ============================================================================
113
- # PYDANTIC SCHEMAS - UPDATED FOR FRONTEND COMPATIBILITY
114
- # ============================================================================
115
 
 
116
  class SerializableBaseModel(BaseModel):
117
  def dict(self, *args, **kwargs) -> Dict[str, Any]:
118
  data = super().dict(*args, **kwargs)
119
  return convert_numpy_types(data)
 
120
 
121
  def json(self, *args, **kwargs) -> str:
122
  data = self.dict(*args, **kwargs)
123
- return json.dumps(data, cls=NumpyJSONEncoder, *args, **kwargs)
 
124
 
125
  class HealthResponse(SerializableBaseModel):
126
- status: str
127
- version: str
128
- timestamp: str
129
- models_loaded: int
130
- services_loaded: int
131
- memory_usage_mb: float
 
132
 
133
  class AnalysisOptions(SerializableBaseModel):
134
- max_clauses: int = Field(default=15, ge=5, le=30)
135
- interpret_clauses: bool = Field(default=True)
136
- generate_negotiation_points: bool = Field(default=True)
137
- compare_to_market: bool = Field(default=False) # Disabled for now
 
138
 
139
  class AnalysisResult(SerializableBaseModel):
140
- analysis_id: str
141
- timestamp: str
142
- classification: Dict[str, Any]
143
- clauses: List[Dict[str, Any]]
144
- risk_analysis: Dict[str, Any]
145
- unfavorable_terms: List[Dict[str, Any]]
146
- missing_protections: List[Dict[str, Any]]
147
- clause_interpretations: Optional[List[Dict[str, Any]]] = None
148
- negotiation_points: Optional[List[Dict[str, Any]]] = None
149
- market_comparisons: Optional[List[Dict[str, Any]]] = None
150
- executive_summary: str
151
- metadata: Dict[str, Any]
152
- pdf_available: bool = True
 
153
 
154
  class ErrorResponse(SerializableBaseModel):
155
- error: str
156
- detail: str
157
- timestamp: str
 
158
 
159
  class FileValidationResponse(SerializableBaseModel):
160
- valid: bool
161
- message: str
162
- confidence: Optional[float] = None
163
- report: Optional[Dict[str, Any]] = None
164
 
165
- # ============================================================================
166
- # SERVICE INITIALIZATION WITH FULL PIPELINE INTEGRATION
167
- # ============================================================================
168
 
 
169
  class PreloadedAnalysisService:
170
- """Analysis service with complete pipeline integration"""
171
-
 
172
  def __init__(self):
173
- self.model_loader = ModelLoader()
174
- self.llm_manager = LLMManager()
175
- self.services = {}
176
- self.service_status = {}
177
  self.memory_usage_mb = 0
 
178
  self._preload_all_services()
179
-
 
180
  def _preload_all_services(self):
181
- """Pre-load ALL services and models at initialization"""
 
 
182
  log_info("PRE-LOADING ALL AI MODELS AND SERVICES")
183
-
184
  try:
185
  initial_memory = self._get_memory_usage()
186
-
187
- # 1. Pre-load Contract Classifier
188
  log_info("🔄 Pre-loading Contract Classifier...")
189
- self.services["classifier"] = ContractClassifier(self.model_loader)
190
- self.service_status["classifier"] = "loaded"
191
- log_info(" Contract Classifier loaded")
 
192
 
193
- # 2. Pre-load Comprehensive Clause Extractor
 
 
 
 
194
  log_info("🔄 Pre-loading Comprehensive Clause Extractor...")
195
- self.services["clause_extractor"] = ComprehensiveClauseExtractor(self.model_loader)
196
- self.service_status["clause_extractor"] = "loaded"
197
- log_info(" Comprehensive Clause Extractor loaded")
 
 
198
 
199
- # 3. Pre-load Risk Analyzer (Main Orchestrator)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  log_info("🔄 Pre-loading Risk Analyzer...")
201
- self.services["risk_analyzer"] = RiskAnalyzer(self.model_loader)
202
- self.service_status["risk_analyzer"] = "loaded"
203
- log_info(" Risk Analyzer loaded")
 
 
 
204
 
205
- # 4. Pre-load LLM Interpreter
 
 
 
 
206
  log_info("🔄 Pre-loading LLM Interpreter...")
207
  try:
208
- self.services["llm_interpreter"] = LLMClauseInterpreter(self.llm_manager)
209
  self.service_status["llm_interpreter"] = "loaded"
 
210
  log_info("✅ LLM Interpreter loaded")
 
211
  except Exception as e:
212
- self.services["llm_interpreter"] = None
213
- self.service_status["llm_interpreter"] = f"failed: {str(e)}"
 
214
  log_info("⚠️ LLM Interpreter not available")
215
-
216
- # 5. Pre-load Negotiation Engine
217
  log_info("🔄 Pre-loading Negotiation Engine...")
218
  try:
219
- self.services["negotiation_engine"] = NegotiationEngine(self.llm_manager)
 
 
 
220
  self.service_status["negotiation_engine"] = "loaded"
 
221
  log_info("✅ Negotiation Engine loaded")
 
222
  except Exception as e:
223
- self.services["negotiation_engine"] = None
224
- self.service_status["negotiation_engine"] = f"failed: {str(e)}"
 
225
  log_info("⚠️ Negotiation Engine not available")
226
-
227
- # 6. Pre-load Summary Generator
228
  log_info("🔄 Pre-loading Summary Generator...")
229
  try:
230
- self.services["summary_generator"] = SummaryGenerator(self.llm_manager)
 
231
  self.service_status["summary_generator"] = "loaded"
 
232
  log_info("✅ Summary Generator loaded")
 
233
  except Exception as e:
234
- self.services["summary_generator"] = SummaryGenerator()
 
235
  self.service_status["summary_generator"] = "fallback_loaded"
 
236
  log_info("⚠️ Summary Generator using fallback mode")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
 
 
 
 
 
238
  # Calculate memory usage
239
- final_memory = self._get_memory_usage()
240
  self.memory_usage_mb = final_memory - initial_memory
241
 
242
  log_info("🎉 ALL SERVICES PRE-LOADED SUCCESSFULLY!")
243
  log_info(f"📊 Memory Usage: {self.memory_usage_mb:.2f} MB")
244
  log_info(f"🔧 Services Loaded: {len(self.service_status)}")
245
-
246
  except Exception as e:
247
  log_error(f"CRITICAL: Failed to pre-load services: {e}")
248
  raise
249
-
 
250
  def _get_memory_usage(self) -> float:
251
- """Get current memory usage in MB"""
 
 
252
  try:
253
  import psutil
254
  process = psutil.Process()
255
  return process.memory_info().rss / 1024 / 1024
 
256
  except ImportError:
257
  return 0.0
258
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  def get_service_status(self) -> Dict[str, Any]:
260
- """Get detailed service status"""
 
 
261
  model_stats = self.model_loader.get_registry_stats()
262
- return {
263
- "services": self.service_status,
264
- "models": model_stats,
265
- "memory_usage_mb": self.memory_usage_mb,
266
- "total_services_loaded": len([s for s in self.service_status.values() if "loaded" in str(s)]),
267
- "total_models_loaded": model_stats.get("loaded_models", 0)
268
- }
269
-
270
  def analyze_contract(self, contract_text: str, options: AnalysisOptions) -> Dict[str, Any]:
271
- """Complete contract analysis using full pipeline"""
 
 
272
  try:
273
  log_info("Starting comprehensive contract analysis pipeline...")
274
-
275
- # Step 1: Classify contract
276
- classification = self.services["classifier"].classify_contract(contract_text)
277
- classification_dict = safe_serialize_response(classification.to_dict())
278
  log_info(f"Contract classified as: {classification.category}")
279
 
280
- # Step 2: Extract clauses
281
- clauses = self.services["clause_extractor"].extract_clauses(
282
- contract_text, options.max_clauses
283
- )
284
- clauses_dict = [safe_serialize_response(clause.to_dict()) for clause in clauses]
285
- log_info(f"Extracted {len(clauses)} clauses")
 
 
 
 
 
 
 
 
 
286
 
287
- # Step 3: Map to ContractType
288
- contract_type = self._get_contract_type_enum(classification.category)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
 
290
- # Step 4: Complete Risk Analysis (Main Orchestrator)
291
- risk_score = self.services["risk_analyzer"].analyze_contract_risk(contract_text)
292
- risk_dict = safe_serialize_response(risk_score.to_dict())
293
- log_info(f"Risk analysis completed: {risk_score.overall_score}/100")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
 
295
- # Extract components from risk analysis for further processing
296
- unfavorable_terms = risk_score.unfavorable_terms
297
- missing_protections = risk_score.missing_protections
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
 
299
- # Step 5: Generate LLM Interpretations (if enabled and available)
300
- interpretations_dict = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  risk_interpretation = None
302
 
303
- if options.interpret_clauses and self.services["llm_interpreter"]:
304
  try:
305
- risk_interpretation = self.services["llm_interpreter"].interpret_with_risk_context(
306
- clauses=clauses,
307
- unfavorable_terms=unfavorable_terms,
308
- missing_protections=missing_protections,
309
- contract_type=contract_type,
310
- overall_risk_score=risk_score.overall_score,
311
- max_clauses=min(10, options.max_clauses)
312
- )
313
- interpretations_dict = [
314
- safe_serialize_response(interp.to_dict())
315
- for interp in risk_interpretation.clause_interpretations
316
- ]
317
- log_info(f"Generated {len(interpretations_dict)} clause interpretations")
318
  except Exception as e:
319
- log_error(f"LLM interpretation failed: {e}")
320
- interpretations_dict = []
321
 
322
- # Step 6: Generate Negotiation Points (if enabled and available)
323
- negotiation_dict = []
 
 
 
 
 
 
 
 
 
324
  negotiation_playbook = None
 
325
 
326
- if options.generate_negotiation_points and self.services["negotiation_engine"]:
327
  try:
328
- negotiation_playbook = self.services["negotiation_engine"].generate_comprehensive_playbook(
329
- risk_analysis=risk_score,
330
- risk_interpretation=risk_interpretation or RiskInterpretation(
331
- overall_risk_explanation="",
332
- key_concerns=[],
333
- negotiation_strategy="",
334
- market_comparison="",
335
- clause_interpretations=[]
336
- ),
337
- unfavorable_terms=unfavorable_terms,
338
- missing_protections=missing_protections,
339
- clauses=clauses,
340
- contract_type=contract_type,
341
- max_points=8 # Match frontend limit
342
- )
343
 
344
- negotiation_dict = [
345
- safe_serialize_response(point.to_dict())
346
- for point in negotiation_playbook.critical_points
347
- ]
348
- log_info(f"Generated {len(negotiation_dict)} negotiation points")
 
 
 
 
 
 
 
 
349
 
350
  except Exception as e:
351
- log_error(f"Negotiation engine failed: {e}")
352
- print(f"🔍 DEBUG: Negotiation engine exception: {e}")
353
- import traceback
354
- print(f"🔍 DEBUG: Full traceback: {traceback.format_exc()}")
355
- negotiation_dict = []
356
-
357
- # Step 7: Generate Executive Summary
358
- executive_summary = self.services["summary_generator"].generate_comprehensive_summary(
359
- contract_text=contract_text,
360
- classification=classification,
361
- risk_analysis=risk_score,
362
- risk_interpretation=risk_interpretation or RiskInterpretation(
363
- overall_risk_explanation="",
364
- key_concerns=[],
365
- negotiation_strategy="",
366
- market_comparison="",
367
- clause_interpretations=[]
368
- ),
369
- negotiation_playbook=negotiation_playbook or NegotiationPlaybook(
370
- overall_strategy="",
371
- critical_points=[],
372
- walk_away_items=[],
373
- concession_items=[],
374
- timing_guidance="",
375
- risk_mitigation_plan=""
376
- ),
377
- unfavorable_terms=unfavorable_terms,
378
- missing_protections=missing_protections,
379
- clauses=clauses
380
- )
381
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
  # Build final result matching frontend expectations
383
- result = {
384
- "analysis_id": str(uuid.uuid4()),
385
- "timestamp": datetime.now().isoformat(),
386
- "classification": classification_dict,
387
- "clauses": clauses_dict,
388
- "risk_analysis": risk_dict, # Contains overall_score, risk_level, category_scores, risk_breakdown
389
- "unfavorable_terms": [safe_serialize_response(term) for term in unfavorable_terms],
390
- "missing_protections": [safe_serialize_response(prot) for prot in missing_protections],
391
- "clause_interpretations": interpretations_dict,
392
- "negotiation_points": negotiation_dict,
393
- "market_comparisons": [], # Disabled for now
394
- "executive_summary": executive_summary,
395
- "metadata": {
396
- "text_length": len(contract_text),
397
- "word_count": len(contract_text.split()),
398
- "num_clauses": len(clauses),
399
- "contract_type": contract_type.value,
400
- "actual_category": classification.category,
401
- "options": options.dict()
402
- },
403
- "pdf_available": True
404
- }
405
-
 
406
  log_info("Contract analysis completed successfully")
407
  return result
408
-
409
  except Exception as e:
410
- log_error(f"Contract analysis failed: {e}")
411
  raise
412
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
413
  def _get_contract_type_enum(self, category_str: str) -> ContractType:
414
- """Convert category string to ContractType enum"""
415
- mapping = {
416
- 'employment': ContractType.EMPLOYMENT,
417
- 'consulting': ContractType.CONSULTING,
418
- 'nda': ContractType.NDA,
419
- 'software': ContractType.SOFTWARE,
420
- 'service': ContractType.SERVICE,
421
- 'partnership': ContractType.PARTNERSHIP,
422
- 'lease': ContractType.LEASE,
423
- 'purchase': ContractType.PURCHASE,
424
- 'general': ContractType.GENERAL,
425
- }
426
- return mapping.get(category_str, ContractType.GENERAL)
 
 
 
 
 
 
427
 
428
- # ============================================================================
429
- # FASTAPI APPLICATION (UNCHANGED STRUCTURE, UPDATED IMPLEMENTATION)
430
- # ============================================================================
431
 
432
- # Global instances
433
- analysis_service: Optional[PreloadedAnalysisService] = None
434
- app_start_time = time.time()
 
435
 
436
  # Initialize logger
437
- ContractAnalyzerLogger.setup(log_dir="logs", app_name="contract_analyzer")
 
 
 
438
  logger = ContractAnalyzerLogger.get_logger()
439
 
 
440
  @asynccontextmanager
441
  async def lifespan(app: FastAPI):
442
  global analysis_service
443
-
444
  log_info(f"🚀 {settings.APP_NAME} v{settings.APP_VERSION} STARTING UP...")
445
  log_info("=" * 80)
446
 
447
  try:
448
  analysis_service = PreloadedAnalysisService()
449
  log_info("✅ All services initialized successfully")
 
450
  except Exception as e:
451
  log_error(f"Startup failed: {e}")
452
  raise
453
-
454
  log_info(f"📍 Server: {settings.HOST}:{settings.PORT}")
455
  log_info("=" * 80)
456
  log_info("✅ AI Contract Risk Analyzer Ready!")
457
-
458
  try:
459
  yield
 
460
  finally:
461
  log_info("🛑 Shutting down server...")
462
  log_info("✅ Server shutdown complete")
463
 
464
- app = FastAPI(
465
- title=settings.APP_NAME,
466
- version=settings.APP_VERSION,
467
- description="AI-powered contract risk analysis with complete model pre-loading",
468
- docs_url="/api/docs",
469
- redoc_url="/api/redoc",
470
- default_response_class=NumpyJSONResponse,
471
- lifespan=lifespan
472
- )
473
 
474
  # Get absolute paths
475
- BASE_DIR = Path(__file__).parent
476
  STATIC_DIR = BASE_DIR / "static"
477
 
478
  # Serve static files
479
- app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
480
 
481
- # Enhanced CORS middleware
482
- app.add_middleware(
483
- CORSMiddleware,
484
- allow_origins=["*"],
485
- allow_credentials=True,
486
- allow_methods=["*"],
487
- allow_headers=["*"],
488
- )
489
 
490
- # ============================================================================
491
- # HELPER FUNCTIONS (UNCHANGED)
492
- # ============================================================================
493
 
 
494
  def validate_file(file: UploadFile) -> tuple[bool, str]:
495
- file_ext = os.path.splitext(file.filename)[1].lower()
496
- if file_ext not in settings.ALLOWED_EXTENSIONS:
 
497
  return False, f"Invalid file type. Allowed: {', '.join(settings.ALLOWED_EXTENSIONS)}"
498
 
499
  file.file.seek(0, 2)
500
  size = file.file.tell()
 
501
  file.file.seek(0)
502
 
503
- if size > settings.MAX_UPLOAD_SIZE:
504
- return False, f"File too large. Max size: {settings.MAX_UPLOAD_SIZE / (1024*1024)}MB"
505
 
506
- if size == 0:
507
  return False, "File is empty"
508
 
509
  return True, "OK"
@@ -514,7 +821,6 @@ def read_contract_file(file) -> str:
514
  Read contract file and return text content.
515
  """
516
  reader = DocumentReader()
517
-
518
  # Extract file extension without dot
519
  filename = file.filename.lower()
520
  file_extension = Path(filename).suffix.lower().lstrip('.')
@@ -525,299 +831,353 @@ def read_contract_file(file) -> str:
525
  print(f"📁 DEBUG app.py - No extension found, defaulting to: '{file_extension}'")
526
 
527
  file_contents = reader.read_file(file.file, file_extension)
528
-
529
  if (not file_contents or not file_contents.strip()):
530
  raise ValueError("Could not extract text from file")
531
-
532
  return file_contents
533
 
534
 
535
-
536
  def validate_contract_text(text: str) -> tuple[bool, str]:
537
  if not text or not text.strip():
538
  return False, "Contract text is empty"
539
 
540
- if len(text) < settings.MIN_CONTRACT_LENGTH:
541
  return False, f"Contract text too short. Minimum {settings.MIN_CONTRACT_LENGTH} characters required."
542
 
543
- if len(text) > settings.MAX_CONTRACT_LENGTH:
544
  return False, f"Contract text too long. Maximum {settings.MAX_CONTRACT_LENGTH} characters allowed."
545
 
546
  return True, "OK"
547
 
548
- # ============================================================================
549
- # API ROUTES (UNCHANGED INTERFACE, UPDATED IMPLEMENTATION)
550
- # ============================================================================
551
 
 
 
552
  @app.get("/")
553
  async def serve_frontend():
554
  return FileResponse(str(STATIC_DIR / "index.html"))
555
 
556
- @app.get("/api/v1/health", response_model=HealthResponse)
 
557
  async def health_check():
558
  if not analysis_service:
559
- raise HTTPException(status_code=503, detail="Service not initialized")
560
-
 
 
561
  service_status = analysis_service.get_service_status()
562
-
563
- return HealthResponse(
564
- status="healthy",
565
- version=settings.APP_VERSION,
566
- timestamp=datetime.now().isoformat(),
567
- models_loaded=service_status["total_models_loaded"],
568
- services_loaded=service_status["total_services_loaded"],
569
- memory_usage_mb=service_status["memory_usage_mb"]
570
- )
571
 
572
  @app.get("/api/v1/status")
573
  async def get_detailed_status():
574
  if not analysis_service:
575
- raise HTTPException(status_code=503, detail="Service not initialized")
 
 
 
576
  return analysis_service.get_service_status()
577
 
578
- @app.post("/api/v1/analyze/file", response_model=AnalysisResult)
579
- async def analyze_contract_file(
580
- file: UploadFile = File(...),
581
- max_clauses: int = Form(15),
582
- interpret_clauses: bool = Form(True),
583
- generate_negotiation_points: bool = Form(True),
584
- compare_to_market: bool = Form(False) # Disabled for now
585
- ):
586
  if not analysis_service:
587
- raise HTTPException(status_code=503, detail="Service not initialized")
588
-
 
 
589
  try:
590
  # Validate file
591
  is_valid, message = validate_file(file)
 
592
  if not is_valid:
593
- raise HTTPException(status_code=400, detail=message)
594
-
 
 
595
  # Read contract text
596
- contract_text = read_contract_file(file)
597
-
598
  # Validate contract text
599
  is_valid_text, text_message = validate_contract_text(contract_text)
 
600
  if not is_valid_text:
601
- raise HTTPException(status_code=400, detail=text_message)
602
-
 
 
603
  # Validate contract structure using ContractValidator
604
- validator = ContractValidator()
605
  is_valid_contract, contract_type, confidence = validator.is_valid_contract(contract_text)
606
-
607
  if not is_valid_contract:
608
- raise HTTPException(status_code=400, detail=f"Invalid contract: {confidence}")
609
-
 
 
610
  # Create analysis options
611
- options = AnalysisOptions(
612
- max_clauses=min(max_clauses, settings.MAX_CLAUSES_TO_ANALYZE),
613
- interpret_clauses=interpret_clauses,
614
- generate_negotiation_points=generate_negotiation_points,
615
- compare_to_market=compare_to_market
616
- )
617
-
618
  # Perform analysis
619
- result = analysis_service.analyze_contract(contract_text, options)
620
-
621
  log_info(f"File analysis completed",
622
- filename=file.filename,
623
- analysis_id=result["analysis_id"],
624
- risk_score=result["risk_analysis"]["overall_score"])
625
-
 
626
  return AnalysisResult(**result)
627
-
628
  except HTTPException:
629
  raise
 
630
  except Exception as e:
631
- log_error(f"File analysis failed: {e}")
632
- raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
633
-
634
- @app.post("/api/v1/analyze/text", response_model=AnalysisResult)
635
- async def analyze_contract_text(
636
- contract_text: str = Form(..., description="Contract text to analyze"),
637
- max_clauses: int = Form(15),
638
- interpret_clauses: bool = Form(True),
639
- generate_negotiation_points: bool = Form(True),
640
- compare_to_market: bool = Form(False) # Disabled for now
641
- ):
642
  if not analysis_service:
643
- raise HTTPException(status_code=503, detail="Service not initialized")
644
-
 
645
  try:
646
  # Validate contract text length first
647
  is_valid, message = validate_contract_text(contract_text)
648
- if not is_valid:
649
- raise HTTPException(status_code=400, detail=message)
650
 
 
 
 
 
 
651
  # Validate contract structure using ContractValidator
652
- validator = ContractValidator()
653
  is_valid_contract, validation_type, message = validator.is_valid_contract(contract_text)
654
 
655
  if not is_valid_contract:
656
  error_message = message if "does not appear to be a legal contract" in message else "The provided document does not appear to be a legal contract. Please upload a valid contract for analysis."
657
- raise HTTPException(status_code=400, detail=error_message)
658
-
 
 
659
  # Create analysis options
660
- options = AnalysisOptions(
661
- max_clauses=min(max_clauses, settings.MAX_CLAUSES_TO_ANALYZE),
662
- interpret_clauses=interpret_clauses,
663
- generate_negotiation_points=generate_negotiation_points,
664
- compare_to_market=compare_to_market
665
- )
666
-
667
  # Perform analysis
668
- result = analysis_service.analyze_contract(contract_text, options)
669
-
670
  log_info(f"Text analysis completed",
671
- analysis_id=result["analysis_id"],
672
- risk_score=result["risk_analysis"]["overall_score"])
 
673
 
674
  return AnalysisResult(**result)
675
-
676
  except HTTPException:
677
  raise
 
678
  except Exception as e:
679
- log_error(f"Text analysis failed: {e}")
680
- raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
 
 
 
 
681
 
682
  @app.post("/api/v1/generate-pdf")
683
  async def generate_pdf_from_analysis(analysis_result: Dict[str, Any]):
684
  try:
685
- pdf_buffer = generate_pdf_report(analysis_result)
686
-
 
 
 
 
687
  analysis_id = analysis_result.get('analysis_id', 'report')
688
- return Response(
689
- content=pdf_buffer.getvalue(),
690
- media_type="application/pdf",
691
- headers={
692
- "Content-Disposition": f"attachment; filename=contract_analysis_{analysis_id}.pdf"
693
- }
694
- )
695
  except Exception as e:
696
- log_error(f"PDF generation failed: {e}")
697
- raise HTTPException(status_code=500, detail=f"Failed to generate PDF: {str(e)}")
 
 
 
 
698
 
699
  @app.get("/api/v1/categories")
700
  async def get_contract_categories():
701
  if not analysis_service:
702
- raise HTTPException(status_code=503, detail="Service not initialized")
 
 
703
 
704
  try:
705
- categories = analysis_service.services["classifier"].get_all_categories()
706
- return {"categories": categories}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
707
  except Exception as e:
708
- log_error(f"Categories fetch failed: {e}")
709
- raise HTTPException(status_code=500, detail=f"Failed to get categories: {str(e)}")
 
 
710
 
711
- @app.post("/api/v1/validate/file", response_model=FileValidationResponse)
712
- async def validate_contract_file(file: UploadFile = File(...)):
713
  try:
714
  is_valid, message = validate_file(file)
715
  if not is_valid:
716
- return FileValidationResponse(valid=False, message=message)
717
-
 
 
718
  contract_text = read_contract_file(file)
719
-
720
  # Validate text length
721
  is_valid_text, text_message = validate_contract_text(contract_text)
722
- if not is_valid_text:
723
- return FileValidationResponse(valid=False, message=text_message)
724
 
 
 
 
 
 
725
  # Validate contract structure using ContractValidator
726
  validator = ContractValidator()
727
- report = validator.get_validation_report(contract_text)
728
-
729
- return FileValidationResponse(
730
- valid=report["scores"]["total"] > 50 and is_valid_text,
731
- message="Contract appears valid" if report["scores"]["total"] > 50 else "May not be a valid contract",
732
- confidence=report["scores"]["total"],
733
- report=report
734
- )
735
 
 
 
 
 
 
 
736
  except Exception as e:
737
  log_error(f"File validation failed: {e}")
738
- raise HTTPException(status_code=400, detail=f"Validation failed: {str(e)}")
 
 
 
739
 
740
- @app.post("/api/v1/validate/text", response_model=FileValidationResponse)
 
741
  async def validate_contract_text_endpoint(contract_text: str = Form(...)):
742
  try:
743
  # Validate text length
744
  is_valid, message = validate_contract_text(contract_text)
745
- if not is_valid:
746
- return FileValidationResponse(valid=False, message=message)
747
 
 
 
 
 
 
748
  # Validate contract structure using ContractValidator
749
  validator = ContractValidator()
750
- report = validator.get_validation_report(contract_text)
751
-
752
- return FileValidationResponse(
753
- valid=report["scores"]["total"] > 50 and is_valid,
754
- message="Contract appears valid" if report["scores"]["total"] > 50 else "May not be a valid contract",
755
- confidence=report["scores"]["total"],
756
- report=report
757
- )
758
 
 
 
 
 
 
 
759
  except Exception as e:
760
- log_error(f"Text validation failed: {e}")
761
- raise HTTPException(status_code=400, detail=f"Validation failed: {str(e)}")
 
 
762
 
763
- # ============================================================================
764
- # ERROR HANDLERS AND MIDDLEWARE (UNCHANGED)
765
- # ============================================================================
766
 
 
767
  @app.exception_handler(HTTPException)
768
  async def http_exception_handler(request, exc):
769
- return NumpyJSONResponse(
770
- status_code=exc.status_code,
771
- content=ErrorResponse(
772
- error=exc.detail,
773
- detail=str(exc.detail),
774
- timestamp=datetime.now().isoformat()
775
- ).dict()
776
- )
777
 
778
  @app.exception_handler(Exception)
779
  async def general_exception_handler(request, exc):
780
  log_error(f"Unhandled exception: {exc}")
781
- return NumpyJSONResponse(
782
- status_code=500,
783
- content=ErrorResponse(
784
- error="Internal server error",
785
- detail=str(exc),
786
- timestamp=datetime.now().isoformat()
787
- ).dict()
788
- )
789
 
790
  @app.middleware("http")
791
  async def log_requests(request: Request, call_next):
792
- start_time = time.time()
793
- response = await call_next(request)
794
  process_time = time.time() - start_time
795
 
796
  log_info(f"API Request: {request.method} {request.url.path} - Status: {response.status_code} - Duration: {process_time:.3f}s")
797
 
798
  return response
799
 
800
- # ============================================================================
801
- # MAIN (UNCHANGED)
802
- # ============================================================================
803
  if __name__ == "__main__":
804
  def signal_handler(sig, frame):
805
  print("\n👋 Received Ctrl+C, shutting down gracefully...")
806
  sys.exit(0)
807
-
808
  signal.signal(signal.SIGINT, signal_handler)
809
-
810
  try:
811
- uvicorn.run(
812
- "app:app",
813
- host=settings.HOST,
814
- port=settings.PORT,
815
- reload=settings.RELOAD,
816
- workers=1,
817
- log_level=settings.LOG_LEVEL.lower()
818
- )
819
  except KeyboardInterrupt:
820
  print("\n🎯 Server stopped by user")
 
821
  except Exception as e:
822
  log_error(f"Server error: {e}")
823
- sys.exit(1)
 
 
1
+ # app.py
2
+ # DEPENDENCIES
 
 
 
3
  import os
4
+ import sys
5
  import time
6
  import json
7
  import uuid
8
+ import signal
9
+ import uvicorn
10
+ import numpy as np
11
+ from typing import Any
12
+ from typing import List
13
+ from typing import Dict
14
  from pathlib import Path
15
+ from fastapi import File
16
+ from fastapi import Form
17
+ from pydantic import Field
18
+ from fastapi import FastAPI
19
+ from fastapi import Request
20
+ from typing import Optional
21
  from datetime import datetime
22
+ from pydantic import BaseModel
23
+ from fastapi import UploadFile
24
+ from fastapi import HTTPException
25
+ from fastapi.responses import Response
26
+ from fastapi.responses import JSONResponse
27
+ from fastapi.responses import FileResponse
28
  from contextlib import asynccontextmanager
 
 
 
 
 
 
29
  from fastapi.staticfiles import StaticFiles
30
+ from fastapi.middleware.cors import CORSMiddleware
 
31
 
32
  # Add parent directory to path
33
  sys.path.append(str(Path(__file__).parent))
34
 
35
+ from utils.logger import log_info
36
+ from utils.logger import log_error
37
  from config.settings import settings
38
  from config.risk_rules import ContractType
39
+ from services.data_models import RiskScore
 
 
40
  from utils.validators import ContractValidator
41
  from utils.text_processor import TextProcessor
42
+ from services.data_models import SummaryContext
43
+ from utils.logger import ContractAnalyzerLogger
44
+ from services.risk_analyzer import RiskAnalyzer
45
+ from services.term_analyzer import TermAnalyzer
46
+ from services.data_models import ExtractedClause
47
+ from services.data_models import UnfavorableTerm
48
+ from utils.document_reader import DocumentReader
49
+ from model_manager.llm_manager import LLMManager
50
+ from services.data_models import NegotiationPoint
51
+ from services.data_models import ContractCategory
52
+ from model_manager.llm_manager import LLMProvider
53
+ from model_manager.model_loader import ModelLoader
54
+ from services.data_models import MissingProtection
55
+ from services.data_models import RiskInterpretation
56
+ from services.data_models import NegotiationPlaybook
57
+ from reporter.pdf_generator import PDFReportGenerator
58
+ from services.data_models import ClauseInterpretation
59
+ from reporter.pdf_generator import generate_pdf_report
60
  from services.summary_generator import SummaryGenerator
61
+ from services.clause_extractor import RiskClauseExtractor
62
+ from services.negotiation_engine import NegotiationEngine
63
+ from services.llm_interpreter import LLMClauseInterpreter
64
+ from services.protection_checker import ProtectionChecker
65
+ from services.contract_classifier import ContractClassifier
66
+ from services.clause_extractor import ComprehensiveClauseExtractor
67
 
 
 
68
 
69
  # ============================================================================
70
+ # CUSTOM SERIALIZATION METHODS
71
  # ============================================================================
 
72
class NumpyJSONEncoder(json.JSONEncoder):
    """
    JSON encoder that knows how to serialise numpy values and simple export-capable objects
    """
    def default(self, obj: Any) -> Any:
        """
        Convert a value the stock encoder rejected into something JSON can represent

        The checks run in a fixed order: explicit numpy scalar types, arrays, numpy booleans,
        then any object exposing ``item()``, ``to_dict()`` or ``dict()``, then sets/tuples.
        Anything else is delegated to the base class, which raises ``TypeError``.
        """
        if isinstance(obj, (np.float32, np.float64)):
            return float(obj)

        if isinstance(obj, (np.int32, np.int64, np.int8, np.uint8)):
            return int(obj)

        if isinstance(obj, np.ndarray):
            return obj.tolist()

        if isinstance(obj, np.bool_):
            return bool(obj)

        # Duck-typed exporters, tried in priority order
        for exporter_name in ("item", "to_dict", "dict"):
            if hasattr(obj, exporter_name):
                return getattr(obj, exporter_name)()

        if isinstance(obj, (set, tuple)):
            return list(obj)

        return super().default(obj)
99
 
100
+
101
class NumpyJSONResponse(JSONResponse):
    """
    JSON response whose body is encoded through NumpyJSONEncoder
    """
    def render(self, content: Any) -> bytes:
        """
        Encode content as compact UTF-8 JSON bytes

        ``allow_nan`` is disabled, so ``json.dumps`` raises ``ValueError`` for NaN / Infinity
        floats instead of emitting non-standard JSON tokens.
        """
        dump_options = {"ensure_ascii": False,
                        "allow_nan": False,
                        "indent": None,
                        "separators": (",", ":"),
                        "cls": NumpyJSONEncoder,
                        }
        body_text = json.dumps(content, **dump_options)

        return body_text.encode("utf-8")
111
+
112
 
113
def convert_numpy_types(obj: Any) -> Any:
    """
    Recursively replace numpy values inside obj with plain Python equivalents

    Dict values are converted (keys are left untouched); lists, tuples and sets all come back
    as lists. Objects exposing ``item()``, ``to_dict()`` or ``dict()`` are exported through
    that method, in that order of preference. Any other value is returned unchanged.
    """
    if obj is None:
        return None

    # Containers first, so a plain dict never reaches the duck-typed checks below
    if isinstance(obj, dict):
        return {name: convert_numpy_types(member) for name, member in obj.items()}

    if isinstance(obj, (list, tuple, set)):
        return [convert_numpy_types(member) for member in obj]

    if isinstance(obj, (np.float32, np.float64)):
        return float(obj)

    if isinstance(obj, (np.int32, np.int64, np.int8, np.uint8)):
        return int(obj)

    if isinstance(obj, np.ndarray):
        return obj.tolist()

    if isinstance(obj, np.bool_):
        return bool(obj)

    # Remaining scalar-like objects (e.g. other numpy scalar widths)
    if hasattr(obj, 'item'):
        return obj.item()

    # Export-capable objects: convert whatever their export produces as well
    if hasattr(obj, 'to_dict'):
        return convert_numpy_types(obj.to_dict())

    if hasattr(obj, 'dict'):
        return convert_numpy_types(obj.dict())

    return obj
146
 
147
+
148
def safe_serialize_response(data: Any) -> Any:
    """
    Return data with every numpy value converted by convert_numpy_types
    """
    serializable = convert_numpy_types(data)

    return serializable
150
 
 
 
 
151
 
152
+ # PYDANTIC SCHEMAS
153
class SerializableBaseModel(BaseModel):
    """
    Pydantic base model whose dict / JSON exports contain only plain Python values
    """
    def dict(self, *args, **kwargs) -> Dict[str, Any]:
        """
        Export the model as a dict and strip numpy types from the result

        Arguments are passed straight through to pydantic's own ``dict()``.
        """
        data = super().dict(*args, **kwargs)

        return convert_numpy_types(data)


    def json(self, *args, **kwargs) -> str:
        """
        Export the model as a JSON string encoded with NumpyJSONEncoder

        Keyword arguments are split by destination: the export options that pydantic's
        ``dict()`` accepts go to ``self.dict()``, everything else (``indent``, ``sort_keys``,
        ...) goes to ``json.dumps``. Forwarding every argument to both calls made any keyword
        fail in one of them with ``TypeError``.
        """
        # Export options accepted by pydantic's ``dict()``  (NOTE(review): pydantic v1 names, confirm if v2 is used)
        dict_option_names = ("include",
                             "exclude",
                             "by_alias",
                             "skip_defaults",
                             "exclude_unset",
                             "exclude_defaults",
                             "exclude_none",
                             )
        dict_kwargs = {name: kwargs.pop(name) for name in dict_option_names if name in kwargs}
        data = self.dict(*args, **dict_kwargs)

        # Default encoder, but let an explicit ``cls`` win instead of raising a duplicate-keyword error
        kwargs.setdefault("cls", NumpyJSONEncoder)

        return json.dumps(data, **kwargs)
162
+
163
 
164
class HealthResponse(SerializableBaseModel):
    """
    Response schema for the health-check endpoint
    """
    status: str
    version: str
    timestamp: str
    # Counts and memory figure as reported by the analysis service status
    models_loaded: int
    services_loaded: int
    memory_usage_mb: float
171
+
172
 
173
class AnalysisOptions(SerializableBaseModel):
    """
    Per-request switches controlling how much of the analysis pipeline runs
    """
    # The upper bound must admit the default: pydantic does not validate defaults, so with
    # ``le = 30`` a bare ``AnalysisOptions()`` silently produced 50 while an explicit
    # ``max_clauses = 50`` was rejected. The bound now matches the value already in use.
    max_clauses                 : int  = Field(default = 50, ge = 5, le = 50)
    interpret_clauses           : bool = Field(default = True)
    generate_negotiation_points : bool = Field(default = True)
    compare_to_market           : bool = Field(default = False) # Disabled for now
178
+
179
 
180
class AnalysisResult(SerializableBaseModel):
    """
    Response schema for a completed contract analysis
    """
    analysis_id: str
    timestamp: str

    # Always-present analysis sections
    classification: Dict[str, Any]
    clauses: List[Dict[str, Any]]
    risk_analysis: Dict[str, Any]
    unfavorable_terms: List[Dict[str, Any]]
    missing_protections: List[Dict[str, Any]]

    # Optional sections; None when not supplied
    clause_interpretations: Optional[List[Dict[str, Any]]] = None
    negotiation_points: Optional[List[Dict[str, Any]]] = None
    market_comparisons: Optional[List[Dict[str, Any]]] = None

    executive_summary: str
    metadata: Dict[str, Any]
    pdf_available: bool = True
194
+
195
 
196
class ErrorResponse(SerializableBaseModel):
    """
    Error payload schema: short error label, detail text and a timestamp string
    """
    error: str
    detail: str
    timestamp: str
200
+
201
 
202
class FileValidationResponse(SerializableBaseModel):
    """
    Result schema for the contract validation endpoints
    """
    valid: bool
    message: str
    # Only present when a validation report was produced
    confidence: Optional[float] = None
    report: Optional[Dict[str, Any]] = None
207
 
 
 
 
208
 
209
+ # SERVICE INITIALIZATION WITH FULL PIPELINE INTEGRATION
210
  class PreloadedAnalysisService:
211
+ """
212
+ Analysis service with complete pipeline integration
213
+ """
214
def __init__(self):
    """
    Create the shared loaders, reset the registries and eagerly pre-load every service
    """
    # Shared dependencies passed to the individual services during pre-loading
    self.model_loader = ModelLoader()
    self.llm_manager = LLMManager()

    # service key -> instance, and service key -> load status string
    self.services = {}
    self.service_status = {}
    self.memory_usage_mb = 0

    self._preload_all_services()
222
+
223
+
224
def _preload_all_services(self):
    """
    Pre-load ALL services and models at initialization

    Required services (classifier, clause extractors, risk analyzer, term analyzer,
    protection checker) abort start-up when they fail: the error is logged and re-raised.
    Optional services (LLM interpreter, negotiation engine) are stored as ``None`` with a
    ``"failed: ..."`` status, and the summary generator falls back to a no-argument instance.
    The cause of every optional failure is now logged rather than silently dropped.
    """
    def load_required(key, factory, start_msg, ok_msg, fail_prefix):
        # Build a service that start-up cannot continue without
        log_info(start_msg)
        try:
            self.services[key] = factory()
            self.service_status[key] = "loaded"
            log_info(ok_msg)

        except Exception as e:
            log_error(f"{fail_prefix}: {repr(e)}")
            raise

    def load_optional(key, factory, start_msg, ok_msg, unavailable_msg):
        # Build a service whose absence is tolerated; record why it is missing
        log_info(start_msg)
        try:
            self.services[key] = factory()
            self.service_status[key] = "loaded"
            log_info(ok_msg)

        except Exception as e:
            self.services[key] = None
            self.service_status[key] = f"failed: {repr(e)}"
            log_error(f"Optional service '{key}' failed to load: {repr(e)}")
            log_info(unavailable_msg)

    log_info("PRE-LOADING ALL AI MODELS AND SERVICES")

    try:
        initial_memory = self._get_memory_usage()

        # Pre-load Contract Classifier
        load_required("classifier",
                      lambda: ContractClassifier(self.model_loader),
                      "🔄 Pre-loading Contract Classifier...",
                      "✅ Contract Classifier loaded",
                      "Failed to load ContractClassifier",
                      )

        # Pre-load ComprehensiveClauseExtractor as base for RiskClauseExtractor
        load_required("comprehensive_extractor",
                      lambda: ComprehensiveClauseExtractor(self.model_loader),
                      "🔄 Pre-loading Comprehensive Clause Extractor...",
                      "✅ Comprehensive Clause Extractor loaded",
                      "Failed to load ComprehensiveClauseExtractor",
                      )

        # Initialize RiskClauseExtractor with default type (will be recreated per analysis)
        load_required("clause_extractor",
                      lambda: RiskClauseExtractor(model_loader = self.model_loader,
                                                  contract_type = ContractType.GENERAL,
                                                  ),
                      "🔄 Initializing Risk-Focused Clause Extractor...",
                      "✅ Risk-Focused Clause Extractor initialized",
                      "Failed to initialize RiskClauseExtractor",
                      )

        # Pre-load RiskAnalyzer
        load_required("risk_analyzer",
                      lambda: RiskAnalyzer(self.model_loader),
                      "🔄 Pre-loading Risk Analyzer...",
                      "✅ Comprehensive Risk Analyzer loaded",
                      "Failed to load RiskAnalyzer",
                      )

        # Pre-load LLM Interpreter
        load_optional("llm_interpreter",
                      lambda: LLMClauseInterpreter(self.llm_manager),
                      "🔄 Pre-loading LLM Interpreter...",
                      "✅ LLM Interpreter loaded",
                      "⚠️ LLM Interpreter not available",
                      )

        # Pre-load Negotiation Engine
        load_optional("negotiation_engine",
                      lambda: NegotiationEngine(llm_manager = self.llm_manager,
                                                default_provider = LLMProvider.OLLAMA,
                                                ),
                      "🔄 Pre-loading Negotiation Engine...",
                      "✅ Negotiation Engine loaded",
                      "⚠️ Negotiation Engine not available",
                      )

        # Pre-load Summary Generator (has its own fallback instead of None)
        log_info("🔄 Pre-loading Summary Generator...")
        try:
            self.services["summary_generator"] = SummaryGenerator(llm_manager = self.llm_manager)
            self.service_status["summary_generator"] = "loaded"

            log_info("✅ Summary Generator loaded")

        except Exception as e:
            # Keep the cause visible before switching to the fallback instance
            log_error(f"Summary Generator failed to initialize with LLM manager: {repr(e)}")

            self.services["summary_generator"] = SummaryGenerator()
            self.service_status["summary_generator"] = "fallback_loaded"

            log_info("⚠️ Summary Generator using fallback mode")

        # Pre-load Unfavorable Term Analyzer (default contract type, updated per analysis)
        load_required("term_analyzer",
                      lambda: TermAnalyzer(contract_type = ContractType.GENERAL),
                      "🔄 Pre-loading Unfavorable Term Analyzer...",
                      "✅ Unfavorable Term Analyzer loaded",
                      "Failed to load TermAnalyzer",
                      )

        # Pre-load Missing Protection Checker (default contract type, updated per analysis)
        load_required("protection_checker",
                      lambda: ProtectionChecker(contract_type = ContractType.GENERAL),
                      "🔄 Pre-loading Missing Protection Checker...",
                      "✅ Protection Checker loaded",
                      "Failed to load ProtectionChecker",
                      )

        # Calculate memory usage
        final_memory = self._get_memory_usage()
        self.memory_usage_mb = final_memory - initial_memory

        log_info("🎉 ALL SERVICES PRE-LOADED SUCCESSFULLY!")
        log_info(f"📊 Memory Usage: {self.memory_usage_mb:.2f} MB")
        log_info(f"🔧 Services Loaded: {len(self.service_status)}")

    except Exception as e:
        log_error(f"CRITICAL: Failed to pre-load services: {e}")
        raise
366
+
367
+
368
  def _get_memory_usage(self) -> float:
369
+ """
370
+ Get current memory usage in MB
371
+ """
372
  try:
373
  import psutil
374
  process = psutil.Process()
375
  return process.memory_info().rss / 1024 / 1024
376
+
377
  except ImportError:
378
  return 0.0
379
+
380
+
381
+ def _create_fallback_negotiation_points(self, risk_score, unfavorable_terms, missing_protections):
382
+ """
383
+ Create basic negotiation points when engine fails
384
+ """
385
+ fallback_points = list()
386
+ # Add top unfavorable terms
387
+ for term in unfavorable_terms[:5]:
388
+ fallback_points.append({"priority" : 1 if term.severity == "critical" else 2,
389
+ "category" : term.category,
390
+ "issue" : term.term,
391
+ "current_language" : "See contract clause",
392
+ "proposed_language" : term.suggested_fix or "Request balanced language",
393
+ "rationale" : term.explanation,
394
+ "estimated_difficulty" : "medium"
395
+ })
396
+ # Add critical missing protections
397
+ for protection in [p for p in missing_protections if (p.importance == "critical")][:5]:
398
+ fallback_points.append({"priority" : 1,
399
+ "category" : protection.categories[0] if protection.categories else "general",
400
+ "issue" : f"Add {protection.protection}",
401
+ "current_language" : "[MISSING]",
402
+ "proposed_language" : protection.suggested_language or protection.recommendation,
403
+ "rationale" : protection.explanation,
404
+ "estimated_difficulty" : "medium"
405
+ })
406
+ return fallback_points
407
+
408
+
409
def get_service_status(self) -> Dict[str, Any]:
    """
    Get detailed service status

    Returns the per-service status map, the model loader's registry statistics, the recorded
    memory usage, and two totals. A service counts as loaded when its status contains
    "loaded" and is not a ``"failed: ..."`` entry; the second condition stops a failure
    message that mentions "loaded" from being counted as a success.
    """
    def counts_as_loaded(status) -> bool:
        status_text = str(status)
        return ("loaded" in status_text) and (not status_text.startswith("failed"))

    model_stats = self.model_loader.get_registry_stats()
    loaded_service_count = sum(1 for status in self.service_status.values() if counts_as_loaded(status))

    return {"services"              : self.service_status,
            "models"                : model_stats,
            "memory_usage_mb"       : self.memory_usage_mb,
            "total_services_loaded" : loaded_service_count,
            "total_models_loaded"   : model_stats.get("loaded_models", 0),
            }
420
+
421
+
422
  def analyze_contract(self, contract_text: str, options: AnalysisOptions) -> Dict[str, Any]:
423
+ """
424
+ Complete contract analysis using full pipeline
425
+ """
426
  try:
427
  log_info("Starting comprehensive contract analysis pipeline...")
428
+
429
+ # Classify contract
430
+ classification = self.services["classifier"].classify_contract(contract_text)
431
+ classification_dict = safe_serialize_response(classification.to_dict())
432
  log_info(f"Contract classified as: {classification.category}")
433
 
434
+ # Debug logging for classification
435
+ log_info(f"Classification details - Confidence: {classification.confidence:.3f}, "
436
+ f"Subcategory: {classification.subcategory}, "
437
+ f"Keywords found: {len(classification.detected_keywords)}",
438
+ )
439
+
440
+ # Get ContractType enum for downstream services
441
+ contract_type_enum = self._get_contract_type_enum(category_str = classification.category)
442
+
443
+ # Re-initialize RiskClauseExtractor with correct contract type: crucial for category mapping in risk analysis
444
+ if (hasattr(self.services["clause_extractor"], 'contract_type')):
445
+ self.services["clause_extractor"].contract_type = contract_type_enum
446
+ self.services["clause_extractor"].category_weights = self.services["clause_extractor"].risk_rules.get_adjusted_weights(contract_type_enum)
447
+
448
+ log_info(f"Updated RiskClauseExtractor for contract type: {contract_type_enum.value}")
449
 
450
+ else:
451
+ # Fallback: create new instance if update not possible
452
+ self.services["clause_extractor"] = RiskClauseExtractor(model_loader = self.model_loader,
453
+ contract_type = contract_type_enum,
454
+ )
455
+ log_info(f"Re-initialized RiskClauseExtractor for contract type: {contract_type_enum.value}")
456
+
457
+ # Extract Risk Focused clauses (outputs risk categories)
458
+ clauses = list()
459
+ clauses_dict = list()
460
+ try:
461
+ # Try risk-focused extraction first
462
+ clauses = self.services["clause_extractor"].extract_risk_clauses(contract_text = contract_text,
463
+ max_clauses = options.max_clauses,
464
+ )
465
+
466
+ log_info(f"Extracted {len(clauses)} risk-focused clauses")
467
 
468
+ except Exception as e:
469
+ log_error(f"Risk-focused clause extraction failed: {repr(e)}")
470
+ # Fallback to comprehensive extraction
471
+ try:
472
+ log_info("Attempting fallback to comprehensive clause extraction...")
473
+ clauses = self.services["comprehensive_extractor"].extract_clauses(contract_text = contract_text,
474
+ max_clauses = options.max_clauses,
475
+ )
476
+
477
+ log_info(f"Fallback extracted {len(clauses)} comprehensive clauses")
478
+
479
+ except Exception as fallback_error:
480
+ log_error(f"Comprehensive clause extraction also failed: {repr(fallback_error)}")
481
+ clauses = []
482
+
483
+ # Process clauses regardless of extraction method
484
+ if clauses:
485
+ clauses_dict = [safe_serialize_response(clause.to_dict()) for clause in clauses]
486
+ # Debug logging for clause extraction
487
+ clause_categories = [clause.category for clause in clauses]
488
+ unique_categories = list(set(clause_categories))
489
+
490
+ log_info(f"Clause categories extracted: {unique_categories}")
491
+
492
+ # Log risk scores if available
493
+ risk_scores = [getattr(clause, 'risk_score', 0) for clause in clauses if hasattr(clause, 'risk_score')]
494
+
495
+ if risk_scores:
496
+ avg_risk = sum(risk_scores) / len(risk_scores)
497
+ log_info(f"Average clause risk score: {avg_risk:.2f}")
498
+
499
+ # Analyze UNFAVORABLE TERMS (outputs risk categories)
500
+ unfavorable_terms = list()
501
+ unfavorable_terms_dict = list()
502
 
503
+ try:
504
+ # Update term analyzer with correct contract type
505
+ if hasattr(self.services["term_analyzer"], 'contract_type'):
506
+ self.services["term_analyzer"].contract_type = contract_type_enum
507
+ self.services["term_analyzer"].category_weights = self.services["term_analyzer"].risk_rules.get_adjusted_weights(contract_type_enum)
508
+
509
+ log_info(f"Updated TermAnalyzer for contract type: {contract_type_enum.value}")
510
+
511
+ unfavorable_terms = self.services["term_analyzer"].analyze_unfavorable_terms(contract_text = contract_text,
512
+ clauses = clauses,
513
+ contract_type = contract_type_enum,
514
+ )
515
+
516
+ unfavorable_terms_dict = [safe_serialize_response(term.to_dict()) for term in unfavorable_terms]
517
+
518
+ log_info(f"Analyzed {len(unfavorable_terms)} unfavorable terms")
519
+
520
+ # Debug logging for term analysis
521
+ if unfavorable_terms:
522
+ severity_counts = dict()
523
+ for term in unfavorable_terms:
524
+ severity_counts[term.severity] = severity_counts.get(term.severity, 0) + 1
525
+
526
+ log_info(f"Term severity distribution: {severity_counts}")
527
+
528
+ # Log top 10 highest risk terms
529
+ top_terms = sorted(unfavorable_terms, key = lambda x: x.risk_score, reverse = True)[:10]
530
+ for i, term in enumerate(top_terms):
531
+ log_info(f"Top term {i+1}: {term.term} (Risk: {term.risk_score}, Severity: {term.severity})")
532
 
533
+ except Exception as e:
534
+ log_error(f"Unfavorable terms analysis failed: {repr(e)}")
535
+
536
+ # Continue with empty terms but log the error
537
+ unfavorable_terms = list()
538
+ unfavorable_terms_dict = list()
539
+
540
+ # Check for Missing Protections (outputs risk categories)
541
+ missing_protections = list()
542
+ missing_protections_dict = list()
543
+
544
+ try:
545
+ # Update protection checker with correct contract type
546
+ if hasattr(self.services["protection_checker"], 'contract_type'):
547
+ self.services["protection_checker"].contract_type = contract_type_enum
548
+ self.services["protection_checker"].protection_priorities = self.services["protection_checker"]._get_contract_type_priorities()
549
+
550
+ log_info(f"Updated ProtectionChecker for contract type: {contract_type_enum.value}")
551
+
552
+ missing_protections = self.services["protection_checker"].check_missing_protections(contract_text = contract_text,
553
+ clauses = clauses,
554
+ contract_type = contract_type_enum,
555
+ )
556
+ missing_protections_dict = [safe_serialize_response(prot.to_dict()) for prot in missing_protections]
557
+
558
+ log_info(f"Checked for {len(missing_protections)} missing protections")
559
+
560
+ # Debug logging for protection analysis
561
+ if missing_protections:
562
+ importance_counts = dict()
563
+ for prot in missing_protections:
564
+ importance_counts[prot.importance] = importance_counts.get(prot.importance, 0) + 1
565
+
566
+ log_info(f"Missing protection importance: {importance_counts}")
567
+
568
+ # Log top 10 highest risk missing protections
569
+ top_protections = sorted(missing_protections, key = lambda x: x.risk_score, reverse = True)[:10]
570
+
571
+ for i, prot in enumerate(top_protections):
572
+ log_info(f"Top missing protection {i+1}: {prot.protection} (Risk: {prot.risk_score}, Importance: {prot.importance})")
573
+
574
+ except Exception as e:
575
+ log_error(f"Missing protection analysis failed: {repr(e)}")
576
+
577
+ # Continue with empty protections but log the error
578
+ missing_protections = list()
579
+ missing_protections_dict = list()
580
+
581
+ # Perform Complete Risk Analysis
582
+ risk_score = self.services["risk_analyzer"].analyze_contract_risk(contract_text = contract_text)
583
+ risk_dict = safe_serialize_response(risk_score.to_dict())
584
+ log_info(f"Risk analysis completed: {risk_score.overall_score}/100")
585
+
586
+ # Generate LLM Interpretations (if available)
587
  risk_interpretation = None
588
 
589
+ if self.services["llm_interpreter"]:
590
  try:
591
+ risk_interpretation = self.services["llm_interpreter"].interpret_with_risk_context(clauses = clauses,
592
+ unfavorable_terms = unfavorable_terms,
593
+ missing_protections = missing_protections,
594
+ contract_type = contract_type_enum,
595
+ overall_risk_score = risk_score.overall_score,
596
+ max_clauses = len(clauses),
597
+ provider = LLMProvider.OLLAMA,
598
+ )
599
+ log_info("LLM risk interpretation generated")
600
+
 
 
 
601
  except Exception as e:
602
+ log_error(f"LLM interpretation failed: {repr(e)}")
603
+ # Continue without LLM interpretation
604
 
605
+ else:
606
+ # If LLM is not available, create a basic interpretation object to pass downstream
607
+ risk_interpretation = RiskInterpretation(overall_risk_explanation = f"Contract risk score: {risk_score.overall_score}/100 ({risk_score.risk_level}).",
608
+ key_concerns = [f"Risk level: {risk_score.risk_level}"],
609
+ negotiation_strategy = "Address critical terms identified in analysis.",
610
+ market_comparison = "Compare with industry standards.",
611
+ clause_interpretations = [],
612
+ )
613
+
614
+
615
+ # Generate Negotiation Playbook (uses full context)
616
  negotiation_playbook = None
617
+ negotiation_dict = list()
618
 
619
+ if self.services["negotiation_engine"]:
620
  try:
621
+ # Ensure we have proper objects, not dicts
622
+ unfavorable_terms_objects = unfavorable_terms
623
+ missing_protections_objects = missing_protections
 
 
 
 
 
 
 
 
 
 
 
 
624
 
625
+ # Create a fallback risk interpretation if LLM failed (already handled above)
626
+ negotiation_playbook = self.services["negotiation_engine"].generate_comprehensive_playbook(risk_analysis = risk_score,
627
+ risk_interpretation = risk_interpretation,
628
+ unfavorable_terms = unfavorable_terms_objects,
629
+ missing_protections = missing_protections_objects,
630
+ clauses = clauses,
631
+ contract_type = contract_type_enum,
632
+ max_points = len(clauses),
633
+ )
634
+
635
+ negotiation_dict = [safe_serialize_response(point.to_dict()) for point in negotiation_playbook.critical_points]
636
+
637
+ log_info(f"Negotiation playbook generated with {len(negotiation_playbook.critical_points)} points")
638
 
639
  except Exception as e:
640
+ log_error(f"Negotiation playbook generation failed: {repr(e)}")
641
+
642
+ # Create fallback negotiation points
643
+ negotiation_dict = self._create_fallback_negotiation_points(risk_score, unfavorable_terms, missing_protections)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
644
 
645
+ else:
646
+ # If negotiation engine is not available, create fallback points
647
+ negotiation_dict = self._create_fallback_negotiation_points(risk_score, unfavorable_terms, missing_protections)
648
+
649
+
650
+ # Generate Executive Summary (uses full context)
651
+ executive_summary = self.services["summary_generator"].generate_executive_summary(contract_text = contract_text,
652
+ classification = classification,
653
+ risk_analysis = risk_score,
654
+ risk_interpretation = risk_interpretation,
655
+ negotiation_playbook = negotiation_playbook,
656
+ unfavorable_terms = unfavorable_terms,
657
+ missing_protections = missing_protections,
658
+ clauses = clauses,
659
+ )
660
+ log_info("Executive summary generated")
661
+
662
  # Build final result matching frontend expectations
663
+ result = {"analysis_id" : str(uuid.uuid4()),
664
+ "timestamp" : datetime.now().isoformat(),
665
+ "classification" : classification_dict,
666
+ "clauses" : clauses_dict,
667
+ "risk_analysis" : risk_dict,
668
+ "unfavorable_terms" : unfavorable_terms_dict,
669
+ "missing_protections" : missing_protections_dict,
670
+ "clause_interpretations" : [safe_serialize_response(interp.to_dict()) for interp in (risk_interpretation.clause_interpretations if risk_interpretation else [])],
671
+ "negotiation_points" : negotiation_dict,
672
+ "market_comparisons" : [],
673
+ "executive_summary" : executive_summary,
674
+ "metadata" : {"text_length" : len(contract_text),
675
+ "word_count" : len(contract_text.split()),
676
+ "num_clauses" : len(clauses),
677
+ "contract_type" : contract_type_enum.value,
678
+ "actual_category" : classification.category,
679
+ "subcategory" : classification.subcategory,
680
+ "classification_confidence" : classification.confidence,
681
+ "detected_keywords" : classification.detected_keywords,
682
+ "options" : options.dict(),
683
+ },
684
+ "pdf_available" : True,
685
+ }
686
+
687
  log_info("Contract analysis completed successfully")
688
  return result
689
+
690
  except Exception as e:
691
+ log_error(f"Contract analysis failed: {repr(e)}")
692
  raise
693
+
694
+
695
+ def _score_to_risk_level(self, score: float) -> str:
696
+ """
697
+ Convert risk score to risk level string
698
+ """
699
+ if (score >= 80):
700
+ return "Critical"
701
+
702
+ elif (score >= 60):
703
+ return "High"
704
+
705
+ elif (score >= 40):
706
+ return "Medium"
707
+
708
+ else:
709
+ return "Low"
710
+
711
+
712
def _get_contract_type_enum(self, category_str: str) -> ContractType:
    """
    Resolve a classifier category string to its ContractType member.

    Arguments:
        category_str : category name to look up (matched exactly, case-sensitive)

    Returns:
        The matching ContractType, or ContractType.GENERAL when the category is not in the table.
    """
    category_table = {'employment'  : ContractType.EMPLOYMENT,
                      'consulting'  : ContractType.CONSULTING,
                      'nda'         : ContractType.NDA,
                      'software'    : ContractType.SOFTWARE,
                      'service'     : ContractType.SERVICE,
                      'partnership' : ContractType.PARTNERSHIP,
                      'lease'       : ContractType.LEASE,
                      'purchase'    : ContractType.PURCHASE,
                      'general'     : ContractType.GENERAL,
                      }

    try:
        resolved_type = category_table[category_str]

    except KeyError:
        # Unknown categories fall back to the generic contract type
        resolved_type = ContractType.GENERAL

    log_info(f"Mapping category '{category_str}' to ContractType: {resolved_type.value}")

    return resolved_type
732
 
 
 
 
733
 
734
+
735
# FASTAPI APPLICATION : module-level state
# Holds the shared analysis service; stays None until the lifespan handler assigns it
analysis_service: Optional[PreloadedAnalysisService] = None

# Timestamp taken when this module is imported
app_start_time = time.time()

# Configure the logger once, then fetch the shared logger instance
ContractAnalyzerLogger.setup(app_name = "contract_analyzer",
                             log_dir  = "logs",
                             )

logger = ContractAnalyzerLogger.get_logger()
745
 
746
+
747
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Application lifespan handler: build the shared analysis service before the app
    starts serving, and log shutdown messages when it stops.

    Any exception raised while constructing PreloadedAnalysisService is logged and
    re-raised, so startup fails.
    """
    global analysis_service

    separator = "=" * 80

    log_info(f"🚀 {settings.APP_NAME} v{settings.APP_VERSION} STARTING UP...")
    log_info(separator)

    try:
        analysis_service = PreloadedAnalysisService()
        log_info("✅ All services initialized successfully")

    except Exception as startup_error:
        log_error(f"Startup failed: {startup_error}")
        raise

    log_info(f"📍 Server: {settings.HOST}:{settings.PORT}")
    log_info(separator)
    log_info("✅ AI Contract Risk Analyzer Ready!")

    try:
        # Control returns here when the application shuts down
        yield

    finally:
        log_info("🛑 Shutting down server...")
        log_info("✅ Server shutdown complete")
771
 
772
# Build the FastAPI application; responses default to NumpyJSONResponse
app = FastAPI(
    title = settings.APP_NAME,
    version = settings.APP_VERSION,
    description = "AI-powered contract risk analysis",
    docs_url = "/api/docs",
    redoc_url = "/api/redoc",
    default_response_class = NumpyJSONResponse,
    lifespan = lifespan,
)

# Resolve the static directory relative to this file rather than the working directory
BASE_DIR = Path(__file__).parent
STATIC_DIR = BASE_DIR / "static"

# Expose the static directory under /static
app.mount("/static", StaticFiles(directory = str(STATIC_DIR)), name = "static")

# CORS policy is taken entirely from settings
app.add_middleware(
    CORSMiddleware,
    allow_origins = settings.CORS_ORIGINS,
    allow_credentials = settings.CORS_ALLOW_CREDENTIALS,
    allow_methods = settings.CORS_ALLOW_METHODS,
    allow_headers = settings.CORS_ALLOW_HEADERS,
)
 
796
 
 
 
 
797
 
798
+ # HELPER FUNCTIONS
799
def validate_file(file: UploadFile) -> tuple[bool, str]:
    """
    Check an uploaded file's extension and size before its content is read.

    Arguments:
        file : the upload to inspect; its `filename` and underlying `file` stream are used

    Returns:
        (True, "OK") when the file passes every check, otherwise (False, reason)
        where reason is a human-readable explanation.
    """
    # UploadFile.filename is optional. Treat a missing name as an empty one so the
    # upload is rejected as an invalid file type instead of crashing with TypeError
    filename = file.filename or ""
    file_extension = os.path.splitext(filename)[1].lower()

    if file_extension not in settings.ALLOWED_EXTENSIONS:
        return False, f"Invalid file type. Allowed: {', '.join(settings.ALLOWED_EXTENSIONS)}"

    # Measure the size by seeking to the end of the stream, then rewind so that
    # later readers start from the beginning
    file.file.seek(0, os.SEEK_END)
    size = file.file.tell()

    file.file.seek(0)

    if (size > settings.MAX_UPLOAD_SIZE):
        return False, f"File too large. Max size: {settings.MAX_UPLOAD_SIZE / (1024*1024):.1f}MB"

    if (size == 0):
        return False, "File is empty"

    return True, "OK"
 
821
  Read contract file and return text content.
822
  """
823
  reader = DocumentReader()
 
824
  # Extract file extension without dot
825
  filename = file.filename.lower()
826
  file_extension = Path(filename).suffix.lower().lstrip('.')
 
831
  print(f"📁 DEBUG app.py - No extension found, defaulting to: '{file_extension}'")
832
 
833
  file_contents = reader.read_file(file.file, file_extension)
 
834
  if (not file_contents or not file_contents.strip()):
835
  raise ValueError("Could not extract text from file")
836
+
837
  return file_contents
838
 
839
 
 
840
def validate_contract_text(text: str) -> tuple[bool, str]:
    """
    Check that contract text is non-blank and within the configured length limits.

    Arguments:
        text : raw contract text

    Returns:
        (True, "OK") when the text is acceptable, otherwise (False, reason).
    """
    if not (text and text.strip()):
        return False, "Contract text is empty"

    text_length = len(text)

    if text_length < settings.MIN_CONTRACT_LENGTH:
        return False, f"Contract text too short. Minimum {settings.MIN_CONTRACT_LENGTH} characters required."

    if text_length > settings.MAX_CONTRACT_LENGTH:
        return False, f"Contract text too long. Maximum {settings.MAX_CONTRACT_LENGTH} characters allowed."

    return True, "OK"
851
 
 
 
 
852
 
853
+
854
+ # API ROUTES
855
@app.get("/")
async def serve_frontend():
    """
    Return the frontend's index.html from the static directory.
    """
    index_page = STATIC_DIR / "index.html"

    return FileResponse(str(index_page))
858
 
859
+
860
@app.get("/api/v1/health", response_model = HealthResponse)
async def health_check():
    """
    Report service health together with counts from the analysis service status.

    Raises:
        HTTPException (503) when the analysis service has not been initialized.
    """
    if not analysis_service:
        raise HTTPException(status_code = 503, detail = "Service not initialized")

    status_snapshot = analysis_service.get_service_status()

    return HealthResponse(
        status = "healthy",
        version = settings.APP_VERSION,
        timestamp = datetime.now().isoformat(),
        models_loaded = status_snapshot["total_models_loaded"],
        services_loaded = status_snapshot["total_services_loaded"],
        memory_usage_mb = status_snapshot["memory_usage_mb"],
    )
876
+
877
 
878
@app.get("/api/v1/status")
async def get_detailed_status():
    """
    Return the analysis service's status as reported by get_service_status().

    Raises:
        HTTPException (503) when the analysis service has not been initialized.
    """
    if analysis_service:
        return analysis_service.get_service_status()

    raise HTTPException(status_code = 503, detail = "Service not initialized")
886
 
887
+
888
@app.post("/api/v1/analyze/file", response_model = AnalysisResult)
async def analyze_contract_file(
    file: UploadFile = File(...),
    max_clauses: int = Form(50),
    interpret_clauses: bool = Form(True),
    generate_negotiation_points: bool = Form(True),
    compare_to_market: bool = Form(False),
):
    """
    Analyze an uploaded contract file and return the full analysis result.

    The upload is validated (extension/size), its text is extracted and length-checked,
    and the text is checked by ContractValidator before the analysis service runs.

    Raises:
        HTTPException (503) when the analysis service has not been initialized.
        HTTPException (400) when any validation step rejects the input.
        HTTPException (500) wrapping any other exception raised along the way.
    """
    if not analysis_service:
        raise HTTPException(status_code = 503, detail = "Service not initialized")

    try:
        # Step 1: extension and size checks on the upload itself
        file_ok, file_problem = validate_file(file)

        if not file_ok:
            raise HTTPException(status_code = 400, detail = file_problem)

        # Step 2: extract the text and check its length
        contract_text = read_contract_file(file)
        text_ok, text_problem = validate_contract_text(contract_text)

        if not text_ok:
            raise HTTPException(status_code = 400, detail = text_problem)

        # Step 3: structural check; only the flag and the third element of the result are used
        looks_like_contract, _, validation_note = ContractValidator().is_valid_contract(contract_text)

        if not looks_like_contract:
            raise HTTPException(status_code = 400, detail = f"Invalid contract: {validation_note}")

        # Step 4: run the analysis with the caller's options
        options = AnalysisOptions(
            max_clauses = max_clauses,
            interpret_clauses = interpret_clauses,
            generate_negotiation_points = generate_negotiation_points,
            compare_to_market = compare_to_market,
        )
        result = analysis_service.analyze_contract(contract_text, options)

        log_info(
            "File analysis completed",
            filename = file.filename,
            analysis_id = result["analysis_id"],
            risk_score = result["risk_analysis"]["overall_score"],
        )

        return AnalysisResult(**result)

    except HTTPException:
        # Validation failures already carry the right status code
        raise

    except Exception as analysis_error:
        log_error(f"File analysis failed: {repr(analysis_error)}")

        raise HTTPException(status_code = 500, detail = f"Analysis failed: {repr(analysis_error)}")
951
+
952
+
953
@app.post("/api/v1/analyze/text", response_model = AnalysisResult)
async def analyze_contract_text(
    contract_text: str = Form(..., description="Contract text to analyze"),
    max_clauses: int = Form(15),
    interpret_clauses: bool = Form(True),
    generate_negotiation_points: bool = Form(True),
    compare_to_market: bool = Form(False),
):
    """
    Analyze contract text submitted as a form field and return the full analysis result.

    The text is length-checked and then checked by ContractValidator before the
    analysis service runs.

    Raises:
        HTTPException (503) when the analysis service has not been initialized.
        HTTPException (400) when the text fails validation.
        HTTPException (500) wrapping any other exception raised along the way.
    """
    if not analysis_service:
        raise HTTPException(status_code = 503, detail = "Service not initialized")

    try:
        # Length / emptiness check first
        text_ok, text_problem = validate_contract_text(contract_text)

        if not text_ok:
            raise HTTPException(status_code = 400, detail = text_problem)

        # Structural check; the middle element of the result is not used here
        looks_like_contract, _, validator_message = ContractValidator().is_valid_contract(contract_text)

        if not looks_like_contract:
            # Pass the validator's message through only when it already contains the
            # expected phrase; otherwise substitute the generic explanation
            if "does not appear to be a legal contract" in validator_message:
                error_message = validator_message

            else:
                error_message = "The provided document does not appear to be a legal contract. Please upload a valid contract for analysis."

            raise HTTPException(status_code = 400, detail = error_message)

        options = AnalysisOptions(
            max_clauses = max_clauses,
            interpret_clauses = interpret_clauses,
            generate_negotiation_points = generate_negotiation_points,
            compare_to_market = compare_to_market,
        )
        result = analysis_service.analyze_contract(contract_text, options)

        log_info(
            "Text analysis completed",
            analysis_id = result["analysis_id"],
            risk_score = result["risk_analysis"]["overall_score"],
        )

        return AnalysisResult(**result)

    except HTTPException:
        # Validation failures already carry the right status code
        raise

    except Exception as analysis_error:
        log_error(f"Text analysis failed: {repr(analysis_error)}")

        raise HTTPException(status_code = 500, detail = f"Analysis failed: {repr(analysis_error)}")
1004
+
1005
 
1006
@app.post("/api/v1/generate-pdf")
async def generate_pdf_from_analysis(analysis_result: Dict[str, Any]):
    """
    Render a PDF report from a previously produced analysis result.

    Arguments:
        analysis_result : the analysis dictionary sent by the client; it is passed
                          unchanged to generate_pdf_report

    Returns:
        A Response carrying the PDF bytes as an attachment named after the analysis id.

    Raises:
        HTTPException (500) when PDF generation fails.
    """
    import json
    import re

    try:
        # Snapshot of the most recent request payload, written to the working directory.
        # It is best-effort: an unwritable directory must not turn into a failed PDF request.
        # NOTE(review): this looks like leftover debug output and stores contract data on
        # disk; concurrent requests overwrite the same file. Consider removing it.
        try:
            with open("analysis_result.json", "w", encoding = "utf-8") as fp:
                json.dump(analysis_result, fp)

        except OSError as dump_error:
            log_error(f"Could not write analysis_result.json snapshot: {repr(dump_error)}")

        # Pass the full analysis_result dictionary to the PDF generator
        pdf_buffer = generate_pdf_report(analysis_result = analysis_result)

        # analysis_id comes from the request body, so it is untrusted: keep only
        # filename-safe characters before placing it in a response header
        raw_id = str(analysis_result.get('analysis_id') or 'report')
        analysis_id = re.sub(r"[^A-Za-z0-9_.-]", "_", raw_id)

        return Response(content = pdf_buffer.getvalue(),
                        media_type = "application/pdf",
                        headers = {"Content-Disposition": f"attachment; filename=contract_analysis_{analysis_id}.pdf"}
                        )

    except Exception as e:
        log_error(f"PDF generation failed: {repr(e)}")

        raise HTTPException(status_code = 500,
                            detail = f"Failed to generate PDF: {repr(e)}",
                            )
1028
+
1029
 
1030
@app.get("/api/v1/categories")
async def get_contract_categories():
    """
    List every contract category known to the classifier, with its description
    and subcategories.

    Raises:
        HTTPException (503) when the analysis service has not been initialized.
        HTTPException (500) when the classifier lookup fails.
    """
    if not analysis_service:
        raise HTTPException(status_code = 503, detail = "Service not initialized")

    try:
        classifier = analysis_service.services["classifier"]

        # One entry per category: name, description, subcategories
        category_details = [
            {"name"          : category_name,
             "description"   : classifier.get_category_description(category_name),
             "subcategories" : classifier.get_subcategories(category_name),
             }
            for category_name in classifier.get_all_categories()
        ]

        return {"categories": category_details}

    except Exception as fetch_error:
        log_error(f"Categories fetch failed: {repr(fetch_error)}")
        raise HTTPException(status_code = 500, detail = f"Failed to get categories: {repr(fetch_error)}")
1058
+
1059
 
1060
@app.post("/api/v1/validate/file", response_model = FileValidationResponse)
async def validate_contract_file_endpoint(file: UploadFile = File(...)):
    """
    Validate an uploaded file without analyzing it.

    File-level and text-length failures are returned as a FileValidationResponse with
    valid = False. Otherwise the ContractValidator report is returned, with the file
    considered valid when the report's total score is above 50.

    Raises:
        HTTPException (400) when any step raises an exception.
    """
    try:
        file_ok, file_problem = validate_file(file)

        if not file_ok:
            return FileValidationResponse(valid = False, message = file_problem)

        contract_text = read_contract_file(file)

        # Length / emptiness check on the extracted text
        is_valid_text, text_problem = validate_contract_text(contract_text)

        if not is_valid_text:
            return FileValidationResponse(valid = False, message = text_problem)

        # Structural check via the validator's report
        report = ContractValidator().get_validation_report(contract_text)
        total_score = report["scores"]["total"]
        passes_threshold = (total_score > 50)

        if passes_threshold:
            verdict = "Contract appears valid"

        else:
            verdict = "May not be a valid contract"

        return FileValidationResponse(
            valid = passes_threshold and is_valid_text,
            message = verdict,
            confidence = total_score,
            report = report,
        )

    except Exception as e:
        log_error(f"File validation failed: {e}")

        raise HTTPException(status_code = 400, detail = f"Validation failed: {repr(e)}")
1095
 
1096
+
1097
@app.post("/api/v1/validate/text", response_model = FileValidationResponse)
async def validate_contract_text_endpoint(contract_text: str = Form(...)):
    """
    Validate contract text submitted as a form field without analyzing it.

    A text-length failure is returned as a FileValidationResponse with valid = False.
    Otherwise the ContractValidator report is returned, with the text considered valid
    when the report's total score is above 50.

    Raises:
        HTTPException (400) when any step raises an exception.
    """
    try:
        # Length / emptiness check
        is_valid, length_problem = validate_contract_text(contract_text)

        if not is_valid:
            return FileValidationResponse(valid = False, message = length_problem)

        # Structural check via the validator's report
        report = ContractValidator().get_validation_report(contract_text)
        total_score = report["scores"]["total"]
        passes_threshold = (total_score > 50)

        if passes_threshold:
            verdict = "Contract appears valid"

        else:
            verdict = "May not be a valid contract"

        return FileValidationResponse(
            valid = passes_threshold and is_valid,
            message = verdict,
            confidence = total_score,
            report = report,
        )

    except Exception as e:
        log_error(f"Text validation failed: {repr(e)}")
        raise HTTPException(status_code = 400, detail = f"Validation failed: {repr(e)}")
1123
 
 
 
 
1124
 
1125
+ # ERROR HANDLERS AND MIDDLEWARE
1126
@app.exception_handler(HTTPException)
async def http_exception_handler(request, exc):
    """
    Convert an HTTPException into an ErrorResponse payload, keeping the exception's status code.
    """
    status = exc.status_code
    payload = ErrorResponse(
        error = exc.detail,
        detail = str(exc.detail),
        timestamp = datetime.now().isoformat(),
    )

    return NumpyJSONResponse(status_code = status, content = payload.dict())
1134
+
 
1135
 
1136
@app.exception_handler(Exception)
async def general_exception_handler(request, exc):
    """
    Log an unhandled exception and answer with a 500 ErrorResponse payload whose
    detail is the exception's string form.
    """
    log_error(f"Unhandled exception: {exc}")

    payload = ErrorResponse(
        error = "Internal server error",
        detail = str(exc),
        timestamp = datetime.now().isoformat(),
    )

    return NumpyJSONResponse(status_code = 500, content = payload.dict())
1146
+
1147
 
1148
@app.middleware("http")
async def log_requests(request: Request, call_next):
    """
    HTTP middleware that logs method, path, response status and handling time for each request.
    """
    started_at = time.time()
    response = await call_next(request)
    elapsed = time.time() - started_at

    log_info(f"API Request: {request.method} {request.url.path} - Status: {response.status_code} - Duration: {elapsed:.3f}s")

    return response
1157
 
1158
+
1159
+
1160
+ # MAIN
1161
if __name__ == "__main__":
    def signal_handler(sig, frame):
        # Exit with status 0 when SIGINT (Ctrl+C) arrives
        print("\n👋 Received Ctrl+C, shutting down gracefully...")
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)

    try:
        # Every server option is taken from settings
        uvicorn.run(
            "app:app",
            host = settings.HOST,
            port = settings.PORT,
            reload = settings.RELOAD,
            workers = settings.WORKERS,
            log_level = settings.LOG_LEVEL.lower(),
        )

    except KeyboardInterrupt:
        print("\n🎯 Server stopped by user")

    except Exception as server_error:
        log_error(f"Server error: {server_error}")

        # Any other failure ends the process with a non-zero status
        sys.exit(1)
config/risk_rules.py CHANGED
@@ -170,64 +170,75 @@ class RiskRules:
170
  "low" : 20,
171
  }
172
 
173
- CATEGORY_DESCRIPTIONS = {"restrictive_covenants" : {"high" : "Overly restrictive non-compete, non-solicit, or confidentiality terms that may significantly limit future opportunities",
174
- "medium" : "Some restrictive terms present; review duration, geographic scope, and industry limitations",
175
- "low" : "Reasonable restrictive covenants appropriate for this role and industry standards",
176
- },
177
- "termination_rights" : {"high" : "Unbalanced termination rights with immediate termination, 'at-will' clauses, or unequal notice periods favoring one party",
178
- "medium" : "Moderately balanced termination provisions; review notice period requirements and severance terms",
179
- "low" : "Fair termination rights with reasonable notice periods and balanced severance provisions",
180
- },
181
- "penalties_liability" : {"high" : "Excessive penalty clauses, unlimited liability exposure, or one-sided indemnification terms",
182
- "medium" : "Some concerning liability terms; review indemnification scope, damage limitations, and warranty provisions",
183
- "low" : "Standard liability limitations, reasonable penalty provisions, and balanced indemnification terms",
184
- },
185
- "compensation_benefits" : {"high" : "Compensation structure lacks clarity, contains vague terms, or has unfavorable payment conditions",
186
- "medium" : "Compensation terms are generally clear but could benefit from more specific bonus structure and payment terms",
187
- "low" : "Clear and competitive compensation package with well-defined payment terms and bonus structure",
188
- },
189
- "intellectual_property" : {"high" : "Overly broad IP assignment that may cover personal projects or lacks proper prior IP exclusion",
190
- "medium" : "IP terms mostly clear but could benefit from stronger prior IP protection and clearer ownership terms",
191
- "low" : "Well-defined intellectual property ownership, clear usage rights, and proper prior IP exclusion",
192
- },
193
- "confidentiality" : {"high" : "Overly broad confidentiality scope, perpetual duration, or insufficient protection exceptions",
194
- "medium" : "Standard confidentiality terms with some areas that could be more precisely defined",
195
- "low" : "Reasonable confidentiality provisions with appropriate scope and duration",
196
- },
197
- "liability_indemnity" : {"high" : "Unbalanced indemnification, unlimited liability exposure, or insufficient liability caps",
198
- "medium" : "Moderate liability terms; review indemnification mutuality and liability limitations",
199
- "low" : "Balanced indemnification provisions with reasonable liability limitations",
200
- },
201
- "governing_law" : {"high" : "Unfavorable jurisdiction selection, one-sided dispute resolution, or restrictive venue requirements",
202
- "medium" : "Standard governing law terms with generally acceptable jurisdiction and dispute resolution",
203
- "low" : "Reasonable governing law and jurisdiction provisions favorable to both parties",
204
- },
205
- "payment_terms" : {"high" : "Unfavorable payment terms, extended payment periods, or unclear payment conditions",
206
- "medium" : "Standard payment terms with some areas that could be improved for cash flow",
207
- "low" : "Favorable payment terms with reasonable payment periods and clear conditions",
208
- },
209
- "warranties" : {"high" : "Overly broad warranty disclaimers, insufficient product guarantees, or one-sided warranty terms",
210
- "medium" : "Standard warranty provisions with typical product/service guarantees",
211
- "low" : "Comprehensive warranty coverage with reasonable limitations and clear guarantees",
212
- },
213
- "dispute_resolution" : {"high" : "Unfavorable dispute resolution process, restrictive arbitration clauses, or one-sided legal fee allocation",
214
- "medium" : "Standard dispute resolution terms with generally fair arbitration or litigation process",
215
- "low" : "Reasonable dispute resolution process with fair arbitration and cost allocation",
216
- },
217
- "assignment_change" : {"high" : "Restrictive assignment clauses, one-sided change control, or unfavorable amendment procedures",
218
- "medium" : "Standard assignment and change control terms with reasonable flexibility",
219
- "low" : "Reasonable assignment rights and change control processes favorable to both parties",
220
- },
221
- "insurance" : {"high" : "Insufficient insurance requirements, unclear coverage terms, or inadequate policy specifications",
222
- "medium" : "Standard insurance requirements with typical coverage expectations",
223
- "low" : "Comprehensive insurance requirements with clear coverage specifications",
224
- },
225
- "force_majeure" : {"high" : "Overly narrow force majeure definition, insufficient relief provisions, or one-sided termination rights",
226
- "medium" : "Standard force majeure clause with typical relief provisions",
227
- "low" : "Comprehensive force majeure protection with reasonable relief and termination rights",
228
- },
229
- }
230
-
 
 
 
 
 
 
 
 
 
 
 
231
 
232
  @classmethod
233
  def get_adjusted_weights(cls, contract_type: ContractType) -> Dict[str, float]:
@@ -270,4 +281,12 @@ class RiskRules:
270
 
271
  category_description = cls.CATEGORY_DESCRIPTIONS[category][risk_level]
272
 
273
- return category_description
 
 
 
 
 
 
 
 
 
170
  "low" : 20,
171
  }
172
 
173
+ CATEGORY_DESCRIPTIONS = {"restrictive_covenants" : {"high" : "Overly restrictive non-compete, non-solicit, or confidentiality terms that may significantly limit future opportunities",
174
+ "medium" : "Some restrictive terms present; review duration, geographic scope, and industry limitations",
175
+ "low" : "Reasonable restrictive covenants appropriate for this role and industry standards",
176
+ },
177
+ "termination_rights" : {"high" : "Unbalanced termination rights with immediate termination, 'at-will' clauses, or unequal notice periods favoring one party",
178
+ "medium" : "Moderately balanced termination provisions; review notice period requirements and severance terms",
179
+ "low" : "Fair termination rights with reasonable notice periods and balanced severance provisions",
180
+ },
181
+ "penalties_liability" : {"high" : "Excessive penalty clauses, unlimited liability exposure, or one-sided indemnification terms",
182
+ "medium" : "Some concerning liability terms; review indemnification scope, damage limitations, and warranty provisions",
183
+ "low" : "Standard liability limitations, reasonable penalty provisions, and balanced indemnification terms",
184
+ },
185
+ "compensation_benefits" : {"high" : "Compensation structure lacks clarity, contains vague terms, or has unfavorable payment conditions",
186
+ "medium" : "Compensation terms are generally clear but could benefit from more specific bonus structure and payment terms",
187
+ "low" : "Clear and competitive compensation package with well-defined payment terms and bonus structure",
188
+ },
189
+ "intellectual_property" : {"high" : "Overly broad IP assignment that may cover personal projects or lacks proper prior IP exclusion",
190
+ "medium" : "IP terms mostly clear but could benefit from stronger prior IP protection and clearer ownership terms",
191
+ "low" : "Well-defined intellectual property ownership, clear usage rights, and proper prior IP exclusion",
192
+ },
193
+ "confidentiality" : {"high" : "Overly broad confidentiality scope, perpetual duration, or insufficient protection exceptions",
194
+ "medium" : "Standard confidentiality terms with some areas that could be more precisely defined",
195
+ "low" : "Reasonable confidentiality provisions with appropriate scope and duration",
196
+ },
197
+ "liability_indemnity" : {"high" : "Unbalanced indemnification, unlimited liability exposure, or insufficient liability caps",
198
+ "medium" : "Moderate liability terms; review indemnification mutuality and liability limitations",
199
+ "low" : "Balanced indemnification provisions with reasonable liability limitations",
200
+ },
201
+ "governing_law" : {"high" : "Unfavorable jurisdiction selection, one-sided dispute resolution, or restrictive venue requirements",
202
+ "medium" : "Standard governing law terms with generally acceptable jurisdiction and dispute resolution",
203
+ "low" : "Reasonable governing law and jurisdiction provisions favorable to both parties",
204
+ },
205
+ "payment_terms" : {"high" : "Unfavorable payment terms, extended payment periods, or unclear payment conditions",
206
+ "medium" : "Standard payment terms with some areas that could be improved for cash flow",
207
+ "low" : "Favorable payment terms with reasonable payment periods and clear conditions",
208
+ },
209
+ "warranties" : {"high" : "Overly broad warranty disclaimers, insufficient product guarantees, or one-sided warranty terms",
210
+ "medium" : "Standard warranty provisions with typical product/service guarantees",
211
+ "low" : "Comprehensive warranty coverage with reasonable limitations and clear guarantees",
212
+ },
213
+ "dispute_resolution" : {"high" : "Unfavorable dispute resolution process, restrictive arbitration clauses, or one-sided legal fee allocation",
214
+ "medium" : "Standard dispute resolution terms with generally fair arbitration or litigation process",
215
+ "low" : "Reasonable dispute resolution process with fair arbitration and cost allocation",
216
+ },
217
+ "assignment_change" : {"high" : "Restrictive assignment clauses, one-sided change control, or unfavorable amendment procedures",
218
+ "medium" : "Standard assignment and change control terms with reasonable flexibility",
219
+ "low" : "Reasonable assignment rights and change control processes favorable to both parties",
220
+ },
221
+ "insurance" : {"high" : "Insufficient insurance requirements, unclear coverage terms, or inadequate policy specifications",
222
+ "medium" : "Standard insurance requirements with typical coverage expectations",
223
+ "low" : "Comprehensive insurance requirements with clear coverage specifications",
224
+ },
225
+ "force_majeure" : {"high" : "Overly narrow force majeure definition, insufficient relief provisions, or one-sided termination rights",
226
+ "medium" : "Standard force majeure clause with typical relief provisions",
227
+ "low" : "Comprehensive force majeure protection with reasonable relief and termination rights",
228
+ },
229
+ }
230
+
231
+ PROTECTION_NAME_MAP = {"for_cause_definition" : "For Cause Definition",
232
+ "severance_proportion" : "Severance Provision",
233
+ "mutual_indemnification" : "Mutual Indemnification",
234
+ "liability_cap" : "Liability Cap",
235
+ "prior_ip_exclusion" : "Prior IP Exclusion",
236
+ "confidentiality_duration" : "Confidentiality Duration Limit",
237
+ "dispute_resolution" : "Dispute Resolution Process",
238
+ "change_control_process" : "Change Control Process",
239
+ "insurance_requirements" : "Insurance Requirements",
240
+ "force_majeure" : "Force Majeure Protection",
241
+ }
242
 
243
  @classmethod
244
  def get_adjusted_weights(cls, contract_type: ContractType) -> Dict[str, float]:
 
281
 
282
  category_description = cls.CATEGORY_DESCRIPTIONS[category][risk_level]
283
 
284
+ return category_description
285
+
286
+
287
@classmethod
def get_protection_display_name(cls, protection_id: str) -> str:
    """
    Return the human-readable name for a protection ID.

    IDs listed in PROTECTION_NAME_MAP use their mapped name; any other ID is
    formatted by replacing underscores with spaces and title-casing the result.
    """
    # Built first, as the original did, so a non-string ID fails the same way
    formatted_id = protection_id.replace("_", " ").title()

    if protection_id in cls.PROTECTION_NAME_MAP:
        return cls.PROTECTION_NAME_MAP[protection_id]

    return formatted_id
config/settings.py CHANGED
@@ -28,7 +28,7 @@ class Settings(BaseSettings):
28
 
29
  # File Upload Settings
30
  MAX_UPLOAD_SIZE : int = 10 * 1024 * 1024 # 10 MB
31
- ALLOWED_EXTENSIONS : list = ["pdf", "docx", "txt"]
32
  UPLOAD_DIR : Path = Path("uploads")
33
 
34
  # Model Management Settings
 
28
 
29
  # File Upload Settings
30
  MAX_UPLOAD_SIZE : int = 10 * 1024 * 1024 # 10 MB
31
+ ALLOWED_EXTENSIONS : list = [".pdf", ".docx", ".txt"]
32
  UPLOAD_DIR : Path = Path("uploads")
33
 
34
  # Model Management Settings
docs/API_DOCUMENTATION.md ADDED
@@ -0,0 +1,555 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AI Contract Risk Analyzer API Documentation
2
+
3
+ This document details the REST API endpoints for the AI Contract Risk Analyzer service.
4
+
5
+ **Base URL:** `http://<your-host>:<your-port>/api/v1` (e.g., `http://localhost:8000/api/v1`)
6
+
7
+ ## Table of Contents
8
+
9
+ * [Health Check](#health-check)
10
+ * [Get Service Status](#get-service-status)
11
+ * [Get Contract Categories](#get-contract-categories)
12
+ * [Analyze Contract from File](#analyze-contract-from-file)
13
+ * [Analyze Contract from Text](#analyze-contract-from-text)
14
+ * [Generate PDF Report](#generate-pdf-report)
15
+ * [Validate Contract File](#validate-contract-file)
16
+ * [Validate Contract Text](#validate-contract-text)
17
+
18
+ ---
19
+
20
+ ## Health Check
21
+
22
+ Checks the basic health and availability of the API service.
23
+
24
+ ### Endpoint
25
+
26
+ `GET /api/v1/health`
27
+
28
+ ### Request
29
+
30
+ No body required.
31
+
32
+ ### Response
33
+
34
+ **Status Code:** `200 OK`
35
+
36
+ **Content-Type:** `application/json`
37
+
38
+ ```json
39
+ {
40
+ "status": "healthy",
41
+ "version": "1.0.0",
42
+ "timestamp": "2025-11-17T15:00:00.123456",
43
+ "models_loaded": 5,
44
+ "services_loaded": 6,
45
+ "memory_usage_mb": 2048.5
46
+ }
47
+ ```
48
+
49
+ ---
50
+
51
+ ## Get Service Status
52
+
53
+ Retrieves detailed status information about the loaded models and services.
54
+
55
+ ### Endpoint
56
+
57
+ `GET /api/v1/status`
58
+
59
+ ### Request
60
+
61
+ No body required.
62
+
63
+ ### Response
64
+
65
+ **Status Code:** `200 OK`
66
+
67
+ **Content-Type:** `application/json`
68
+
69
+ ```json
70
+ {
71
+ "services": {
72
+ "classifier": "loaded",
73
+ "clause_extractor": "loaded",
74
+ "risk_analyzer": "loaded",
75
+ "llm_interpreter": "loaded",
76
+ "negotiation_engine": "loaded",
77
+ "summary_generator": "loaded",
78
+ "term_analyzer": "loaded",
79
+ "protection_checker": "loaded"
80
+ },
81
+ "models": {
82
+ "legal-bert": {
83
+ "name": "legal-bert",
84
+ "type": "LEGAL_BERT",
85
+ "status": "LOADED",
86
+ "loaded_at": "2025-11-17T14:55:00.123456",
87
+ "memory_size_mb": 400.0,
88
+ "access_count": 10,
89
+ "last_accessed": "2025-11-17T15:00:00.123456"
90
+ },
91
+ "embedding": {
92
+ "name": "embedding",
93
+ "type": "EMBEDDING",
94
+ "status": "LOADED",
95
+ "loaded_at": "2025-11-17T14:55:00.123456",
96
+ "memory_size_mb": 100.0,
97
+ "access_count": 8,
98
+ "last_accessed": "2025-11-17T14:59:59.123456"
99
+ }
100
+ },
101
+ "memory_usage_mb": 2048.5,
102
+ "total_services_loaded": 8,
103
+ "total_models_loaded": 5
104
+ }
105
+ ```
106
+
107
+ ---
108
+
109
+ ## Get Contract Categories
110
+
111
+ Retrieves a list of contract categories that the classifier can identify.
112
+
113
+ ### Endpoint
114
+
115
+ `GET /api/v1/categories`
116
+
117
+ ### Request
118
+
119
+ No body required.
120
+
121
+ ### Response
122
+
123
+ **Status Code:** `200 OK`
124
+
125
+ **Content-Type:** `application/json`
126
+
127
+ ```json
128
+ {
129
+ "categories": [
130
+ "employment",
131
+ "consulting",
132
+ "nda",
133
+ "software",
134
+ "service",
135
+ "partnership",
136
+ "lease",
137
+ "purchase",
138
+ "general"
139
+ ]
140
+ }
141
+ ```
142
+
143
+ ---
144
+
145
+ ## Analyze Contract from File
146
+
147
+ Uploads a contract file (PDF, DOCX, TXT) for analysis.
148
+
149
+ ### Endpoint
150
+
151
+ `POST /api/v1/analyze/file`
152
+
153
+ ### Request
154
+
155
+ **Content-Type:** `multipart/form-data`
156
+
157
+ **Form Data:**
158
+
159
+ - `file`: **(Required)** The contract file to analyze (PDF, DOCX, TXT).
160
+ - `max_clauses`: **(Optional, Integer)** Maximum number of clauses to analyze (default: `50`, min: `5`, max: `30`).
161
+ - `interpret_clauses`: **(Optional, Boolean)** Whether to generate LLM interpretations for clauses (default: `true`).
162
+ - `generate_negotiation_points`: **(Optional, Boolean)** Whether to generate negotiation points (default: `true`).
163
+ - `compare_to_market`: **(Optional, Boolean)** Whether to perform market comparison (default: `false`, currently disabled).
164
+
165
+ ### Response
166
+
167
+ **Status Code:** `200 OK`
168
+
169
+ **Content-Type:** `application/json`
170
+
171
+ ```json
172
+ {
173
+ "analysis_id": "a1b2c3d4-e5f6-7890-1234-567890abcdef",
174
+ "timestamp": "2025-11-17T15:01:00.123456",
175
+ "classification": {
176
+ "category": "employment",
177
+ "subcategory": "executive",
178
+ "confidence": 0.95,
179
+ "reasoning": ["Keywords like 'executive', 'compensation', 'non-compete' found"],
180
+ "detected_keywords": ["employment", "executive", "non-compete", "compensation"]
181
+ },
182
+ "clauses": [
183
+ {
184
+ "text": "Employee agrees to a 24-month non-compete...",
185
+ "reference": "Clause 9.5",
186
+ "category": "restrictive_covenants",
187
+ "confidence": 0.98,
188
+ "start_pos": 1200,
189
+ "end_pos": 1350,
190
+ "extraction_method": "semantic",
191
+ "risk_indicators": ["non-compete", "24 months", "entire industry"],
192
+ "risk_score": 90
193
+ }
194
+ ],
195
+ "risk_analysis": {
196
+ "overall_score": 85,
197
+ "risk_level": "CRITICAL",
198
+ "category_scores": {
199
+ "restrictive_covenants": 95,
200
+ "penalties_liability": 90,
201
+ "compensation_benefits": 80
202
+ },
203
+ "risk_factors": ["Restrictive Covenants", "Penalties & Liability"],
204
+ "detailed_findings": {
205
+ "restrictive_covenants": [
206
+ "Non-compete clause (Clause 9.5) is overly broad and long.",
207
+ "Non-solicitation clause (Clause 17.6) has excessive duration."
208
+ ]
209
+ },
210
+ "benchmark_comparison": {
211
+ "overall": "✗ Significantly above market risk levels",
212
+ "high_risk_areas": ["Restrictive Covenants", "Penalties & Liability"]
213
+ },
214
+ "risk_breakdown": [
215
+ {
216
+ "category": "Restrictive Covenants",
217
+ "score": 95,
218
+ "summary": "The agreement contains exceptionally broad and long-lasting non-compete...",
219
+ "findings": ["Non-compete clause (Clause 9.5) is overly broad and long."]
220
+ }
221
+ ],
222
+ "contract_type": "employment",
223
+ "unfavorable_terms": [],
224
+ "missing_protections": []
225
+ },
226
+ "unfavorable_terms": [
227
+ {
228
+ "term": "Risk Factor: entire industry",
229
+ "category": "restrictive_covenants",
230
+ "severity": "critical",
231
+ "explanation": "Non-compete restricts the Employee from applying to any company in the same 'Industry'...",
232
+ "risk_score": 90,
233
+ "clause_reference": "Clause 9.5",
234
+ "suggested_fix": "Negotiate to have this clause removed entirely...",
235
+ "contract_type": "EMPLOYMENT",
236
+ "specific_text": "entire industry",
237
+ "benchmark_info": null,
238
+ "legal_basis": "Reasonableness standard for restrictive covenants"
239
+ }
240
+ ],
241
+ "missing_protections": [
242
+ {
243
+ "protection": "For Cause Definition",
244
+ "importance": "critical",
245
+ "risk_score": 25,
246
+ "explanation": "Without a clear 'for cause' definition, termination grounds remain ambiguous...",
247
+ "recommendation": "Add clear 'For Cause' definition including...",
248
+ "categories": ["termination_rights"],
249
+ "contract_type": "EMPLOYMENT",
250
+ "suggested_language": "\"For Cause\" means: (a) gross negligence...",
251
+ "legal_basis": "Employment protection statutes...",
252
+ "affected_clauses": ["Clause 17.1"]
253
+ }
254
+ ],
255
+ "clause_interpretations": [
256
+ {
257
+ "clause_reference": "Clause 9.5",
258
+ "original_text": "Employee agrees to a 24-month non-compete...",
259
+ "plain_english_summary": "You cannot work for or apply to any company in the same industry for 24 months after leaving.",
260
+ "key_points": [
261
+ "Duration: 24 months",
262
+ "Scope: Entire industry",
263
+ "Applies to: Applying for jobs too"
264
+ ],
265
+ "potential_risks": [
266
+ "Severely limits future job opportunities.",
267
+ "Scope is likely unenforceable."
268
+ ],
269
+ "favorability": "unfavorable",
270
+ "confidence": 0.85,
271
+ "risk_score": 90,
272
+ "negotiation_priority": "high",
273
+ "suggested_improvements": [
274
+ "Reduce duration to 6-12 months.",
275
+ "Narrow scope to direct competitors only."
276
+ ]
277
+ }
278
+ ],
279
+ "negotiation_points": [
280
+ {
281
+ "priority": 1,
282
+ "category": "restrictive_covenants",
283
+ "issue": "Extremely broad non-compete clause",
284
+ "current_language": "Employee agrees to a 24-month non-compete...",
285
+ "proposed_language": "Limit non-compete to 6 months and direct competitors only.",
286
+ "rationale": "The current clause is overly broad and likely unenforceable.",
287
+ "tactic": "limitation",
288
+ "fallback_position": "If 6 months is not accepted, propose 12 months.",
289
+ "estimated_difficulty": "medium",
290
+ "legal_basis": "Reasonableness standard for restrictive covenants",
291
+ "business_impact": "Severely restricts the Employee's ability to find future employment.",
292
+ "counterparty_concerns": "They may argue this is necessary to protect trade secrets.",
293
+ "timing_suggestion": "Address this early in negotiations.",
294
+ "bargaining_chips": [
295
+ "Offer to sign a stronger confidentiality agreement.",
296
+ "Agree to a shorter notice period for termination."
297
+ ]
298
+ }
299
+ ],
300
+ "market_comparisons": [],
301
+ "executive_summary": "This employment agreement is heavily skewed in favor of the Employer...",
302
+ "metadata": {
303
+ "text_length": 15000,
304
+ "word_count": 2500,
305
+ "num_clauses": 20,
306
+ "contract_type": "EMPLOYMENT",
307
+ "actual_category": "employment",
308
+ "options": {
309
+ "max_clauses": 50,
310
+ "interpret_clauses": true,
311
+ "generate_negotiation_points": true,
312
+ "compare_to_market": false
313
+ }
314
+ },
315
+ "pdf_available": true
316
+ }
317
+ ```
318
+
319
+ ### Error Response
320
+
321
+ **Status Code:** `400 Bad Request` or `500 Internal Server Error`
322
+
323
+ **Content-Type:** `application/json`
324
+
325
+ ```json
326
+ {
327
+ "error": "Analysis failed",
328
+ "detail": "Contract text too short. Minimum 300 characters required.",
329
+ "timestamp": "2025-11-17T15:01:01.123456"
330
+ }
331
+ ```
332
+
333
+ ---
334
+
335
+ ## Analyze Contract from Text
336
+
337
+ Analyzes a contract provided as plain text.
338
+
339
+ ### Endpoint
340
+
341
+ `POST /api/v1/analyze/text`
342
+
343
+ ### Request
344
+
345
+ **Content-Type:** `application/x-www-form-urlencoded`
346
+
347
+ **Form Data:**
348
+
349
+ - `contract_text`: **(Required, String)** The full text of the contract.
350
+ - `max_clauses`: **(Optional, Integer)** Maximum number of clauses to analyze (default: `15`, min: `5`, max: `30`).
351
+ - `interpret_clauses`: **(Optional, Boolean)** Whether to generate LLM interpretations for clauses (default: `true`).
352
+ - `generate_negotiation_points`: **(Optional, Boolean)** Whether to generate negotiation points (default: `true`).
353
+ - `compare_to_market`: **(Optional, Boolean)** Whether to perform market comparison (default: `false`, currently disabled).
354
+
355
+ ### Response
356
+
357
+ Same as the response for [Analyze Contract from File](#analyze-contract-from-file).
358
+
359
+ ### Error Response
360
+
361
+ Same as the error response for [Analyze Contract from File](#analyze-contract-from-file).
362
+
363
+ ---
364
+
365
+ ## Generate PDF Report
366
+
367
+ Generates a downloadable PDF report based on the analysis result provided in the request body.
368
+
369
+ ### Endpoint
370
+
371
+ `POST /api/v1/generate-pdf`
372
+
373
+ ### Request
374
+
375
+ **Content-Type:** `application/json`
376
+
377
+ **Body:** The full JSON object returned by a successful `/analyze/file` or `/analyze/text` request.
378
+
379
+ ```json
380
+ {
381
+ "analysis_id": "a1b2c3d4-e5f6-7890-1234-567890abcdef",
382
+ "timestamp": "2025-11-17T15:01:00.123456",
383
+ "classification": { ... },
384
+ "clauses": [ ... ],
385
+ "risk_analysis": { ... },
386
+ "unfavorable_terms": [ ... ],
387
+ "missing_protections": [ ... ],
388
+ "clause_interpretations": [ ... ],
389
+ "negotiation_points": [ ... ],
390
+ "market_comparisons": [ ... ],
391
+ "executive_summary": "...",
392
+ "metadata": { ... },
393
+ "pdf_available": true
394
+ }
395
+ ```
396
+
397
+ ### Response
398
+
399
+ **Status Code:** `200 OK`
400
+
401
+ **Content-Type:** `application/pdf`
402
+
403
+ **Headers:**
404
+
405
+ - `Content-Disposition`: `attachment; filename=contract_analysis_<analysis_id>.pdf`
406
+
407
+ The response body contains the binary PDF data.
408
+
409
+ ### Error Response
410
+
411
+ **Status Code:** `500 Internal Server Error`
412
+
413
+ **Content-Type:** `application/json`
414
+
415
+ ```json
416
+ {
417
+ "error": "Internal server error",
418
+ "detail": "Failed to generate PDF: Some error message",
419
+ "timestamp": "2025-11-17T15:02:00.123456"
420
+ }
421
+ ```
422
+
423
+ ---
424
+
425
+ ## Validate Contract File
426
+
427
+ Validates whether an uploaded file appears to be a valid contract document.
428
+
429
+ ### Endpoint
430
+
431
+ `POST /api/v1/validate/file`
432
+
433
+ ### Request
434
+
435
+ **Content-Type:** `multipart/form-data`
436
+
437
+ **Form Data:**
438
+
439
+ - `file`: **(Required)** The contract file to validate (PDF, DOCX, TXT).
440
+
441
+ ### Response
442
+
443
+ **Status Code:** `200 OK`
444
+
445
+ **Content-Type:** `application/json`
446
+
447
+ ```json
448
+ {
449
+ "valid": true,
450
+ "message": "Contract appears valid",
451
+ "confidence": 85.0,
452
+ "report": {
453
+ "scores": {
454
+ "total": 85.0,
455
+ "has_parties": 90.0,
456
+ "has_date": 80.0,
457
+ "has_terms": 90.0
458
+ },
459
+ "found_indicators": ["agreement", "party", "terms"],
460
+ "found_anti_patterns": [],
461
+ "text_statistics": {
462
+ "length": 15000,
463
+ "word_count": 2500,
464
+ "line_count": 300
465
+ }
466
+ }
467
+ }
468
+ ```
469
+
470
+ ### Error Response
471
+
472
+ **Status Code:** `400 Bad Request`
473
+
474
+ **Content-Type:** `application/json`
475
+
476
+ ```json
477
+ {
478
+ "error": "Validation failed",
479
+ "detail": "File too large. Max size: 10.0MB",
480
+ "timestamp": "2025-11-17T15:03:00.123456"
481
+ }
482
+ ```
483
+
484
+ ---
485
+
486
+ ## Validate Contract Text
487
+
488
+ Validates whether the provided text appears to be a valid contract.
489
+
490
+ ### Endpoint
491
+
492
+ `POST /api/v1/validate/text`
493
+
494
+ ### Request
495
+
496
+ **Content-Type:** `application/x-www-form-urlencoded`
497
+
498
+ **Form Data:**
499
+
500
+ - `contract_text`: **(Required, String)** The text to validate.
501
+
502
+ ### Response
503
+
504
+ **Status Code:** `200 OK`
505
+
506
+ **Content-Type:** `application/json`
507
+
508
+ ```json
509
+ {
510
+ "valid": true,
511
+ "message": "Contract appears valid",
512
+ "confidence": 78.0,
513
+ "report": {
514
+ "scores": {
515
+ "total": 78.0,
516
+ "has_parties": 85.0,
517
+ "has_date": 70.0,
518
+ "has_terms": 80.0
519
+ },
520
+ "found_indicators": ["agreement", "party", "payment"],
521
+ "found_anti_patterns": [],
522
+ "text_statistics": {
523
+ "length": 1200,
524
+ "word_count": 200,
525
+ "line_count": 25
526
+ }
527
+ }
528
+ }
529
+ ```
530
+
531
+ ### Error Response
532
+
533
+ **Status Code:** `400 Bad Request`
534
+
535
+ **Content-Type:** `application/json`
536
+
537
+ ```json
538
+ {
539
+ "error": "Validation failed",
540
+ "detail": "Contract text too short. Minimum 300 characters required.",
541
+ "timestamp": "2025-11-17T15:04:00.123456"
542
+ }
543
+ ```
544
+
545
+ ---
546
+
547
+ ## Notes
548
+
549
+ - All timestamps are in ISO 8601 format
550
+ - All risk scores are integers from 0-100
551
+ - The API uses custom JSON serialization to handle NumPy types
552
+ - CORS is enabled for all origins in development
553
+ - Maximum file upload size is configurable via settings (default: 10MB)
554
+ - Minimum contract text length: 300 characters (configurable)
555
+ - Maximum contract text length: configurable via settings
docs/BLOGPOST.md CHANGED
@@ -2,7 +2,7 @@
2
 
3
  ## The Paradigm Shift in Contractual Comprehension
4
 
5
- In our contemporary landscape, where legal contracts underpin virtually every commercial transaction and professional engagement, the capacity to decipher and negotiate equitable terms has evolved from a specialized skill to an essential competency. Yet, the labyrinthine complexity of legal vernacular continues to erect formidable barriers for those outside the legal profession, resulting in executed agreements that frequently conceal latent risks and disadvantageous provisions. The **AI Contract Risk Analyzer** heralds a transformative approach to legal document scrutiny, harnessing sophisticated artificial intelligence to deliver instantaneous, exhaustive contract risk evaluation accessible to all.
6
 
7
  ---
8
 
@@ -28,9 +28,9 @@ Legal documentation presents an intricate tapestry of challenges:
28
 
29
  ---
30
 
31
- ## The Resolution: Artificial Intelligence-Powered Contract Intelligence
32
 
33
- The AI Contract Risk Analyzer bridges the chasm between legal complexity and human understanding through an elaborate, multi-tiered artificial intelligence ecosystem.
34
 
35
  ### Operational Methodology: The User Odyssey
36
 
@@ -45,21 +45,27 @@ Effortlessly upload your PDF or Word document into our secure digital environmen
45
  - And numerous additional contract classifications
46
 
47
  #### Phase 2: Instantaneous AI Scrutiny
48
- Within a remarkable 30-second window, our AI engine executes:
49
- - **Contract categorization** to identify document typology
50
- - **Provisional extraction** to isolate crucial stipulations
51
- - **Risk quantification** across eight distinct dimensions
52
- - **Market benchmarking** against industry standards
53
- - **Protection gap analysis** to identify omissions
 
 
 
 
54
 
55
  #### Phase 3: Comprehensive Insight Acquisition
56
  Receive an exhaustive analysis comprising:
57
  - **Holistic Risk Assessment** (0-100 scale) with unambiguous severity classification
58
  - **Executive Synopsis** in accessible vernacular
59
- - **Clause-by-Clause Deconstruction** with contextual interpretations
60
  - **Identified Unfavorable Terms** with remediation suggestions
61
  - **Absent Protections** requiring inclusion
62
  - **Prioritized Negotiation Points** by significance hierarchy
 
 
63
 
64
  #### Phase 4: Strategic Implementation
65
  Equipped with profound insights, you can:
@@ -72,41 +78,58 @@ Equipped with profound insights, you can:
72
 
73
  ## The Technological Architecture: Engineering the Extraordinary
74
 
75
- ### Polymorphic AI Framework
 
 
76
 
77
- Our platform transcends singular AI model dependency, instead constructing an intricate ensemble of specialized models operating in symphonic coordination:
 
 
 
 
78
 
79
- #### 1. Legal-BERT for Jurisprudential Comprehension
80
- We've refined the Legal-BERT model (nlpaueb/legal-bert-base-uncased) specifically for contract analysis. This model apprehends legal language subtleties that conventional AI overlooks:
 
 
 
 
 
81
  - Discerns 15+ categories of contractual stipulations
82
  - Achieves 92% precision in clause boundary identification
83
  - Manages complex legal syntax and cross-referential elements
84
-
85
- #### 2. Semantic Equivalency Engine
86
- Leveraging advanced embedding architectures, we transmute legal text into mathematical vectors that encapsulate meaning:
87
- - Transforms clauses into 384-dimensional vector representations
88
- - Facilitates comparison against 1000+ established risk patterns
89
- - Benchmarks your contract against industry standards
90
-
91
- #### 3. Large Language Model Integration for Natural Communication
92
- We interface with multiple Large Language Models (Ollama, OpenAI GPT-3.5, Anthropic Claude) to:
93
- - Generate lucid explanations of intricate clauses
94
- - Provide contextually-aware negotiation recommendations
95
- - Create executive summaries tailored to comprehension levels
96
-
97
- #### 4. Multifactor Risk Evaluation
98
- Our proprietary risk assessment algorithm examines:
99
- - **Lexical Severity**: Detection of critical risk terminology
100
- - **Structural Configurations**: Recognition of unfavorable clause architectures
101
- - **Provision-Level Analysis**: Intensive examination of specific stipulations
102
- - **Industry Benchmarks**: Comparison to market conventions
103
- - **Protection Deficiencies**: Identification of absent safeguards
 
 
 
 
 
 
104
 
105
  ---
106
 
107
  ## Distinguishing Characteristics: Our Competitive Differentiation
108
 
109
- ### 1. Comprehensive Risk Deconstruction
110
  Diverging from elementary keyword scanners, we furnish detailed analysis across multiple vectors:
111
  - **Restrictive Covenants** (non-competition, non-solicitation)
112
  - **Termination Privileges** (notice periods, for-cause definitions)
@@ -114,28 +137,30 @@ Diverging from elementary keyword scanners, we furnish detailed analysis across
114
  - **Compensation & Benefits** (transparency, discretion, vesting)
115
  - **Intellectual Property** (proprietorship, scope, exclusions)
116
 
117
- ### 2. Actionable Negotiation Strategies
118
- We transcend problem identification to provide tangible solutions:
119
- - Hierarchically prioritized issues (critical → low priority)
120
- - Specific proposed language for each concern
121
- - Rationale elucidating the significance
122
- - Alternative positions if primary requests encounter resistance
123
- - Estimated negotiation complexity
124
-
125
- ### 3. Accessible Interpretations
126
- Every complex legal provision undergoes translation into comprehensible language:
127
- - **Synopsis**: Essential meaning in 1-2 sentences
128
- - **Salient Points**: 3-5 critical comprehension elements
129
- - **Potential Exposure**: 2-4 concerns or warning indicators
130
- - **Equitability Assessment**: Rated as favorable/neutral/unfavorable
131
-
132
- ### 4. Market Contextualization
 
 
133
  Evaluate your contract's positioning relative to industry standards:
134
  - Comparison to reasonable, conventional, and aggressive market terms
135
  - Similarity metrics demonstrating alignment with best practices
136
  - Recommendations for normalizing outlier provisions
137
 
138
- ### 5. Protection Deficiency Audit
139
  We identify crucial safeguards that warrant inclusion but remain absent:
140
  - For-cause termination definitions
141
  - Severance provisions
@@ -153,23 +178,36 @@ We identify crucial safeguards that warrant inclusion but remain absent:
153
  - **Sentence Transformers** (all-MiniLM-L6-v2): Semantic similarity and embeddings
154
  - **PyTorch**: Deep learning architecture
155
  - **Hugging Face Transformers**: Model deployment and inference
 
156
 
157
  ### Backend Architecture
158
  - **FastAPI**: High-performance REST API (1000+ requests/second)
159
  - **Python 3.10+**: Core application logic
160
  - **Asynchronous Processing**: Background operations for extended analysis
161
- - **Redis Caching**: Sub-second response intervals for recurrent queries
 
162
 
163
  ### Document Processing Pipeline
164
- - **PyMuPDF**: Superior PDF text extraction
165
  - **python-docx**: Word document processing
166
  - **Custom NLP Pipeline**: Legal-specific text refinement and normalization
 
 
167
 
168
  ### LLM Integration Framework
169
  - **Ollama**: Local model hosting (privacy-centric)
170
  - **OpenAI API**: GPT-3.5/4 integration
171
  - **Anthropic Claude**: Enterprise-grade reasoning
172
  - **Multi-provider Redundancy**: Automatic failover for reliability
 
 
 
 
 
 
 
 
 
173
 
174
  ---
175
 
@@ -182,6 +220,7 @@ We recognize that contracts contain sensitive information. Our security infrastr
182
  - **Ephemeral processing**: Documents purged immediately post-analysis
183
  - **Exclusion from training**: Your contracts never utilized for model enhancement
184
  - **Isolated processing**: Each analysis operates in a segregated environment
 
185
 
186
  ### Regulatory Compliance
187
  - **GDPR adherence**: Data residency and right-to-erasure support
@@ -191,7 +230,7 @@ We recognize that contracts contain sensitive information. Our security infrastr
191
  ### User Autonomy
192
  - **Report acquisition**: PDF exports for personal archives
193
  - **Instant deletion**: Single-command eradication of all analysis history
194
- - **Anonymous utilization**: No account mandatory for fundamental analysis
195
 
196
  ---
197
 
@@ -200,18 +239,23 @@ We recognize that contracts contain sensitive information. Our security infrastr
200
  ### Immediate Horizon
201
  - **Multilingual capability**: Expansion beyond English to major global languages
202
  - **Sector-specific templates**: Industry-tailored analysis (healthcare, finance, technology)
 
 
203
 
204
  ### Intermediate Timeline
205
  - **Predictive analytics**: Dispute probability forecasting based on clause patterns
206
- - **Regulatory conformity**: Automated verification against GDPR, CCPA, industry regulations
207
  - **Smart contract analysis**: Support for blockchain-based agreements
208
  - **Collaborative examination**: Team workflows with role-based permissions
 
209
 
210
  ### Long-term Aspiration
211
  - **AI negotiation facilitation**: Real-time negotiation support during contractual discussions
212
  - **Legal outcome projection**: Machine learning models predicting litigation results
213
  - **Global legal repository**: Anonymous pattern aggregation for market intelligence
214
  - **Jurisdiction-specific analysis**: Deep integration with municipal laws and precedents
 
 
215
 
216
  ---
217
 
@@ -220,7 +264,7 @@ We recognize that contracts contain sensitive information. Our security infrastr
220
  Prepared to assume command of your contracts? Initiation is straightforward:
221
 
222
  1. **Access our platform**: [contractguardai.com](https://contractguardai.com)
223
- 2. **Submit a contract**: PDF or Word document
224
  3. **Receive instantaneous analysis**: Results within 30 seconds
225
  4. **Acquire your report**: Comprehensive PDF with complete findings
226
 
@@ -232,11 +276,13 @@ No financial instrument required for your analysis. No installation necessary. N
232
 
233
  The AI Contract Risk Analyzer transcends mere tool status—it represents a movement toward legal transparency and accessibility. Whether you're executing your inaugural employment contract or reviewing your centesimal vendor agreement, you merit comprehension of your commitments.
234
 
 
 
235
  **Your contracts. Your rights. Your assurance.**
236
 
237
  ---
238
 
239
  *Ultimate Revision: November 2025* | *Version: 1.0*
240
 
241
- > © 2025 AI Contract Risk Analyzer. Democratizing legal intelligence for global accessibility.
242
- ---
 
2
 
3
  ## The Paradigm Shift in Contractual Comprehension
4
 
5
+ In our contemporary landscape, where legal contracts underpin virtually every commercial transaction and professional engagement, the capacity to decipher and negotiate equitable terms has evolved from a specialized skill to an essential competency. Yet, the labyrinthine complexity of legal vernacular continues to erect formidable barriers for those outside the legal profession, resulting in executed agreements that frequently conceal latent risks and disadvantageous provisions. The **AI Contract Risk Analyzer** heralds a transformative approach to legal document scrutiny, harnessing a sophisticated, integrated artificial intelligence pipeline to deliver instantaneous, comprehensive contract risk evaluation accessible to all.
6
 
7
  ---
8
 
 
28
 
29
  ---
30
 
31
+ ## The Resolution: An Integrated AI Analysis Pipeline
32
 
33
+ The AI Contract Risk Analyzer bridges the chasm between legal complexity and human understanding through a meticulously orchestrated, multi-stage artificial intelligence ecosystem that processes contracts with surgical precision.
34
 
35
  ### Operational Methodology: The User Odyssey
36
 
 
45
  - And numerous additional contract classifications
46
 
47
  #### Phase 2: Instantaneous AI Scrutiny
48
+ Within a remarkable 40–60-second window, our AI engine executes a comprehensive analysis across eight distinct stages:
49
+
50
+ 1. **Contract Classification**: Identifying the agreement type (e.g., Employment, NDA, Lease)
51
+ 2. **Clause Extraction**: Isolating and categorizing key contractual provisions
52
+ 3. **Unfavorable Term Detection**: Pinpointing one-sided, punitive, or ambiguous language
53
+ 4. **Missing Protection Identification**: Recognizing critical safeguards absent from the agreement
54
+ 5. **Risk Scoring & Aggregation**: Calculating a holistic risk score across multiple dimensions
55
+ 6. **LLM-Powered Interpretation**: Generating plain-English explanations of complex clauses
56
+ 7. **Negotiation Strategy Generation**: Creating prioritized talking points with strategic context
57
+ 8. **Executive Summary Synthesis**: Producing a concise, actionable overview
58
 
59
  #### Phase 3: Comprehensive Insight Acquisition
60
  Receive an exhaustive analysis comprising:
61
  - **Holistic Risk Assessment** (0-100 scale) with unambiguous severity classification
62
  - **Executive Synopsis** in accessible vernacular
63
+ - **Interactive Clause-by-Clause Deconstruction** with scrollable sections
64
  - **Identified Unfavorable Terms** with remediation suggestions
65
  - **Absent Protections** requiring inclusion
66
  - **Prioritized Negotiation Points** by significance hierarchy
67
+ - **Risk Category Breakdown** with visual progress indicators
68
+ - **Downloadable PDF Report** for offline reference
69
 
70
  #### Phase 4: Strategic Implementation
71
  Equipped with profound insights, you can:
 
78
 
79
  ## The Technological Architecture: Engineering the Extraordinary
80
 
81
+ ### A Symphony of Specialized AI Components
82
+
83
+ Our platform transcends singular AI model dependency, instead constructing an intricate ensemble of specialized models operating in symphonic coordination through a unified processing pipeline.
84
 
85
+ #### 1. **Contract Classifier**: Intelligent Typology Identification
86
+ At the heart of our system lies a sophisticated classifier that determines the contract's nature with high precision. This component uses:
87
+ - **Multi-modal scoring** combining keyword matching, semantic similarity, and Legal-BERT embeddings
88
+ - **Hierarchical categorization** to identify primary type (e.g., "employment") and subcategories (e.g., "executive")
89
+ - **Confidence calibration** with detailed reasoning to ensure accurate downstream processing
90
 
91
+ #### 2. **Risk-Focused Clause Extractor**: Precision-Driven Discovery
92
+ We employ a two-tiered extraction system:
93
+ - **Comprehensive Extractor**: Identifies structural patterns and semantic chunks across all legal domains
94
+ - **Risk-Specific Re-classifier**: Maps extracted clauses to **risk categories** (e.g., "restrictive_covenants", "termination_rights") rather than generic clause types, enabling precise risk quantification aligned with industry standards
95
+
96
+ #### 3. **Legal-BERT & Embedding Models**: Deep Semantic Understanding
97
+ We've refined the Legal-BERT model (nlpaueb/legal-bert-base-uncased) specifically for contract analysis. These models apprehend legal language subtleties that conventional AI overlooks:
98
  - Discerns 15+ categories of contractual stipulations
99
  - Achieves 92% precision in clause boundary identification
100
  - Manages complex legal syntax and cross-referential elements
101
+ - Uses sentence transformers (all-MiniLM-L6-v2) for semantic similarity at scale
102
+
103
+ #### 4. **Integrated Risk Engine**: Holistic Risk Quantification
104
+ Our proprietary `ComprehensiveRiskAnalyzer` orchestrates the entire pipeline:
105
+ - **Weighted category scoring** adjusted for contract type (e.g., restrictive covenants are weighted higher in employment contracts)
106
+ - **Risk factor integration** from red flags, pattern matching, and keyword detection
107
+ - **Cross-component validation** ensuring consistency between term analysis, protection checks, and clause evaluation
108
+ - **Dynamic threshold application** based on industry benchmarks and jurisdictional norms
109
+
110
+ #### 5. **LLM Interpreter & Negotiation Engine**: Human-Centric Explanation
111
+ We interface with multiple Large Language Models to transform technical findings into actionable insights:
112
+ - **Ollama (Local)**: Privacy-centric interpretation with llama3:8b
113
+ - **OpenAI GPT-3.5/4**: High-fidelity natural language generation
114
+ - **Anthropic Claude**: Enterprise-grade reasoning and ethical alignment
115
+ - **Unified LLM Manager**: Automatic provider fallback ensures reliability
116
+
117
+ This generates:
118
+ - Plain-English interpretations of complex clauses
119
+ - Contextually aware negotiation recommendations
120
+ - Executive summaries tailored to business impact
121
+
122
+ #### 6. **Proactive Protection Checker**: Gap Detection
123
+ Our system identifies what's *missing*—not just what's present:
124
+ - **Critical protections checklist** calibrated for each contract type
125
+ - **Semantic gap analysis** detecting absence of standard safeguards
126
+ - **Risk-if-missing quantification** to prioritize remediation
127
 
128
  ---
129
 
130
  ## Distinguishing Characteristics: Our Competitive Differentiation
131
 
132
+ ### 1. **True End-to-End Risk Analysis**
133
  Diverging from elementary keyword scanners, we furnish detailed analysis across multiple vectors:
134
  - **Restrictive Covenants** (non-competition, non-solicitation)
135
  - **Termination Privileges** (notice periods, for-cause definitions)
 
137
  - **Compensation & Benefits** (transparency, discretion, vesting)
138
  - **Intellectual Property** (proprietorship, scope, exclusions)
139
 
140
+ ### 2. **Actionable Negotiation Playbook**
141
+ We transcend problem identification to provide tangible solutions through a comprehensive strategy document:
142
+ - **Priority-ranked issues** (1=highest, 5=lowest)
143
+ - **Specific proposed language** with multiple improvement options
144
+ - **Strategic rationale** grounded in legal principles and business impact
145
+ - **Counterparty concern anticipation** to strengthen negotiation position
146
+ - **Timing guidance** for optimal issue-raising sequence
147
+ - **Bargaining chips** to trade for concessions
148
+
149
+ ### 3. **Context-Aware Interpretation**
150
+ Every complex legal provision undergoes translation into comprehensible language with contextual depth:
151
+ - **Plain-English summary** of core meaning
152
+ - **Key points** highlighting essential implications
153
+ - **Potential risks** outlining exposure and consequences
154
+ - **Favorability assessment** from the recipient's perspective
155
+ - **Suggested improvements** for balanced terms
156
+
157
+ ### 4. **Market Contextualization**
158
  Evaluate your contract's positioning relative to industry standards:
159
  - Comparison to reasonable, conventional, and aggressive market terms
160
  - Similarity metrics demonstrating alignment with best practices
161
  - Recommendations for normalizing outlier provisions
162
 
163
+ ### 5. **Protection Deficiency Audit**
164
  We identify crucial safeguards that warrant inclusion but remain absent:
165
  - For-cause termination definitions
166
  - Severance provisions
 
178
  - **Sentence Transformers** (all-MiniLM-L6-v2): Semantic similarity and embeddings
179
  - **PyTorch**: Deep learning architecture
180
  - **Hugging Face Transformers**: Model deployment and inference
181
+ - **Model Registry**: Thread-safe management with LRU eviction for efficient memory use
182
 
183
  ### Backend Architecture
184
  - **FastAPI**: High-performance REST API (1000+ requests/second)
185
  - **Python 3.10+**: Core application logic
186
  - **Asynchronous Processing**: Background operations for extended analysis
187
+ - **Redis-like Caching**: Disk-based caching with TTL for model outputs
188
+ - **Thread Safety**: Robust handling of concurrent analysis requests
189
 
190
  ### Document Processing Pipeline
191
+ - **PyMuPDF**: Superior PDF text extraction with layout preservation
192
  - **python-docx**: Word document processing
193
  - **Custom NLP Pipeline**: Legal-specific text refinement and normalization
194
+ - **Encoding Detection**: Automated character encoding identification
195
+ - **Metadata Extraction**: Document properties and structural information
196
 
197
  ### LLM Integration Framework
198
  - **Ollama**: Local model hosting (privacy-centric)
199
  - **OpenAI API**: GPT-3.5/4 integration
200
  - **Anthropic Claude**: Enterprise-grade reasoning
201
  - **Multi-provider Redundancy**: Automatic failover for reliability
202
+ - **Rate Limiting**: Token bucket algorithm to manage API usage
203
+ - **Cost Estimation**: Real-time cost tracking for cloud-based providers
204
+
205
+ ### Frontend Experience
206
+ - **Responsive Design**: Mobile-first approach with desktop optimization
207
+ - **Scrollable Analysis Sections**: Interactive viewing of lengthy reports
208
+ - **Real-time Feedback**: Loading states and error handling
209
+ - **Visual Risk Indicators**: Color-coded risk levels and confidence scores
210
+ - **Immediate Download**: One-click PDF report generation with embedded charts
211
 
212
  ---
213
 
 
220
  - **Ephemeral processing**: Documents purged immediately post-analysis
221
  - **Exclusion from training**: Your contracts never utilized for model enhancement
222
  - **Isolated processing**: Each analysis operates in a segregated environment
223
+ - **Zero persistent storage**: No account required; no data retained
224
 
225
  ### Regulatory Compliance
226
  - **GDPR adherence**: Data residency and right-to-erasure support
 
230
  ### User Autonomy
231
  - **Report acquisition**: PDF exports for personal archives
232
  - **Instant deletion**: Single-command eradication of all analysis history
233
+ - **Anonymous utilization**: No registration mandatory for fundamental analysis
234
 
235
  ---
236
 
 
239
  ### Immediate Horizon
240
  - **Multilingual capability**: Expansion beyond English to major global languages
241
  - **Sector-specific templates**: Industry-tailored analysis (healthcare, finance, technology)
242
+ - **Enhanced LLM integration**: More sophisticated negotiation point generation
243
+ - **Improved visual analytics**: Interactive risk dashboards and trend analysis
244
 
245
  ### Intermediate Timeline
246
  - **Predictive analytics**: Dispute probability forecasting based on clause patterns
247
+ - **Regulatory conformity**: Automated verification against GDPR, CCPA, HIPAA, and other regulations
248
  - **Smart contract analysis**: Support for blockchain-based agreements
249
  - **Collaborative examination**: Team workflows with role-based permissions
250
+ - **Version comparison**: Track changes between contract drafts
251
 
252
  ### Long-term Aspiration
253
  - **AI negotiation facilitation**: Real-time negotiation support during contractual discussions
254
  - **Legal outcome projection**: Machine learning models predicting litigation results
255
  - **Global legal repository**: Anonymous pattern aggregation for market intelligence
256
  - **Jurisdiction-specific analysis**: Deep integration with municipal laws and precedents
257
+ - **Automated clause drafting**: Generate balanced, legally sound alternatives
258
+ - **Continuous monitoring**: Alert users when existing contracts need re-evaluation
259
 
260
  ---
261
 
 
264
  Prepared to assume command of your contracts? Initiation is straightforward:
265
 
266
  1. **Access our platform**: [contractguardai.com](https://contractguardai.com)
267
+ 2. **Submit a contract**: Paste text or upload a PDF/DOCX file
268
  3. **Receive instantaneous analysis**: Results within 40–60 seconds
269
  4. **Acquire your report**: Comprehensive PDF with complete findings
270
 
 
276
 
277
  The AI Contract Risk Analyzer transcends mere tool status—it represents a movement toward legal transparency and accessibility. Whether you're executing your inaugural employment contract or reviewing your hundredth vendor agreement, you merit comprehension of your commitments.
278
 
279
+ Our enhanced architecture ensures that every component—from clause extraction to final PDF generation—operates in harmony, delivering consistently professional reports while maintaining robustness, scalability, and user-friendliness.
280
+
281
  **Your contracts. Your rights. Your assurance.**
282
 
283
  ---
284
 
285
  *Last Revised: November 2025* | *Version: 1.0*
286
 
287
+ © 2025 AI Contract Risk Analyzer. Democratizing legal intelligence for global accessibility.
288
+
289
+ ---
reporter/pdf_generator.py CHANGED
@@ -1,5 +1,6 @@
1
  # DEPENDENCIES
2
  import os
 
3
  from typing import Any
4
  from io import BytesIO
5
  from typing import Dict
@@ -12,36 +13,52 @@ from reportlab.platypus import Image
12
  from reportlab.platypus import Table
13
  from reportlab.lib.units import inch
14
  from reportlab.platypus import Spacer
 
15
  from reportlab.lib.enums import TA_LEFT
16
  from reportlab.platypus import Paragraph
17
  from reportlab.platypus import PageBreak
18
  from reportlab.graphics import renderPDF
19
  from reportlab.platypus import TableStyle
20
  from reportlab.lib.enums import TA_CENTER
 
21
  from reportlab.lib.enums import TA_JUSTIFY
22
  from reportlab.lib.pagesizes import letter
 
23
  from reportlab.platypus import KeepTogether
24
  from reportlab.graphics.shapes import Circle
25
  from reportlab.graphics.shapes import String
 
26
  from reportlab.graphics.shapes import Drawing
27
  from reportlab.lib.styles import ParagraphStyle
28
  from reportlab.platypus import SimpleDocTemplate
 
 
29
  from reportlab.lib.styles import getSampleStyleSheet
30
-
31
 
32
 
33
  class PDFReportGenerator:
34
  """
35
- Generate professional PDF reports matching the sample style
36
  """
37
  def __init__(self):
38
- self.styles = getSampleStyleSheet()
39
- self._setup_custom_styles()
40
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  def _setup_custom_styles(self):
43
  """
44
- Setup custom paragraph styles
45
  """
46
  # Title style
47
  self.styles.add(ParagraphStyle(name = 'ReportTitle',
@@ -53,7 +70,7 @@ class PDFReportGenerator:
53
  fontName = 'Helvetica-Bold',
54
  )
55
  )
56
-
57
  # Section heading
58
  self.styles.add(ParagraphStyle(name = 'SectionHeading',
59
  parent = self.styles['Heading2'],
@@ -64,389 +81,727 @@ class PDFReportGenerator:
64
  fontName = 'Helvetica-Bold',
65
  )
66
  )
67
-
 
 
 
 
 
 
 
 
 
 
 
68
  # Body text
69
- self.styles.add(ParagraphStyle(
70
- name='CustomBodyText',
71
- parent=self.styles['Normal'],
72
- fontSize=10,
73
- leading=14,
74
- textColor=colors.HexColor('#333333'),
75
- alignment=TA_JUSTIFY,
76
- fontName='Helvetica'
77
- ))
78
-
79
- # Small text style (add this)
80
- self.styles.add(ParagraphStyle(
81
- name='SmallText',
82
- parent=self.styles['Normal'],
83
- fontSize=8,
84
- leading=10,
85
- textColor=colors.HexColor('#666666'),
86
- fontName='Helvetica'
87
- ))
88
-
 
 
89
  # Bullet point
90
- self.styles.add(ParagraphStyle(
91
- name='BulletPoint',
92
- parent=self.styles['Normal'],
93
- fontSize=10,
94
- leading=14,
95
- textColor=colors.HexColor('#333333'),
96
- leftIndent=20,
97
- bulletIndent=10,
98
- fontName='Helvetica'
99
- ))
100
-
 
 
 
 
101
  # Table header
102
- self.styles.add(ParagraphStyle(
103
- name='TableHeader',
104
- parent=self.styles['Normal'],
105
- fontSize=10,
106
- textColor=colors.HexColor('#1a1a1a'),
107
- fontName='Helvetica-Bold'
108
- ))
109
-
 
 
 
 
 
 
 
 
 
 
 
 
110
  # Footer
111
- self.styles.add(ParagraphStyle(
112
- name='Footer',
113
- parent=self.styles['Normal'],
114
- fontSize=8,
115
- textColor=colors.HexColor('#666666'),
116
- alignment=TA_CENTER,
117
- fontName='Helvetica'
118
- ))
119
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  def _draw_risk_score_circle(self, score: int) -> Drawing:
121
- """Draw the risk score circle graphic"""
122
- d = Drawing(150, 150)
 
 
123
 
 
 
 
 
 
 
124
  # Determine color based on score
125
- if score >= 80:
126
- color = colors.HexColor('#dc2626')
127
- elif score >= 60:
128
- color = colors.HexColor('#f97316')
129
- elif score >= 40:
130
- color = colors.HexColor('#ca8a04')
 
 
 
131
  else:
132
- color = colors.HexColor('#16a34a')
133
-
134
- # Background circle
135
- bg_circle = Circle(75, 75, 60)
136
- bg_circle.fillColor = colors.HexColor('#f0f0f0')
137
  bg_circle.strokeColor = None
 
138
  d.add(bg_circle)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
- # Score circle
141
- score_circle = Circle(75, 75, 55)
142
- score_circle.fillColor = color
143
- score_circle.strokeColor = None
144
- d.add(score_circle)
145
-
146
- # Inner white circle
147
- inner_circle = Circle(75, 75, 45)
148
- inner_circle.fillColor = colors.white
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  inner_circle.strokeColor = None
150
  d.add(inner_circle)
151
-
152
- # Score text
153
- score_text = String(75, 70, str(score), textAnchor='middle')
154
- score_text.fontSize = 36
155
- score_text.fontName = 'Helvetica-Bold'
156
- score_text.fillColor = color
157
  d.add(score_text)
158
-
 
 
 
 
 
 
 
159
  return d
160
-
 
161
  def _get_risk_color(self, score: int) -> colors.Color:
162
- """Get color based on risk score"""
163
- if score >= 80:
 
 
164
  return colors.HexColor('#dc2626')
165
- elif score >= 60:
 
166
  return colors.HexColor('#f97316')
167
- elif score >= 40:
 
168
  return colors.HexColor('#ca8a04')
 
169
  else:
170
  return colors.HexColor('#16a34a')
171
-
 
172
  def _create_header_footer(self, canvas, doc):
173
- """Add header and footer to each page"""
 
 
174
  canvas.saveState()
175
-
176
  # Header
177
  canvas.setFont('Helvetica-Bold', 12)
178
- canvas.drawString(0.75 * inch, letter[1] - 0.5 * inch,
179
- "AI Contract Risk Analysis Report")
180
-
181
  # Footer
182
  canvas.setFont('Helvetica', 8)
183
  canvas.setFillColor(colors.HexColor('#666666'))
184
-
185
- # Page number (without total pages)
186
  page_num = f"Page {doc.page}"
187
- canvas.drawString(7 * inch, 0.5 * inch, page_num)
188
-
189
- # Legal disclaimer
190
  disclaimer = "For informational purposes only. Not legal advice."
191
- canvas.drawCentredString(letter[0] / 2, 0.5 * inch, disclaimer)
192
-
193
  canvas.restoreState()
194
 
195
-
196
- def generate_report(self, analysis_result: Dict[str, Any],
197
- output_path: Optional[str] = None) -> BytesIO:
198
  """
199
  Generate PDF report from analysis results
200
-
201
- Args:
202
- analysis_result: Analysis result dictionary from the API
203
- output_path: Optional file path to save PDF
204
-
 
 
205
  Returns:
206
- BytesIO buffer containing the PDF
 
207
  """
208
  # Create buffer
209
  buffer = BytesIO()
210
-
211
  # Create document
212
- doc = SimpleDocTemplate(
213
- buffer if not output_path else output_path,
214
- pagesize=letter,
215
- rightMargin=0.75*inch,
216
- leftMargin=0.75*inch,
217
- topMargin=1*inch,
218
- bottomMargin=1*inch
219
- )
220
-
221
  # Build story
222
- story = []
223
-
224
- # Title and Risk Score (Page 1)
225
  story.extend(self._build_page_1(analysis_result))
226
  story.append(PageBreak())
227
-
228
- # Negotiation Points (Page 2)
229
  story.extend(self._build_page_2(analysis_result))
230
  story.append(PageBreak())
231
-
232
- # Risk Category Breakdown (Page 3)
233
  story.extend(self._build_page_3(analysis_result))
234
-
235
- # Clause-by-Clause Analysis (Page 4+)
236
  story.append(PageBreak())
237
- story.extend(self._build_clause_analysis(analysis_result))
238
-
 
 
 
 
 
 
 
 
 
 
239
  # Build PDF
240
- doc.build(story, onFirstPage=self._create_header_footer,
241
- onLaterPages=self._create_header_footer)
242
-
243
  # If using buffer, seek to beginning
244
  if not output_path:
245
  buffer.seek(0)
246
  return buffer
247
-
248
  return buffer
249
-
 
250
  def _build_page_1(self, result: Dict) -> List:
251
- """Build page 1 content: Title, Risk Score, Executive Summary, Key Items"""
252
- elements = []
253
-
 
 
254
  # Title
255
- elements.append(Paragraph("AI Contract Risk Analysis Report",
256
- self.styles['ReportTitle']))
257
  elements.append(Spacer(1, 0.1*inch))
 
 
 
 
 
258
 
259
- # Risk Score Circle
260
- risk_score = result['risk_analysis']['overall_score']
261
- elements.append(self._draw_risk_score_circle(risk_score))
262
- elements.append(Spacer(1, 0.2*inch))
263
-
264
- # Executive Summary
265
- elements.append(Paragraph("Executive Summary",
266
- self.styles['SectionHeading']))
267
- elements.append(Paragraph(result['executive_summary'],
268
- self.styles['CustomBodyText']))
269
  elements.append(Spacer(1, 0.2*inch))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
  # Unfavorable Terms
272
- elements.append(Paragraph("Unfavorable Terms",
273
- self.styles['SectionHeading']))
274
-
275
- for term in result['unfavorable_terms'][:8]: # Limit to 8 items
276
- bullet_text = f"<bullet>•</bullet> <b>{term.get('clause_reference', term['term'])}:</b> {term['explanation']}"
277
- elements.append(Paragraph(bullet_text, self.styles['BulletPoint']))
278
- elements.append(Spacer(1, 0.05*inch))
 
 
 
 
 
 
 
 
 
 
279
 
 
 
 
280
  elements.append(Spacer(1, 0.2*inch))
281
 
282
  # Missing Protections
283
- elements.append(Paragraph("Missing Protections",
284
- self.styles['SectionHeading']))
285
-
286
- for protection in result['missing_protections'][:6]: # Limit to 6 items
287
- bullet_text = f"<bullet>•</bullet> <b>{protection['protection']}:</b> {protection['explanation']}"
288
- elements.append(Paragraph(bullet_text, self.styles['BulletPoint']))
289
- elements.append(Spacer(1, 0.05*inch))
290
-
291
- return elements
292
-
293
- def _build_page_2(self, result: Dict) -> List:
294
- """Build page 2 content: Negotiation Points"""
295
- elements = []
296
-
297
- elements.append(Paragraph("Negotiation Points",
298
- self.styles['SectionHeading']))
299
- elements.append(Spacer(1, 0.1*inch))
300
 
301
- negotiation_playbook = result.get('negotiation_playbook', {})
302
- negotiation_points = negotiation_playbook.get('critical_points', [])
 
 
 
 
 
 
 
 
 
 
 
303
 
304
- if negotiation_points:
305
- for point in negotiation_points[:7]: # Limit to 7 points
306
- bullet_text = f"<bullet>•</bullet> {point['issue']}: {point['rationale']}"
307
- elements.append(Paragraph(bullet_text, self.styles['BulletPoint']))
308
- elements.append(Spacer(1, 0.1*inch))
309
  else:
310
- # Fallback to unfavorable terms if negotiation points not available
311
- for term in result['unfavorable_terms'][:7]:
312
- if term.get('suggested_fix'):
313
- bullet_text = f"<bullet>•</bullet> {term['term']}: {term['suggested_fix']}"
314
- elements.append(Paragraph(bullet_text, self.styles['BulletPoint']))
315
- elements.append(Spacer(1, 0.1*inch))
316
-
317
  return elements
318
-
 
319
  def _build_page_3(self, result: Dict) -> List:
320
- """Build page 3 content: Risk Category Breakdown"""
321
- elements = []
322
-
323
- elements.append(Paragraph("Risk Category Breakdown",
324
- self.styles['SectionHeading']))
325
- elements.append(Spacer(1, 0.15*inch))
326
-
327
- # Create table data
328
- table_data = [
329
- [
330
- Paragraph('<b>Category</b>', self.styles['TableHeader']),
331
- Paragraph('<b>Score</b>', self.styles['TableHeader']),
332
- Paragraph('<b>Summary</b>', self.styles['TableHeader'])
333
- ]
334
- ]
335
-
336
- risk_breakdown = result['risk_analysis'].get('risk_breakdown', [])
337
 
338
- for category in risk_breakdown:
339
- score_color = self._get_risk_color(category['score'])
 
 
 
 
 
 
 
340
 
341
- category_cell = Paragraph(category['category'], self.styles['BodyText'])
342
- score_cell = Paragraph(
343
- f'<font color="{score_color.hexval()}"><b>{category["score"]}</b></font>',
344
- self.styles['TableHeader']
345
- )
346
- summary_cell = Paragraph(category['summary'], self.styles['BodyText'])
 
 
 
 
 
 
 
 
347
 
348
- table_data.append([category_cell, score_cell, summary_cell])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
 
350
- # Create table
351
- table = Table(table_data, colWidths=[1.8*inch, 0.7*inch, 4*inch])
352
- table.setStyle(TableStyle([
353
- ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#f5f5f5')),
354
- ('TEXTCOLOR', (0, 0), (-1, 0), colors.HexColor('#1a1a1a')),
355
- ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
356
- ('ALIGN', (1, 0), (1, -1), 'CENTER'),
357
- ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
358
- ('FONTSIZE', (0, 0), (-1, -1), 10),
359
- ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
360
- ('TOPPADDING', (0, 1), (-1, -1), 10),
361
- ('BOTTOMPADDING', (0, 1), (-1, -1), 10),
362
- ('GRID', (0, 0), (-1, -1), 0.5, colors.HexColor('#e5e5e5')),
363
- ('VALIGN', (0, 0), (-1, -1), 'TOP'),
364
- ]))
365
 
366
- elements.append(table)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
 
368
  return elements
369
-
370
- def _build_clause_analysis(self, analysis_result):
371
- """Build clause analysis section with null safety"""
372
- story = []
 
 
 
 
 
373
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  clauses = analysis_result.get('clauses', [])
 
375
  if not clauses:
 
376
  return story
377
-
378
- story.append(Paragraph("Clause-by-Clause Analysis", self.styles['Heading2']))
379
-
380
- for clause in clauses:
381
- # Add null safety for clause reference
382
- clause_ref = clause.get('reference')
383
- if clause_ref is None:
384
- clause_ref = "Unknown Reference"
385
-
386
- # Add null safety for category
387
- clause_category = clause.get('category', 'Unknown Category')
388
-
389
- # Add null safety for text
390
- clause_text = clause.get('text', 'No text available')
391
- if clause_text is None:
392
- clause_text = 'No text available'
393
 
394
- # Add null safety for confidence
395
- confidence = clause.get('confidence', 0)
396
- if confidence is None:
397
- confidence = 0
398
 
399
- # Create table for this clause - use fixed widths instead of page_width
400
- clause_data = [
401
- [
402
- Paragraph(f"<b>{clause_ref} {clause_category}</b>", self.styles['BodyText']),
403
- Paragraph(f"<b>{int(confidence * 100)}% confidence</b>", self.styles['BodyText'])
404
- ],
405
- [
406
- Paragraph(clause_text, self.styles['BodyText']),
407
- ''
408
- ]
409
- ]
410
 
411
- # Add risk indicators if present
412
- risk_indicators = clause.get('risk_indicators', [])
413
- if risk_indicators and any(risk_indicators):
414
- clause_data.append([
415
- Paragraph(f"<b>Risk Indicators:</b> {', '.join([ri for ri in risk_indicators if ri])}", self.styles['SmallText']),
416
- ''
417
- ])
418
 
419
- # Use fixed column widths instead of page_width
420
- clause_table = Table(clause_data, colWidths=[400, 150]) # Fixed widths in points
421
- clause_table.setStyle(TableStyle([
422
- ('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
423
- ('VALIGN', (0, 0), (-1, -1), 'TOP'),
424
- ('ALIGN', (1, 0), (1, 0), 'RIGHT'),
425
- ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
426
- ('FONTSIZE', (0, 0), (-1, 0), 10),
427
- ('ROWBACKGROUNDS', (0, 0), (-1, -1), [colors.white, colors.whitesmoke]),
428
- ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
429
- ('SPAN', (0, 1), (-1, 1)), # Span the text across both columns
430
- ]))
431
 
432
- story.append(clause_table)
433
- story.append(Spacer(1, 0.2 * inch))
434
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
435
  return story
436
 
437
 
438
- def generate_pdf_report(analysis_result: Dict[str, Any],
439
- output_path: Optional[str] = None) -> BytesIO:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
440
  """
441
  Convenience function to generate PDF report
442
-
443
- Args:
444
- analysis_result: Complete analysis result from the API
445
- output_path: Optional file path to save PDF
446
-
 
 
447
  Returns:
448
- BytesIO buffer containing the PDF
 
449
  """
450
  generator = PDFReportGenerator()
451
- return generator.generate_report(analysis_result, output_path)
452
 
 
 
1
  # DEPENDENCIES
2
  import os
3
+ import math
4
  from typing import Any
5
  from io import BytesIO
6
  from typing import Dict
 
13
  from reportlab.platypus import Table
14
  from reportlab.lib.units import inch
15
  from reportlab.platypus import Spacer
16
+ from reportlab.lib.pagesizes import A4
17
  from reportlab.lib.enums import TA_LEFT
18
  from reportlab.platypus import Paragraph
19
  from reportlab.platypus import PageBreak
20
  from reportlab.graphics import renderPDF
21
  from reportlab.platypus import TableStyle
22
  from reportlab.lib.enums import TA_CENTER
23
+ from reportlab.graphics.shapes import Path
24
  from reportlab.lib.enums import TA_JUSTIFY
25
  from reportlab.lib.pagesizes import letter
26
+ from reportlab.lib.utils import simpleSplit
27
  from reportlab.platypus import KeepTogether
28
  from reportlab.graphics.shapes import Circle
29
  from reportlab.graphics.shapes import String
30
+ from reportlab.lib.pagesizes import landscape
31
  from reportlab.graphics.shapes import Drawing
32
  from reportlab.lib.styles import ParagraphStyle
33
  from reportlab.platypus import SimpleDocTemplate
34
+ from reportlab.platypus.flowables import PageBreak
35
+ from reportlab.platypus.flowables import KeepInFrame
36
  from reportlab.lib.styles import getSampleStyleSheet
37
+ from reportlab.platypus import Table as PlatypusTable
38
 
39
 
40
  class PDFReportGenerator:
41
  """
42
+ Professional-grade PDF report generator matching sample style exactly
43
  """
44
  def __init__(self):
45
+ self.styles = getSampleStyleSheet()
 
46
 
47
+ self._setup_custom_styles()
48
+
49
+ self.page_width = letter[0]
50
+ self.page_height = letter[1]
51
+ self.margin_left = 0.75 * inch
52
+ self.margin_right = 0.75 * inch
53
+ self.margin_top = 1 * inch
54
+ self.margin_bottom = 1 * inch
55
+ self.content_width = self.page_width - self.margin_left - self.margin_right
56
+ self.content_height = self.page_height - self.margin_top - self.margin_bottom
57
+
58
 
59
  def _setup_custom_styles(self):
60
  """
61
+ Setup custom paragraph styles with precise control
62
  """
63
  # Title style
64
  self.styles.add(ParagraphStyle(name = 'ReportTitle',
 
70
  fontName = 'Helvetica-Bold',
71
  )
72
  )
73
+
74
  # Section heading
75
  self.styles.add(ParagraphStyle(name = 'SectionHeading',
76
  parent = self.styles['Heading2'],
 
81
  fontName = 'Helvetica-Bold',
82
  )
83
  )
84
+
85
+ # Sub-section heading
86
+ self.styles.add(ParagraphStyle(name = 'SubSectionHeading',
87
+ parent = self.styles['Normal'],
88
+ fontSize = 12,
89
+ textColor = colors.HexColor('#333333'),
90
+ spaceAfter = 8,
91
+ spaceBefore = 12,
92
+ fontName = 'Helvetica-Bold',
93
+ )
94
+ )
95
+
96
  # Body text
97
+ self.styles.add(ParagraphStyle(name = 'CustomBodyText',
98
+ parent = self.styles['Normal'],
99
+ fontSize = 10,
100
+ leading = 14,
101
+ textColor = colors.HexColor('#333333'),
102
+ alignment = TA_JUSTIFY,
103
+ fontName = 'Helvetica',
104
+ leftIndent = 0,
105
+ rightIndent = 0,
106
+ )
107
+ )
108
+
109
+ # Small text style
110
+ self.styles.add(ParagraphStyle(name = 'SmallText',
111
+ parent = self.styles['Normal'],
112
+ fontSize = 8,
113
+ leading = 10,
114
+ textColor = colors.HexColor('#666666'),
115
+ fontName = 'Helvetica',
116
+ )
117
+ )
118
+
119
  # Bullet point
120
+ self.styles.add(ParagraphStyle(name = 'BulletPoint',
121
+ parent = self.styles['Normal'],
122
+ fontSize = 10,
123
+ leading = 14,
124
+ textColor = colors.HexColor('#333333'),
125
+ leftIndent = 20,
126
+ bulletIndent = 10,
127
+ bulletFontName = 'Helvetica',
128
+ bulletFontSize = 10,
129
+ bulletColor = colors.black,
130
+ spaceAfter = 4,
131
+ fontName = 'Helvetica',
132
+ )
133
+ )
134
+
135
  # Table header
136
+ self.styles.add(ParagraphStyle(name = 'TableHeader',
137
+ parent = self.styles['Normal'],
138
+ fontSize = 10,
139
+ textColor = colors.HexColor('#1a1a1a'),
140
+ fontName = 'Helvetica-Bold',
141
+ alignment = TA_LEFT,
142
+ )
143
+ )
144
+
145
+ # Table cell
146
+ self.styles.add(ParagraphStyle(name = 'TableCell',
147
+ parent = self.styles['Normal'],
148
+ fontSize = 9,
149
+ textColor = colors.HexColor('#333333'),
150
+ fontName = 'Helvetica',
151
+ alignment = TA_LEFT,
152
+ spaceAfter = 2,
153
+ )
154
+ )
155
+
156
  # Footer
157
+ self.styles.add(ParagraphStyle(name = 'Footer',
158
+ parent = self.styles['Normal'],
159
+ fontSize = 8,
160
+ textColor = colors.HexColor('#666666'),
161
+ alignment = TA_CENTER,
162
+ fontName = 'Helvetica',
163
+ )
164
+ )
165
+
166
+ # Risk indicator style
167
+ self.styles.add(ParagraphStyle(name = 'RiskIndicator',
168
+ parent = self.styles['Normal'],
169
+ fontSize = 9,
170
+ textColor = colors.HexColor('#dc2626'),
171
+ fontName = 'Helvetica-Bold',
172
+ backColor = colors.HexColor('#fef2f2'),
173
+ borderPadding = 5,
174
+ spaceAfter = 4,
175
+ )
176
+ )
177
+
178
+ # Keyword style
179
+ self.styles.add(ParagraphStyle(name = 'Keyword',
180
+ parent = self.styles['Normal'],
181
+ fontSize = 9,
182
+ textColor = colors.HexColor('#1e40af'),
183
+ fontName = 'Helvetica',
184
+ backColor = colors.HexColor('#eff6ff'),
185
+ borderPadding = 3,
186
+ )
187
+ )
188
+
189
+
190
  def _draw_risk_score_circle(self, score: int) -> Drawing:
191
+ """
192
+ Draw the risk score circle graphic with correct fill percentage
193
+ """
194
+ d = Drawing(150, 150)
195
 
196
+ # Define circle properties
197
+ center_x, center_y = 75, 75
198
+ outer_radius = 60
199
+ inner_radius = 45
200
+ thickness = 15 # Thickness of the colored ring
201
+
202
  # Determine color based on score
203
+ if (score >= 80):
204
+ color = colors.HexColor('#dc2626') # Red
205
+
206
+ elif (score >= 60):
207
+ color = colors.HexColor('#f97316') # Orange
208
+
209
+ elif (score >= 40):
210
+ color = colors.HexColor('#ca8a04') # Amber
211
+
212
  else:
213
+ color = colors.HexColor('#16a34a') # Green
214
+
215
+ # Draw background circle (light grey)
216
+ bg_circle = Circle(center_x, center_y, outer_radius)
217
+ bg_circle.fillColor = colors.HexColor('#f0f0f0')
218
  bg_circle.strokeColor = None
219
+
220
  d.add(bg_circle)
221
+
222
+ # Draw colored arc representing the score percentage: The arc is drawn from 0 degrees (3 o'clock) clockwise
223
+ sweep_angle = (score / 100.0) * 360
224
+
225
+ # Start angle is 90 degrees counter-clockwise from 3 o'clock (i.e., 12 o'clock)
226
+ start_angle = 90
227
+
228
+ # Clockwise direction
229
+ end_angle = start_angle - sweep_angle
230
+
231
+ # Ensure start angle is greater than end angle for clockwise sweep
232
+ if (start_angle < end_angle):
233
+ end_angle = start_angle - sweep_angle
234
+ extent = -sweep_angle
235
+
236
+ else:
237
+ # Clockwise sweep
238
+ extent = -sweep_angle
239
+
240
+ # Create a path for the arc (ring segment)
241
+ p = Path()
242
+
243
+ # Calculate start and end points using trigonometry
244
+ start_rad = math.radians(start_angle)
245
+
246
+ # Correct end angle for clockwise
247
+ end_rad = math.radians(start_angle - sweep_angle)
248
+
249
+ # Move to the outer perimeter at the start angle
250
+ start_outer_x = center_x + outer_radius * math.cos(start_rad)
251
+ start_outer_y = center_y + outer_radius * math.sin(start_rad)
252
+
253
+ p.moveTo(start_outer_x, start_outer_y)
254
 
255
+ # At least 10 segments, or 1 per 5 degrees of sweep
256
+ num_segments = max(10, int(sweep_angle / 5))
257
+ angle_step = sweep_angle / num_segments
258
+
259
+ # Draw outer arc as line segments
260
+ for i in range(1, num_segments + 1):
261
+ # Clockwise
262
+ current_angle_deg = start_angle - (i * angle_step)
263
+ current_angle_rad = math.radians(current_angle_deg)
264
+ x = center_x + outer_radius * math.cos(current_angle_rad)
265
+ y = center_y + outer_radius * math.sin(current_angle_rad)
266
+
267
+ p.lineTo(x, y)
268
+
269
+ # Draw inner arc as line segments (reverse direction)
270
+ for i in range(num_segments, -1, -1):
271
+ # Clockwise
272
+ current_angle_deg = start_angle - (i * angle_step)
273
+ current_angle_rad = math.radians(current_angle_deg)
274
+ x = center_x + inner_radius * math.cos(current_angle_rad)
275
+ y = center_y + inner_radius * math.sin(current_angle_rad)
276
+ p.lineTo(x, y)
277
+
278
+ p.closePath()
279
+ p.fillColor = color
280
+ p.strokeColor = None
281
+ d.add(p)
282
+
283
+ # Draw inner white circle : Slightly smaller to fit inside the ring
284
+ inner_circle = Circle(center_x, center_y, inner_radius - 2)
285
+ inner_circle.fillColor = colors.white
286
  inner_circle.strokeColor = None
287
  d.add(inner_circle)
288
+
289
+ # Draw score text in the center
290
+ score_text = String(center_x, center_y - 10, str(score), textAnchor='middle')
291
+ score_text.fontSize = 36
292
+ score_text.fontName = 'Helvetica-Bold'
293
+ score_text.fillColor = color
294
  d.add(score_text)
295
+
296
+ # Draw "/100" text slightly below the score
297
+ subtitle_text = String(center_x, center_y - 28, "/100", textAnchor='middle')
298
+ subtitle_text.fontSize = 16
299
+ subtitle_text.fontName = 'Helvetica'
300
+ subtitle_text.fillColor = colors.HexColor('#666666')
301
+ d.add(subtitle_text)
302
+
303
  return d
304
+
305
+
306
  def _get_risk_color(self, score: int) -> colors.Color:
307
+ """
308
+ Get color based on risk score
309
+ """
310
+ if (score >= 80):
311
  return colors.HexColor('#dc2626')
312
+
313
+ elif (score >= 60):
314
  return colors.HexColor('#f97316')
315
+
316
+ elif (score >= 40):
317
  return colors.HexColor('#ca8a04')
318
+
319
  else:
320
  return colors.HexColor('#16a34a')
321
+
322
+
323
  def _create_header_footer(self, canvas, doc):
324
+ """
325
+ Add header and footer to each page with consistent positioning
326
+ """
327
  canvas.saveState()
328
+
329
  # Header
330
  canvas.setFont('Helvetica-Bold', 12)
331
+ canvas.setFillColor(colors.black)
332
+ canvas.drawString(self.margin_left, self.page_height - 0.5 * inch, "AI Powered Contract Risk Analysis Report")
333
+
334
  # Footer
335
  canvas.setFont('Helvetica', 8)
336
  canvas.setFillColor(colors.HexColor('#666666'))
337
+
338
+ # Page number
339
  page_num = f"Page {doc.page}"
340
+ canvas.drawString(self.page_width - self.margin_right - 1*inch, 0.5 * inch, page_num)
341
+
342
+ # Disclaimer
343
  disclaimer = "For informational purposes only. Not legal advice."
344
+ canvas.drawCentredString(self.page_width / 2.0, 0.5 * inch, disclaimer)
345
+
346
  canvas.restoreState()
347
 
348
+
349
+ def generate_report(self, analysis_result: Dict[str, Any], output_path: Optional[str] = None) -> BytesIO:
 
350
  """
351
  Generate PDF report from analysis results
352
+
353
+ Arguments:
354
+ ----------
355
+ analysis_result { dict } : Analysis result dictionary from the API
356
+
357
+ output_path { str } : Optional file path to save PDF
358
+
359
  Returns:
360
+ --------
361
+ { BytesIO } : Buffer containing the PDF
362
  """
363
  # Create buffer
364
  buffer = BytesIO()
365
+
366
  # Create document
367
+ doc = SimpleDocTemplate(buffer if not output_path else output_path,
368
+ pagesize = letter,
369
+ rightMargin = self.margin_right,
370
+ leftMargin = self.margin_left,
371
+ topMargin = self.margin_top,
372
+ bottomMargin = self.margin_bottom,
373
+ )
374
+
 
375
  # Build story
376
+ story = list()
377
+
378
+ # Page 1: Title, Risk Score, Executive Summary, Keywords
379
  story.extend(self._build_page_1(analysis_result))
380
  story.append(PageBreak())
381
+
382
+ # Page 2: Unfavorable Terms, Missing Protections
383
  story.extend(self._build_page_2(analysis_result))
384
  story.append(PageBreak())
385
+
386
+ # Page 3: Negotiation Points
387
  story.extend(self._build_page_3(analysis_result))
 
 
388
  story.append(PageBreak())
389
+
390
+ # Page 4: Risk Category Breakdown Table
391
+ story.extend(self._build_page_4(analysis_result))
392
+ story.append(PageBreak())
393
+
394
+ # Page 5: Clause Interpretations
395
+ story.extend(self._build_page_5(analysis_result))
396
+ story.append(PageBreak())
397
+
398
+ # Page 6+: Detailed Clause Analysis
399
+ story.extend(self._build_clause_analysis_pages(analysis_result))
400
+
401
  # Build PDF
402
+ doc.build(story, onFirstPage = self._create_header_footer, onLaterPages = self._create_header_footer)
403
+
 
404
  # If using buffer, seek to beginning
405
  if not output_path:
406
  buffer.seek(0)
407
  return buffer
408
+
409
  return buffer
410
+
411
+
412
  def _build_page_1(self, result: Dict) -> List:
413
+ """
414
+ Build page 1 content: Title, Risk Score, Executive Summary, Keywords
415
+ """
416
+ elements = list()
417
+
418
  # Title
419
+ elements.append(Paragraph("AI Contract Risk Analysis Report", self.styles['ReportTitle']))
 
420
  elements.append(Spacer(1, 0.1*inch))
421
+
422
+ # Contract Info
423
+ classification = result.get('classification', {})
424
+ contract_type = classification.get('subcategory', 'Unknown')
425
+ confidence = classification.get('confidence', 0) * 100
426
 
427
+ info_text = f"<b>Contract Type:</b> {contract_type.replace('_', ' ').title()} | <b>Confidence:</b> {confidence:.1f}%"
428
+ elements.append(Paragraph(info_text, self.styles['CustomBodyText']))
 
 
 
 
 
 
 
 
429
  elements.append(Spacer(1, 0.2*inch))
430
+
431
+ # Risk Score Circle and Executive Summary Side-by-Side
432
+ risk_analysis = result.get('risk_analysis', {})
433
+ overall_score = risk_analysis.get('overall_score', 0)
434
+ risk_level = risk_analysis.get('risk_level', 'UNKNOWN')
435
+
436
+ score_frame = KeepInFrame(1.5*inch, 1.5*inch, [self._draw_risk_score_circle(overall_score)])
437
+ summary_para = Paragraph(f"<b>Overall Risk Score: {overall_score}/100 ({risk_level})</b><br/><br/>{result.get('executive_summary', 'No executive summary available.')}",
438
+ self.styles['CustomBodyText']
439
+ )
440
+
441
+ top_row = PlatypusTable([[score_frame, summary_para]], colWidths=[1.6*inch, 4.5*inch])
442
+
443
+ top_row.setStyle(TableStyle([('VALIGN', (0, 0), (-1, -1), 'TOP'),
444
+ ('LEFTPADDING', (0, 0), (-1, -1), 0),
445
+ ('RIGHTPADDING', (0, 0), (-1, -1), 0),
446
+ ('TOPPADDING', (0, 0), (-1, -1), 0),
447
+ ('BOTTOMPADDING', (0, 0), (-1, -1), 0),
448
+ ])
449
+ )
450
+
451
+ elements.append(top_row)
452
+ elements.append(Spacer(1, 0.3*inch))
453
+
454
+ # Detected Keywords
455
+ detected_keywords = result.get('classification', {}).get('detected_keywords', [])
456
 
457
+ if detected_keywords:
458
+ elements.append(Paragraph("Detected Keywords", self.styles['SectionHeading']))
459
+ keywords_text = ", ".join([f"<font color='#1e40af'><b>{kw}</b></font>" for kw in detected_keywords[:15]]) # Show first 15 keywords
460
+ elements.append(Paragraph(keywords_text, self.styles['CustomBodyText']))
461
+
462
+ return elements
463
+
464
+
465
+ def _build_page_2(self, result: Dict) -> List:
466
+ """
467
+ Build page 2: Unfavorable Terms and Missing Protections
468
+ """
469
+ elements = list()
470
+
471
  # Unfavorable Terms
472
+ elements.append(Paragraph("Unfavorable Terms", self.styles['SectionHeading']))
473
+
474
+ unfav_terms = result.get('unfavorable_terms', [])
475
+
476
+ if unfav_terms:
477
+ # Sort by severity and risk score
478
+ sorted_terms = sorted(unfav_terms, key=lambda x: (x.get('severity', 'low') != 'high', -x.get('risk_score', 0)))
479
+
480
+ for term in sorted_terms:
481
+ severity = term.get('severity', 'unknown').upper()
482
+ risk_score = term.get('risk_score', 0)
483
+ clause_ref = term.get('clause_reference', 'N/A')
484
+ explanation = term.get('explanation', 'No explanation provided.')
485
+
486
+ term_text = f"<b>{clause_ref}</b> | <font color='{self._get_severity_color(severity).hexval()}'>{severity} SEVERITY</font> | Risk Score: {risk_score}<br/>{explanation}"
487
+
488
+ elements.append(Paragraph(term_text, self.styles['BulletPoint']))
489
 
490
+ else:
491
+ elements.append(Paragraph("No unfavorable terms identified.", self.styles['CustomBodyText']))
492
+
493
  elements.append(Spacer(1, 0.2*inch))
494
 
495
  # Missing Protections
496
+ elements.append(Paragraph("Missing Protections", self.styles['SectionHeading']))
497
+
498
+ missing_protections = result.get('missing_protections', [])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
499
 
500
+ if missing_protections:
501
+ # Sort by importance and risk score
502
+ sorted_protections = sorted(missing_protections, key=lambda x: (x.get('importance', 'medium') != 'critical', -x.get('risk_score', 0)))
503
+
504
+ for prot in sorted_protections:
505
+ importance = prot.get('importance', 'medium').upper()
506
+ risk_score = prot.get('risk_score', 0)
507
+ protection_name = prot.get('protection', 'N/A')
508
+ explanation = prot.get('explanation', 'No explanation provided.')
509
+
510
+ prot_text = f"<b>{protection_name}</b> | <font color='{self._get_importance_color(importance).hexval()}'>{importance} IMPORTANCE</font> | Risk Score: {risk_score}<br/>{explanation}"
511
+
512
+ elements.append(Paragraph(prot_text, self.styles['BulletPoint']))
513
 
 
 
 
 
 
514
  else:
515
+ elements.append(Paragraph("No missing protections identified.", self.styles['CustomBodyText']))
516
+
 
 
 
 
 
517
  return elements
518
+
519
+
520
  def _build_page_3(self, result: Dict) -> List:
521
+ """
522
+ Build page 3: Negotiation Points as a structured table
523
+ """
524
+ elements = list()
525
+
526
+ elements.append(Paragraph("Negotiation Strategy", self.styles['SectionHeading']))
 
 
 
 
 
 
 
 
 
 
 
527
 
528
+ negotiation_points = result.get('negotiation_points', [])
529
+
530
+ if negotiation_points:
531
+ # Prepare table data: Priority, Issue, Current Language, Proposed Language
532
+ table_data = [[Paragraph('<b>Priority</b>', self.styles['TableHeader']),
533
+ Paragraph('<b>Issue</b>', self.styles['TableHeader']),
534
+ Paragraph('<b>Current</b>', self.styles['TableHeader']),
535
+ Paragraph('<b>Proposed</b>', self.styles['TableHeader']),
536
+ ]]
537
 
538
+ # Sort by priority
539
+ sorted_points = sorted(negotiation_points, key=lambda x: x.get('priority', 999))
540
+
541
+ for point in sorted_points:
542
+ priority = str(point.get('priority', 'N/A'))
543
+ issue = Paragraph(point.get('issue', 'N/A'), self.styles['TableCell'])
544
+ current = Paragraph(self._truncate_text(point.get('current_language', 'Not specified'), 100), self.styles['TableCell'])
545
+ proposed = Paragraph(self._truncate_text(point.get('proposed_language', 'Request balanced language'), 100), self.styles['TableCell'])
546
+
547
+ table_data.append([Paragraph(priority, self.styles['TableCell']), issue, current, proposed])
548
+
549
+ # Create the table with appropriate column widths
550
+ col_widths = [0.5*inch, 1.5*inch, 1.5*inch, 2*inch]
551
+ table = Table(table_data, colWidths=col_widths)
552
 
553
+ table.setStyle(TableStyle([('BACKGROUND', (0,0), (-1,0), colors.HexColor('#f5f5f5')),
554
+ ('TEXTCOLOR', (0,0), (-1,0), colors.HexColor('#1a1a1a')),
555
+ ('ALIGN', (0,0), (-1,-1), 'LEFT'),
556
+ ('FONTNAME', (0,0), (-1,0), 'Helvetica-Bold'),
557
+ ('FONTSIZE', (0,0), (-1,0), 10),
558
+ ('BOTTOMPADDING', (0,0), (-1,0), 12),
559
+ ('GRID', (0,0), (-1,-1), 1, colors.HexColor('#d1d5db')),
560
+ ('VALIGN', (0,0), (-1,-1), 'TOP'),
561
+ ('LEFTPADDING', (0,0), (-1,-1), 6),
562
+ ('RIGHTPADDING', (0,0), (-1,-1), 6),
563
+ ('TOPPADDING', (0,0), (-1,-1), 6),
564
+ ('BOTTOMPADDING', (0,0), (-1,-1), 6),
565
+ ])
566
+ )
567
+
568
+ elements.append(table)
569
+
570
+ else:
571
+ elements.append(Paragraph("No negotiation points available.", self.styles['CustomBodyText']))
572
+
573
+ return elements
574
+
575
+
576
+ def _build_page_4(self, result: Dict) -> List:
577
+ """
578
+ Build page 4: Risk Category Breakdown Table
579
+ """
580
+ elements = list()
581
+
582
+ elements.append(Paragraph("Risk Category Breakdown", self.styles['SectionHeading']))
583
 
584
+ risk_breakdown = result.get('risk_analysis', {}).get('risk_breakdown', [])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
585
 
586
+ if risk_breakdown:
587
+ # Prepare table data
588
+ table_data = [[Paragraph('<b>Category</b>', self.styles['TableHeader']),
589
+ Paragraph('<b>Score</b>', self.styles['TableHeader']),
590
+ Paragraph('<b>Summary</b>', self.styles['TableHeader']),
591
+ ]]
592
+
593
+ for item in risk_breakdown:
594
+ category = item.get('category', 'N/A').replace('_', ' ').title()
595
+ score = item.get('score', 0)
596
+ summary = item.get('summary', 'No summary available.')
597
+ score_color = self._get_risk_color(score)
598
+ score_para = Paragraph(f'<font color="{score_color.hexval()}">{score}/100</font>', self.styles['TableHeader'])
599
+ summary_para = Paragraph(summary, self.styles['TableCell'])
600
+
601
+ table_data.append([Paragraph(category, self.styles['TableCell']), score_para, summary_para])
602
+
603
+ # Create table
604
+ col_widths = [2*inch, 1*inch, 3.5*inch]
605
+ table = Table(table_data, colWidths = col_widths)
606
+
607
+ # Table Style
608
+ table.setStyle(TableStyle([('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#f5f5f5')),
609
+ ('TEXTCOLOR', (0, 0), (-1, 0), colors.HexColor('#1a1a1a')),
610
+ ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
611
+ ('ALIGN', (1, 0), (1, -1), 'CENTER'),
612
+ ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
613
+ ('FONTSIZE', (0, 0), (-1, 0), 10),
614
+ ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
615
+ ('TOPPADDING', (0, 1), (-1, -1), 8),
616
+ ('BOTTOMPADDING', (0, 1), (-1, -1), 8),
617
+ ('GRID', (0, 0), (-1, -1), 1, colors.HexColor('#d1d5db')),
618
+ ('VALIGN', (0, 0), (-1, -1), 'TOP'),
619
+ ])
620
+ )
621
+
622
+ elements.append(table)
623
+
624
+ else:
625
+ elements.append(Paragraph("No risk breakdown data available.", self.styles['CustomBodyText']))
626
 
627
  return elements
628
+
629
+
630
+ def _build_page_5(self, result: Dict) -> List:
631
+ """
632
+ Build page 5: Clause Interpretations
633
+ """
634
+ elements = list()
635
+
636
+ elements.append(Paragraph("Clause Interpretations", self.styles['SectionHeading']))
637
 
638
+ clause_interpretations = result.get('clause_interpretations', [])
639
+
640
+ if clause_interpretations:
641
+ for i, clause in enumerate(clause_interpretations):
642
+ ref = clause.get('clause_reference', 'N/A')
643
+ plain_english = clause.get('plain_english_summary', 'No summary available.')
644
+ favorability = clause.get('favorability', 'neutral')
645
+
646
+ # Color code based on favorability
647
+ fav_color = self._get_favorability_color(favorability)
648
+
649
+ clause_text = f"<b>{ref}</b> | <font color='{fav_color.hexval()}'>{favorability.upper()}</font><br/>{plain_english}"
650
+
651
+ elements.append(Paragraph(clause_text, self.styles['BulletPoint']))
652
+
653
+ # Add key points if available
654
+ key_points = clause.get('key_points', [])
655
+ if key_points:
656
+ for point in key_points:
657
+ elements.append(Paragraph(f"• {point}", self.styles['SmallText']))
658
+
659
+ elements.append(Spacer(1, 0.1*inch))
660
+
661
+ # Break if we've added enough content: After 8 clauses, it's usually enough for one page
662
+ if i >= 7:
663
+ remaining = len(clause_interpretations) - 8
664
+ if (remaining > 0):
665
+ elements.append(Paragraph(f"... and {remaining} more clause interpretations", self.styles['SmallText']))
666
+ break
667
+
668
+ else:
669
+ elements.append(Paragraph("No clause interpretations available.", self.styles['CustomBodyText']))
670
+
671
+ return elements
672
+
673
+
674
+ def _build_clause_analysis_pages(self, analysis_result):
675
+ """
676
+ Build dynamic pages for detailed clause-by-clause analysis
677
+ """
678
+ story = list()
679
  clauses = analysis_result.get('clauses', [])
680
+
681
  if not clauses:
682
+ story.append(Paragraph("No clauses analyzed.", self.styles['CustomBodyText']))
683
  return story
684
+
685
+ story.append(Paragraph("Detailed Clause Analysis", self.styles['SectionHeading']))
686
+
687
+ for i, clause in enumerate(clauses):
688
+ # Use KeepTogether to ensure a clause block stays on one page if possible
689
+ clause_elements = list()
690
+
691
+ # Clause Reference and Category as Header
692
+ ref = clause.get('reference', 'N/A')
693
+ category = clause.get('category', 'N/A').replace('_', ' ').title()
694
+ confidence = clause.get('confidence', 0)
695
+ risk_score = clause.get('risk_score', 0)
 
 
 
 
696
 
697
+ ref_cat_text = f"{ref} {category} | Confidence: {confidence:.1f} | Risk Score: {risk_score}"
698
+ clause_header = Paragraph(ref_cat_text, self.styles['SubSectionHeading'])
 
 
699
 
700
+ clause_elements.append(clause_header)
701
+
702
+ # Original Clause Text
703
+ clause_text = clause.get('text', 'No text available.')
 
 
 
 
 
 
 
704
 
705
+ # Truncate very long clause text
706
+ if len(clause_text) > 500:
707
+ clause_text = clause_text[:500] + "... [truncated]"
 
 
 
 
708
 
709
+ clause_para = Paragraph(f"<b>Original Text:</b> {clause_text}", self.styles['CustomBodyText'])
 
 
 
 
 
 
 
 
 
 
 
710
 
711
+ clause_elements.append(clause_para)
712
+
713
+ # Risk Indicators (if any)
714
+ risk_inds = clause.get('risk_indicators', [])
715
+ if risk_inds:
716
+ ri_text = f"<b>Risk Indicators:</b> {', '.join(risk_inds)}"
717
+ ri_para = Paragraph(ri_text, self.styles['RiskIndicator'])
718
+
719
+ clause_elements.append(ri_para)
720
+
721
+ # Add Spacer between clauses
722
+ clause_elements.append(Spacer(1, 0.15 * inch))
723
+
724
+ # Wrap in KeepTogether
725
+ kt_flowable = KeepTogether(clause_elements)
726
+ story.append(kt_flowable)
727
+
728
+ # Add page break every 5 clauses to prevent overflow
729
+ if (i + 1) % 5 == 0 and (i + 1) < len(clauses):
730
+ story.append(PageBreak())
731
+
732
  return story
733
 
734
 
735
+ def _get_severity_color(self, severity: str) -> colors.Color:
736
+ """
737
+ Get color based on severity level
738
+ """
739
+ severity = severity.lower()
740
+ if (severity == 'high'):
741
+ return colors.HexColor('#dc2626')
742
+
743
+ elif (severity == 'medium'):
744
+ return colors.HexColor('#f97316')
745
+
746
+ else:
747
+ return colors.HexColor('#16a34a')
748
+
749
+
750
+ def _get_importance_color(self, importance: str) -> colors.Color:
751
+ """
752
+ Get color based on importance level
753
+ """
754
+ importance = importance.lower()
755
+
756
+ if (importance == 'critical'):
757
+ return colors.HexColor('#dc2626')
758
+
759
+ elif (importance == 'high'):
760
+ return colors.HexColor('#f97316')
761
+
762
+ elif (importance == 'medium'):
763
+ return colors.HexColor('#ca8a04')
764
+
765
+ else:
766
+ return colors.HexColor('#16a34a')
767
+
768
+
769
+ def _get_favorability_color(self, favorability: str) -> colors.Color:
770
+ """
771
+ Get color based on favorability
772
+ """
773
+ favorability = favorability.lower()
774
+ if favorability == 'favorable':
775
+ return colors.HexColor('#16a34a')
776
+ elif favorability == 'unfavorable':
777
+ return colors.HexColor('#dc2626')
778
+ else:
779
+ return colors.HexColor('#ca8a04')
780
+
781
+
782
+ def _truncate_text(self, text: str, max_length: int) -> str:
783
+ """
784
+ Truncate text to specified length
785
+ """
786
+ if len(text) <= max_length:
787
+ return text
788
+ return text[:max_length-3] + "..."
789
+
790
+
791
+ def generate_pdf_report(analysis_result: Dict[str, Any], output_path: Optional[str] = None) -> BytesIO:
792
  """
793
  Convenience function to generate PDF report
794
+
795
+ Arguments:
796
+ ----------
797
+ analysis_result { dict } : Complete analysis result from the API
798
+
799
+ output_path { str } : Optional file path to save PDF
800
+
801
  Returns:
802
+ --------
803
+ { BytesIO } : Buffer containing the PDF
804
  """
805
  generator = PDFReportGenerator()
 
806
 
807
+ return generator.generate_report(analysis_result, output_path)
requirements.txt CHANGED
@@ -1,32 +1,59 @@
1
- # FastAPI & Server
2
- fastapi==0.104.1
3
- uvicorn[standard]==0.24.0
4
- python-multipart==0.0.6
 
 
5
 
6
- # ML & NLP
7
- transformers==4.35.2
8
- torch==2.1.1
9
- sentence-transformers==2.2.2
10
- spacy
 
 
11
 
12
- # Document Processing
13
- PyPDF2==3.0.1
14
- PyMuPDF==1.23.8
15
- python-docx==1.1.0
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  # LLM Providers
19
- openai>=1.3.0
20
- anthropic>=0.18.0
21
- requests==2.31.0
22
- Ollama
23
 
24
- # Data & Validation
25
- pydantic==2.5.0
26
- pydantic-settings==2.1.0
27
 
28
  # Utilities
29
- python-dotenv==1.0.0
 
 
30
 
31
- # PDF report generation
32
- reportlab>=4.0.0
 
 
 
 
 
 
 
 
 
 
1
+ # Core Dependencies
2
+ fastapi>=0.104.1
3
+ uvicorn[standard]>=0.24.0
4
+ pydantic>=2.5.0
5
+ pydantic-settings>=2.1.0
6
+ python-multipart>=0.0.6
7
 
8
+ # AI & NLP Libraries
9
+ torch>=2.1.0
10
+ transformers>=4.35.0
11
+ sentence-transformers>=2.2.2
12
+ tokenizers>=0.14.0
13
+ safetensors>=0.4.0
14
+ accelerate>=0.24.0
15
 
16
+ # Data Processing & Math
17
+ numpy>=1.24.0
18
+ pandas>=2.1.0
19
+ scipy>=1.11.0
20
 
21
+ # Text Processing
22
+ spacy>=3.7.0
23
+
24
+ # PDF Generation
25
+ reportlab>=4.0.0
26
+ Pillow>=10.0.0
27
+
28
+ # Document Processing
29
+ PyPDF2>=3.0.0
30
+ PyMuPDF>=1.23.0
31
+ python-docx>=1.1.0
32
+
33
+ # HTTP Requests
34
+ requests>=2.31.0
35
 
36
  # LLM Providers
37
+ openai>=1.0.0
38
+ anthropic>=0.5.0
 
 
39
 
40
+ # Text Processing Utilities
41
+ chardet>=5.0.0
42
+ langdetect>=1.0.9
43
 
44
  # Utilities
45
+ tqdm>=4.66.0
46
+ python-dateutil>=2.8.0
47
+ typing-extensions>=4.8.0
48
 
49
+ # Async Support
50
+ anyio>=3.7.0
51
+
52
+ # OS Interaction
53
+ psutil>=5.9.5
54
+
55
+ # Better JSON for numpy serialization
56
+ orjson>=3.9.0
57
+
58
+ # For spaCy performance
59
+ blis>=0.7.10
services/clause_extractor.py CHANGED
@@ -9,8 +9,6 @@ from typing import Dict
9
  from typing import Tuple
10
  from pathlib import Path
11
  from typing import Optional
12
- from dataclasses import field
13
- from dataclasses import dataclass
14
  from collections import defaultdict
15
  from sentence_transformers import util
16
 
@@ -23,44 +21,9 @@ from config.risk_rules import RiskRules
23
  from config.risk_rules import ContractType
24
  from utils.text_processor import TextProcessor
25
  from utils.logger import ContractAnalyzerLogger
 
26
  from model_manager.model_loader import ModelLoader
27
-
28
-
29
- @dataclass
30
- class ExtractedClause:
31
- """
32
- Extracted clause with comprehensive metadata
33
- """
34
- text : str
35
- reference : str # e.g., "Section 5.2", "Clause 11.1"
36
- category : str # e.g., "termination", "compensation", "indemnification"
37
- confidence : float # 0.0-1.0
38
- start_pos : int
39
- end_pos : int
40
- extraction_method : str # "structural", "semantic", "hybrid"
41
- risk_indicators : List[str] = field(default_factory = list)
42
- embeddings : Optional[np.ndarray] = None
43
- subclauses : List[str] = field(default_factory = list)
44
- legal_bert_score : float = 0.0
45
- risk_score : float = 0.0
46
-
47
- def to_dict(self) -> Dict[str, Any]:
48
- """
49
- Convert to dictionary for serialization
50
- """
51
- return {"text" : self.text,
52
- "reference" : self.reference,
53
- "category" : self.category,
54
- "confidence" : round(self.confidence, 3),
55
- "start_pos" : self.start_pos,
56
- "end_pos" : self.end_pos,
57
- "extraction_method" : self.extraction_method,
58
- "risk_indicators" : self.risk_indicators,
59
- "subclauses" : self.subclauses,
60
- "legal_bert_score" : round(self.legal_bert_score, 3),
61
- "risk_score" : round(self.risk_score, 3),
62
- }
63
-
64
 
65
 
66
  class ComprehensiveClauseExtractor:
@@ -140,12 +103,6 @@ class ComprehensiveClauseExtractor:
140
  }
141
  }
142
 
143
- # RISK INDICATOR PATTERNS - ENHANCED
144
- RISK_INDICATORS = {'critical' : ['unlimited liability', 'perpetual', 'irrevocable', 'forfeit', 'liquidated damages', 'wage withholding', 'joint and several', 'automatic renewal', 'assignment without consent'],
145
- 'high' : ['non-compete', 'non-solicit', 'penalty', 'without cause', 'sole discretion', 'immediate termination', 'at-will', 'indemnify', 'hold harmless', 'waive'],
146
- 'medium' : ['confidential', 'proprietary', 'exclusive', 'terminate', 'default', 'breach', 'damages', 'liable', 'warranty disclaimer'],
147
- }
148
-
149
 
150
  def __init__(self, model_loader: ModelLoader):
151
  """
@@ -174,6 +131,9 @@ class ComprehensiveClauseExtractor:
174
 
175
  # Lazy load
176
  self._lazy_load()
 
 
 
177
 
178
 
179
  def _lazy_load(self):
@@ -288,6 +248,109 @@ class ComprehensiveClauseExtractor:
288
 
289
  return final_clauses
290
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
 
292
  def _extract_structural_clauses(self, text: str) -> List[Dict]:
293
  """
@@ -584,17 +647,42 @@ class ComprehensiveClauseExtractor:
584
 
585
  def _extract_risk_indicators(self, text: str) -> List[str]:
586
  """
587
- Extract risk indicator keywords from clause text
588
  """
589
  text_lower = text.lower()
590
  risk_indicators = list()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
591
 
592
- for severity, indicators in self.RISK_INDICATORS.items():
593
- for indicator in indicators:
594
- if indicator in text_lower:
595
- risk_indicators.append(indicator)
596
-
597
- return risk_indicators
598
 
599
 
600
  def _extract_subclauses(self, text: str) -> List[str]:
@@ -699,8 +787,7 @@ class ComprehensiveClauseExtractor:
699
 
700
  class RiskClauseExtractor:
701
  """
702
- Risk-Focused Clause Extractor: Specifically for risk analysis using RiskRules framework and integrates with
703
- risk_rules.py for contract-type specific risk assessment
704
 
705
  This will be used for: Risk analysis, protection gap detection, contract-type specific assessment
706
  """
@@ -955,7 +1042,7 @@ class RiskClauseExtractor:
955
  Calculate risk scores for clauses based on RiskRules factors
956
  """
957
  for clause in clauses:
958
- risk_score = self._calculate_single_clause_risk(clause)
959
  clause.risk_score = risk_score
960
 
961
  return clauses
@@ -963,29 +1050,35 @@ class RiskClauseExtractor:
963
 
964
  def _calculate_single_clause_risk(self, clause: ExtractedClause) -> float:
965
  """
966
- Calculate risk score for a single clause
967
  """
968
- base_risk = 0.0
969
-
970
- # Base risk from category weight
 
971
  category_weight = self.category_weights.get(clause.category, 1.0)
 
972
 
973
- # Normalize to 0-40
974
- base_risk += (category_weight / 15) * 40
975
 
976
- # Risk from indicators
977
- risk_indicators = self._extract_risk_indicators(clause.text)
 
 
978
 
979
- # 8 points per indicator
980
- indicator_risk = len(risk_indicators) * 8
981
- base_risk += min(indicator_risk, 40)
982
-
983
- # Risk from patterns
984
- pattern_risk = self._check_risk_patterns(clause.text)
985
- base_risk += pattern_risk
986
-
987
- # Cap at 100
988
- return min(base_risk, 100)
 
 
989
 
990
 
991
  def _extract_risk_indicators(self, text: str) -> List[str]:
 
9
  from typing import Tuple
10
  from pathlib import Path
11
  from typing import Optional
 
 
12
  from collections import defaultdict
13
  from sentence_transformers import util
14
 
 
21
  from config.risk_rules import ContractType
22
  from utils.text_processor import TextProcessor
23
  from utils.logger import ContractAnalyzerLogger
24
+ from services.data_models import ExtractedClause
25
  from model_manager.model_loader import ModelLoader
26
+ from services.data_models import ClauseInterpretation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
 
29
  class ComprehensiveClauseExtractor:
 
103
  }
104
  }
105
 
 
 
 
 
 
 
106
 
107
  def __init__(self, model_loader: ModelLoader):
108
  """
 
131
 
132
  # Lazy load
133
  self._lazy_load()
134
+
135
+ # Risk Rules
136
+ self.risk_rules = RiskRules()
137
 
138
 
139
  def _lazy_load(self):
 
248
 
249
  return final_clauses
250
 
251
+
252
def generate_clause_analysis(self, clause: ExtractedClause, llm_interpretation: ClauseInterpretation = None) -> Dict[str, str]:
    """
    Build the analysis / recommendation text pair for one clause

    Arguments:
    ----------
        clause             { ExtractedClause }      : Clause being described

        llm_interpretation { ClauseInterpretation } : Optional LLM interpretation; when falsy the
                                                      rule-based fallback generators are used instead

    Returns:
    --------
                { dict }                            : {'analysis': str, 'recommendation': str}
    """
    # No LLM output: derive both texts from the clause's own risk data
    if not llm_interpretation:
        fallback_inputs = {'clause'          : clause,
                           'risk_indicators' : clause.risk_indicators or [],
                           'risk_score'      : getattr(clause, 'risk_score', 0),
                          }

        return {'analysis'       : self._generate_fallback_analysis(**fallback_inputs),
                'recommendation' : self._generate_fallback_recommendation(**fallback_inputs),
               }

    # LLM output available: start from the plain-English summary
    analysis_text = llm_interpretation.plain_english_summary

    # Append at most two key points
    if llm_interpretation.key_points:
        analysis_text = analysis_text + " " + " ".join(llm_interpretation.key_points[:2])

    # Append at most two potential risks
    if llm_interpretation.potential_risks:
        analysis_text = analysis_text + " Key risks: " + ", ".join(llm_interpretation.potential_risks[:2])

    # At most two suggested improvements form the recommendation, else a generic one
    improvements   = llm_interpretation.suggested_improvements
    recommendation = (" ".join(improvements[:2])
                      if improvements
                      else "Review this clause with legal counsel for specific recommendations.")

    return {'analysis'       : analysis_text,
            'recommendation' : recommendation,
           }
305
+
306
+
307
+ def _generate_fallback_analysis(self, clause: ExtractedClause, risk_indicators: List[str], risk_score: float) -> str:
308
+ """
309
+ Generate fallback analysis when LLM unavailable
310
+ """
311
+ category_analyses = {'compensation' : f"This compensation clause {'contains concerning terms' if risk_score > 50 else 'appears standard'} regarding payment obligations and structures. ",
312
+ 'termination' : f"This termination clause {'creates significant imbalance' if risk_score > 60 else 'establishes'} the conditions and procedures for ending the agreement. ",
313
+ 'non_compete' : f"This restrictive covenant {'is overly broad and' if risk_score > 60 else ''} limits future business activities and employment opportunities. ",
314
+ 'confidentiality' : f"This confidentiality provision {'has excessive scope' if risk_score > 50 else 'defines'} the obligations to protect sensitive information. ",
315
+ 'indemnification' : f"This indemnification clause {'creates one-sided liability exposure' if risk_score > 60 else 'allocates'} responsibility for claims and losses. ",
316
+ 'intellectual_property' : f"This IP clause {'may claim overly broad ownership' if risk_score > 50 else 'addresses'} rights to work product and inventions. ",
317
+ 'liability' : f"This liability provision {'lacks adequate caps or limitations' if risk_score > 60 else 'establishes'} the financial exposure for damages. ",
318
+ }
319
+
320
+ analysis = category_analyses.get(clause.category, f"This {clause.category} clause establishes specific rights and obligations. ")
321
+
322
+ # Add risk-specific details
323
+ if risk_indicators:
324
+ analysis += f"Specific concerns include: {', '.join(risk_indicators[:3])}. "
325
+
326
+ if (risk_score > 70):
327
+ analysis += "This clause requires immediate attention and likely modification."
328
+
329
+ elif (risk_score > 50):
330
+ analysis += "This clause should be reviewed carefully and potentially negotiated."
331
+
332
+ else:
333
+ analysis += "This clause appears to contain standard provisions for this type of agreement."
334
+
335
+ return analysis
336
+
337
+
338
+ def _generate_fallback_recommendation(self, clause: ExtractedClause, risk_indicators: List[str], risk_score: float) -> str:
339
+ """
340
+ Generate fallback recommendation when LLM unavailable
341
+ """
342
+ if (risk_score > 70):
343
+ return f"Strongly recommend negotiating substantial changes to this clause. Seek legal counsel to address the identified risks and ensure your interests are protected."
344
+
345
+ elif (risk_score > 50):
346
+ return f"Negotiate modifications to balance the terms more fairly. Consider adding protective language or limiting the scope of obligations."
347
+
348
+ elif (risk_score > 30):
349
+ return f"Review with legal counsel to ensure the terms are clear and acceptable. Minor clarifications may be beneficial."
350
+
351
+ else:
352
+ return f"Standard clause - review for consistency with the overall agreement and your business needs."
353
+
354
 
355
  def _extract_structural_clauses(self, text: str) -> List[Dict]:
356
  """
 
647
 
648
  def _extract_risk_indicators(self, text: str) -> List[str]:
649
  """
650
+ Extract risk indicator keywords from clause text using RiskRule with the central risk rules
651
  """
652
  text_lower = text.lower()
653
  risk_indicators = list()
654
+
655
+ # Check for matches against CRITICAL_KEYWORDS from RiskRules
656
+ for keyword in self.risk_rules.CRITICAL_KEYWORDS.keys():
657
+ if keyword in text_lower:
658
+ risk_indicators.append(keyword)
659
+
660
+ # Check for matches against HIGH_RISK_KEYWORDS from RiskRules
661
+ for keyword in self.risk_rules.HIGH_RISK_KEYWORDS.keys():
662
+ if keyword in text_lower:
663
+ risk_indicators.append(keyword)
664
+
665
+ # Check for matches against MEDIUM_RISK_KEYWORDS from RiskRules
666
+ for keyword in self.risk_rules.MEDIUM_RISK_KEYWORDS.keys():
667
+ if keyword in text_lower:
668
+ risk_indicators.append(keyword)
669
+
670
+ # Check for matches against RISKY_PATTERNS from RiskRules
671
+ for pattern, score, description in self.risk_rules.RISKY_PATTERNS:
672
+ if re.search(pattern, text_lower):
673
+ # Use the description from RiskRules as the indicator
674
+ risk_indicators.append(description)
675
+
676
+ # Remove duplicates while preserving order
677
+ seen = set()
678
+ unique_indicators = list()
679
+
680
+ for indicator in risk_indicators:
681
+ if indicator not in seen:
682
+ seen.add(indicator)
683
+ unique_indicators.append(indicator)
684
 
685
+ return unique_indicators
 
 
 
 
 
686
 
687
 
688
  def _extract_subclauses(self, text: str) -> List[str]:
 
787
 
788
  class RiskClauseExtractor:
789
  """
790
+ Risk-Focused Clause Extractor: Specifically for risk analysis using RiskRules framework for contract-type specific risk assessment
 
791
 
792
  This will be used for: Risk analysis, protection gap detection, contract-type specific assessment
793
  """
 
1042
  Calculate risk scores for clauses based on RiskRules factors
1043
  """
1044
  for clause in clauses:
1045
+ risk_score = self._calculate_single_clause_risk(clause = clause)
1046
  clause.risk_score = risk_score
1047
 
1048
  return clauses
 
1050
 
1051
  def _calculate_single_clause_risk(self, clause: ExtractedClause) -> float:
1052
  """
1053
+ Calculate risk score using RiskRules framework
1054
  """
1055
+ base_score = 0.0
1056
+ text_lower = clause.text.lower()
1057
+
1058
+ # Base risk from category weight (adjusted for contract type)
1059
  category_weight = self.category_weights.get(clause.category, 1.0)
1060
+ base_score += category_weight
1061
 
1062
+ # Add risk from CLAUSE_RISK_FACTORS (red flags)
1063
+ factor_config = self.risk_rules.CLAUSE_RISK_FACTORS.get(clause.category)
1064
 
1065
+ if factor_config:
1066
+ for red_flag, adjustment in factor_config["red_flags"].items():
1067
+ if red_flag in text_lower:
1068
+ base_score += adjustment
1069
 
1070
+ # Add risk from RISKY_PATTERNS (with actual scores)
1071
+ for pattern, score, description in self.risk_rules.RISKY_PATTERNS:
1072
+ if re.search(pattern, text_lower):
1073
+ base_score += score
1074
+
1075
+ # Add risk from CRITICAL_KEYWORDS
1076
+ for keyword, risk_score in self.risk_rules.CRITICAL_KEYWORDS.items():
1077
+ if re.search(rf'\b{re.escape(keyword)}\b', text_lower):
1078
+ base_score += risk_score
1079
+
1080
+ # Cap final score at 100
1081
+ return min(max(base_score, 0), 100)
1082
 
1083
 
1084
  def _extract_risk_indicators(self, text: str) -> List[str]:
services/contract_classifier.py CHANGED
@@ -21,31 +21,7 @@ from config.risk_rules import ContractType
21
  from config.model_config import ModelConfig
22
  from utils.text_processor import TextProcessor
23
  from utils.logger import ContractAnalyzerLogger
24
-
25
-
26
- @dataclass
27
- class ContractCategory:
28
- """
29
- Contract classification result with metadata
30
- """
31
- category : str
32
- subcategory : Optional[str]
33
- confidence : float
34
- reasoning : List[str]
35
- detected_keywords : List[str]
36
- alternative_categories : List[Tuple[str, float]] = None # (category, confidence) pairs
37
-
38
- def to_dict(self) -> Dict[str, Any]:
39
- """
40
- Convert to dictionary for serialization
41
- """
42
- return {"category" : self.category,
43
- "subcategory" : self.subcategory,
44
- "confidence" : round(self.confidence, 3),
45
- "reasoning" : self.reasoning,
46
- "detected_keywords" : self.detected_keywords,
47
- "alternative_categories" : [{"category": cat, "confidence": round(conf, 3)} for cat, conf in (self.alternative_categories or [])]
48
- }
49
 
50
 
51
  class ContractClassifier:
@@ -59,15 +35,15 @@ class ContractClassifier:
59
  # CATEGORY HIERARCHY WITH KEYWORDS - UPDATED TO MATCH YOUR CATEGORIES
60
  CATEGORY_HIERARCHY = {'employment' : {'subcategories' : ['full_time', 'part_time', 'contract_worker', 'internship', 'executive'],
61
  'keywords' : ['employee', 'employment', 'employer', 'job', 'position', 'staff', 'salary', 'wages', 'compensation', 'payroll', 'benefits', 'health insurance', 'retirement', 'pension', '401(k)', 'vacation', 'paid time off', 'sick leave', 'holidays', 'probation', 'performance review', 'promotion', 'termination', 'job description', 'duties', 'responsibilities', 'work hours', 'overtime', 'timekeeping', 'attendance', 'confidentiality', 'non-compete', 'non-solicitation', 'intellectual property', 'inventions', 'work product', 'severance', 'notice period', 'resignation', 'dismissal'],
62
- 'weight' : 1.2,
63
  },
64
  'consulting' : {'subcategories' : ['independent_contractor', 'advisory', 'professional_services', 'freelance'],
65
  'keywords' : ['consultant', 'consulting', 'independent contractor', 'statement of work', 'deliverables', 'professional services', 'hourly rate', 'project scope', 'milestone', 'acceptance criteria', 'work product', '1099', 'self-employed', 'contractor', 'consulting services', 'expert advice', 'advisory services', 'project basis', 'task order'],
66
- 'weight' : 1.1,
67
  },
68
  'nda' : {'subcategories' : ['mutual_nda', 'unilateral_nda', 'confidentiality_agreement'],
69
  'keywords' : ['non-disclosure', 'confidentiality', 'proprietary information', 'nda', 'disclosure agreement', 'trade secret', 'confidential information', 'receiving party', 'disclosing party', 'confidentiality obligation', 'non-use', 'non-circumvention', 'secrecy', 'protected information', 'confidentiality period', 'return of information'],
70
- 'weight' : 1.3,
71
  },
72
  'software' : {'subcategories' : ['software_license', 'saas', 'cloud_services', 'development', 'api_access'],
73
  'keywords' : ['software', 'license', 'saas', 'subscription', 'source code', 'object code', 'api', 'cloud', 'hosting', 'maintenance', 'updates', 'support', 'uptime', 'service level', 'software as a service', 'platform', 'application', 'user license', 'perpetual license', 'subscription fee', 'end user license agreement', 'eula'],
@@ -314,7 +290,6 @@ class ContractClassifier:
314
 
315
  { ContractCategory } : ContractCategory object with classification results
316
  """
317
-
318
  # Validate input
319
  if (not contract_text or (len(contract_text) < 100)):
320
  raise ValueError("Contract text too short for classification")
@@ -331,16 +306,16 @@ class ContractClassifier:
331
  excerpt_length = len(text_excerpt),
332
  )
333
 
334
- # Step 1: Keyword scoring
335
  keyword_scores = self._score_keywords(text_lower = contract_text.lower())
336
-
337
  # Semantic similarity
338
  semantic_scores = self._semantic_similarity(text = text_excerpt)
339
-
340
- # Step 3: Legal-BERT semantic similarity (enhanced)
341
  legal_bert_scores = self._legal_bert_similarity(text = text_excerpt)
342
-
343
- # Step 4: Combine scores (weighted average)
344
  combined_scores = self._combine_scores(keyword_scores = keyword_scores,
345
  semantic_scores = semantic_scores,
346
  legal_bert_scores = legal_bert_scores,
@@ -403,23 +378,23 @@ class ContractClassifier:
403
  def _score_keywords(self, text_lower: str) -> Dict[str, float]:
404
  """
405
  Score each category based on keyword presence
406
-
407
  Arguments:
408
  ----------
409
  text_lower { str } : Lowercase contract text
410
-
411
  Returns:
412
  --------
413
  { dict } : Dictionary of {category: score}
414
  """
415
  scores = dict()
416
-
417
  for category, config in self.CATEGORY_HIERARCHY.items():
418
- keywords = config['keywords']
419
- weight = config['weight']
420
 
421
  # Count keyword matches with partial matching for multi-word terms
422
- keyword_count = 0
 
423
  for keyword in keywords:
424
  # Check for exact match or partial match for multi-word terms
425
  if ' ' in keyword:
@@ -429,18 +404,15 @@ class ContractClassifier:
429
  keyword_count += 1
430
 
431
  else:
432
- # For single words, exact match
433
  if re.search(rf'\b{re.escape(keyword)}\b', text_lower):
434
  keyword_count += 1
435
-
436
  # Normalize by number of keywords and apply weight
437
  normalized_score = (keyword_count / len(keywords)) * weight
438
 
439
- # Boost score if we have significant keyword matches
440
- if keyword_count >= 3:
441
- normalized_score *= 1.2
442
-
443
- scores[category] = min(normalized_score, 1.0) # Cap at 1.0
444
 
445
  return scores
446
 
@@ -548,9 +520,9 @@ class ContractClassifier:
548
  combined = dict()
549
 
550
  # Weights for each method
551
- keyword_weight = 0.45 # Increased from 0.30
552
- semantic_weight = 0.35 # Reduced from 0.40
553
- legal_bert_weight = 0.20 # Reduced from 0.30
554
 
555
  for category in self.CATEGORY_HIERARCHY.keys():
556
  score = (keyword_scores.get(category, 0) * keyword_weight +
 
21
  from config.model_config import ModelConfig
22
  from utils.text_processor import TextProcessor
23
  from utils.logger import ContractAnalyzerLogger
24
+ from services.data_models import ContractCategory
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
 
27
  class ContractClassifier:
 
35
  # CATEGORY HIERARCHY WITH KEYWORDS - UPDATED TO MATCH YOUR CATEGORIES
36
  CATEGORY_HIERARCHY = {'employment' : {'subcategories' : ['full_time', 'part_time', 'contract_worker', 'internship', 'executive'],
37
  'keywords' : ['employee', 'employment', 'employer', 'job', 'position', 'staff', 'salary', 'wages', 'compensation', 'payroll', 'benefits', 'health insurance', 'retirement', 'pension', '401(k)', 'vacation', 'paid time off', 'sick leave', 'holidays', 'probation', 'performance review', 'promotion', 'termination', 'job description', 'duties', 'responsibilities', 'work hours', 'overtime', 'timekeeping', 'attendance', 'confidentiality', 'non-compete', 'non-solicitation', 'intellectual property', 'inventions', 'work product', 'severance', 'notice period', 'resignation', 'dismissal'],
38
+ 'weight' : 1.1,
39
  },
40
  'consulting' : {'subcategories' : ['independent_contractor', 'advisory', 'professional_services', 'freelance'],
41
  'keywords' : ['consultant', 'consulting', 'independent contractor', 'statement of work', 'deliverables', 'professional services', 'hourly rate', 'project scope', 'milestone', 'acceptance criteria', 'work product', '1099', 'self-employed', 'contractor', 'consulting services', 'expert advice', 'advisory services', 'project basis', 'task order'],
42
+ 'weight' : 1.0,
43
  },
44
  'nda' : {'subcategories' : ['mutual_nda', 'unilateral_nda', 'confidentiality_agreement'],
45
  'keywords' : ['non-disclosure', 'confidentiality', 'proprietary information', 'nda', 'disclosure agreement', 'trade secret', 'confidential information', 'receiving party', 'disclosing party', 'confidentiality obligation', 'non-use', 'non-circumvention', 'secrecy', 'protected information', 'confidentiality period', 'return of information'],
46
+ 'weight' : 1.0,
47
  },
48
  'software' : {'subcategories' : ['software_license', 'saas', 'cloud_services', 'development', 'api_access'],
49
  'keywords' : ['software', 'license', 'saas', 'subscription', 'source code', 'object code', 'api', 'cloud', 'hosting', 'maintenance', 'updates', 'support', 'uptime', 'service level', 'software as a service', 'platform', 'application', 'user license', 'perpetual license', 'subscription fee', 'end user license agreement', 'eula'],
 
290
 
291
  { ContractCategory } : ContractCategory object with classification results
292
  """
 
293
  # Validate input
294
  if (not contract_text or (len(contract_text) < 100)):
295
  raise ValueError("Contract text too short for classification")
 
306
  excerpt_length = len(text_excerpt),
307
  )
308
 
309
+ # Keyword scoring
310
  keyword_scores = self._score_keywords(text_lower = contract_text.lower())
311
+
312
  # Semantic similarity
313
  semantic_scores = self._semantic_similarity(text = text_excerpt)
314
+
315
+ # Legal-BERT semantic similarity (enhanced)
316
  legal_bert_scores = self._legal_bert_similarity(text = text_excerpt)
317
+
318
+ # Combine scores (weighted average)
319
  combined_scores = self._combine_scores(keyword_scores = keyword_scores,
320
  semantic_scores = semantic_scores,
321
  legal_bert_scores = legal_bert_scores,
 
378
  def _score_keywords(self, text_lower: str) -> Dict[str, float]:
379
  """
380
  Score each category based on keyword presence
381
+
382
  Arguments:
383
  ----------
384
  text_lower { str } : Lowercase contract text
385
+
386
  Returns:
387
  --------
388
  { dict } : Dictionary of {category: score}
389
  """
390
  scores = dict()
 
391
  for category, config in self.CATEGORY_HIERARCHY.items():
392
+ keywords = config['keywords']
393
+ weight = config['weight']
394
 
395
  # Count keyword matches with partial matching for multi-word terms
396
+ keyword_count = 0
397
+
398
  for keyword in keywords:
399
  # Check for exact match or partial match for multi-word terms
400
  if ' ' in keyword:
 
404
  keyword_count += 1
405
 
406
  else:
407
+ # For single words, exact word boundary match
408
  if re.search(rf'\b{re.escape(keyword)}\b', text_lower):
409
  keyword_count += 1
410
+
411
  # Normalize by number of keywords and apply weight
412
  normalized_score = (keyword_count / len(keywords)) * weight
413
 
414
+ # Cap at 1.0
415
+ scores[category] = min(normalized_score, 1.0)
 
 
 
416
 
417
  return scores
418
 
 
520
  combined = dict()
521
 
522
  # Weights for each method
523
+ keyword_weight = 0.35
524
+ semantic_weight = 0.35
525
+ legal_bert_weight = 0.30
526
 
527
  for category in self.CATEGORY_HIERARCHY.keys():
528
  score = (keyword_scores.get(category, 0) * keyword_weight +
services/data_models.py ADDED
@@ -0,0 +1,409 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # DEPENDENCIES
2
+ import sys
3
+ import numpy as np
4
+ from enum import Enum
5
+ from typing import Any
6
+ from typing import Dict
7
+ from typing import List
8
+ from typing import Tuple
9
+ from pathlib import Path
10
+ from typing import Optional
11
+ from dataclasses import field
12
+ from dataclasses import dataclass
13
+
14
+ # Add parent directory to path for imports
15
+ sys.path.append(str(Path(__file__).parent.parent))
16
+
17
+
18
+
19
+
20
@dataclass
class ExtractedClause:
    """
    One clause pulled out of a contract, with its extraction metadata
    """
    # Clause text
    text: str
    # Locator, e.g. "Section 5.2", "Clause 11.1"
    reference: str
    # e.g. "termination", "compensation", "indemnification"
    category: str
    # 0.0-1.0
    confidence: float
    start_pos: int
    end_pos: int
    # "structural", "semantic", "hybrid"
    extraction_method: str
    risk_indicators: List[str] = field(default_factory = list)
    # Not part of the to_dict() output
    embeddings: Optional[np.ndarray] = None
    subclauses: List[str] = field(default_factory = list)
    legal_bert_score: float = 0.0
    risk_score: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """
        Return the serializable fields as a dict; the three scores are rounded to 3 decimals
        """
        exported = ("text", "reference", "category", "confidence", "start_pos", "end_pos",
                    "extraction_method", "risk_indicators", "subclauses", "legal_bert_score", "risk_score")
        payload  = {name: getattr(self, name) for name in exported}

        # Re-assigning an existing key keeps its position in the dict
        for score_key in ("confidence", "legal_bert_score", "risk_score"):
            payload[score_key] = round(payload[score_key], 3)

        return payload
54
+
55
+
56
@dataclass
class UnfavorableTerm:
    """
    A detected unfavorable term together with its risk analysis
    """
    term: str
    # Risk category (e.g., "restrictive_covenants")
    category: str
    # "critical", "high", "medium", "low"
    severity: str
    explanation: str
    # 0-100 risk score
    risk_score: float
    clause_reference: Optional[str] = None
    suggested_fix: Optional[str] = None
    contract_type: Optional[str] = None
    specific_text: Optional[str] = None
    # Industry benchmark comparison
    benchmark_info: Optional[str] = None
    # Legal principle violated
    legal_basis: Optional[str] = None

    def to_dict(self) -> Dict:
        """
        Return every field as a dict entry; risk_score is rounded to 2 decimals
        """
        return dict(term             = self.term,
                    category         = self.category,
                    severity         = self.severity,
                    explanation      = self.explanation,
                    risk_score       = round(self.risk_score, 2),
                    clause_reference = self.clause_reference,
                    suggested_fix    = self.suggested_fix,
                    contract_type    = self.contract_type,
                    specific_text    = self.specific_text,
                    benchmark_info   = self.benchmark_info,
                    legal_basis      = self.legal_basis,
                   )
89
+
90
+
91
@dataclass
class ClauseInterpretation:
    """
    LLM interpretation of a single clause
    """
    clause_reference: str
    original_text: str
    plain_english_summary: str
    key_points: List[str]
    potential_risks: List[str]
    suggested_improvements: List[str]
    favorability: str = "neutral"
    confidence_score: float = 0.0
    risk_level: str = "unknown"
    negotiation_priority: str = "medium"
    legal_precedents: List[str] = field(default_factory = list)
    negotiation_leverage: List[str] = field(default_factory = list)
    market_comparison: Optional[str] = None
    risk_score: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """
        Return every field as a dict entry; confidence_score and risk_score are rounded to 3 decimals
        """
        payload = dict()

        payload["clause_reference"]       = self.clause_reference
        payload["original_text"]          = self.original_text
        payload["plain_english_summary"]  = self.plain_english_summary
        payload["key_points"]             = self.key_points
        payload["potential_risks"]        = self.potential_risks
        payload["suggested_improvements"] = self.suggested_improvements
        payload["favorability"]           = self.favorability
        payload["confidence_score"]       = round(self.confidence_score, 3)
        payload["risk_level"]             = self.risk_level
        payload["negotiation_priority"]   = self.negotiation_priority
        payload["legal_precedents"]       = self.legal_precedents
        payload["negotiation_leverage"]   = self.negotiation_leverage
        payload["market_comparison"]      = self.market_comparison
        payload["risk_score"]             = round(self.risk_score, 3)

        return payload
128
+
129
+
130
@dataclass
class MissingProtection:
    """
    A protection that is absent from the contract, with its risk analysis
    """
    # Internal identifier
    protection_id: str
    protection: str
    # "critical", "high", "medium", "low"
    importance: str
    # 0-100 from risk_rules
    risk_score: float
    explanation: str
    recommendation: str
    categories: List[str]
    contract_type: Optional[str] = None
    suggested_language: Optional[str] = None
    legal_basis: Optional[str] = None
    affected_clauses: Optional[List[str]] = None

    def to_dict(self) -> Dict:
        """
        Return every field as a dict entry; risk_score is rounded to 2 decimals and a
        falsy affected_clauses is exported as an empty list
        """
        clause_refs = self.affected_clauses if self.affected_clauses else []

        return dict(protection_id      = self.protection_id,
                    protection         = self.protection,
                    importance         = self.importance,
                    risk_score         = round(self.risk_score, 2),
                    explanation        = self.explanation,
                    recommendation     = self.recommendation,
                    categories         = self.categories,
                    contract_type      = self.contract_type,
                    suggested_language = self.suggested_language,
                    legal_basis        = self.legal_basis,
                    affected_clauses   = clause_refs,
                   )
163
+
164
+
165
@dataclass
class ContractCategory:
    """
    Result of classifying a contract, with supporting metadata
    """
    category: str
    subcategory: Optional[str]
    confidence: float
    reasoning: List[str]
    detected_keywords: List[str]
    # (category, confidence) pairs; defaults to None, which to_dict() exports as an empty list
    alternative_categories: List[Tuple[str, float]] = None

    def to_dict(self) -> Dict[str, Any]:
        """
        Return the classification as a dict; confidences are rounded to 3 decimals
        """
        alternatives = list()

        for alt_name, alt_confidence in (self.alternative_categories or []):
            alternatives.append({"category": alt_name, "confidence": round(alt_confidence, 3)})

        return dict(category               = self.category,
                    subcategory            = self.subcategory,
                    confidence             = round(self.confidence, 3),
                    reasoning              = self.reasoning,
                    detected_keywords      = self.detected_keywords,
                    alternative_categories = alternatives,
                   )
188
+
189
+
190
+
191
@dataclass
class RiskBreakdownItem:
    """
    Risk breakdown for one category
    """
    category: str
    # 0-100
    score: int
    summary: str
    findings: List[str] = field(default_factory = list)

    def to_dict(self) -> Dict[str, Any]:
        """
        Return the four fields as a dict
        """
        return {name: getattr(self, name) for name in ("category", "score", "summary", "findings")}
211
+
212
+
213
@dataclass
class RiskScore:
    """
    Overall contract risk score with its detailed breakdown
    """
    # 0-100
    overall_score: int
    # "CRITICAL", "HIGH", "MEDIUM", "LOW"
    risk_level: str
    category_scores: Dict[str, int]
    risk_factors: List[str]
    detailed_findings: Dict[str, List[str]]
    benchmark_comparison: Dict[str, str]
    risk_breakdown: List[RiskBreakdownItem]
    contract_type: str
    unfavorable_terms: List[Dict]
    missing_protections: List[Dict]
    high_risk_clauses: List[Dict] = field(default_factory = list)
    explanation: str = ""
    recommendations: List[str] = field(default_factory = list)
    analysis_timestamp: Optional[str] = None
    contract_subtype: Optional[str] = None
    contract_metadata: Optional[Dict[str, Any]] = field(default_factory = dict)

    def to_dict(self) -> Dict[str, Any]:
        """
        Return every field as a dict entry; risk_breakdown items are exported via their own to_dict()
        """
        exported = ("overall_score", "risk_level", "category_scores", "risk_factors", "detailed_findings",
                    "benchmark_comparison", "risk_breakdown", "contract_type", "unfavorable_terms",
                    "missing_protections", "high_risk_clauses", "explanation", "recommendations",
                    "analysis_timestamp", "contract_subtype", "contract_metadata")
        payload  = {name: getattr(self, name) for name in exported}

        # Nested items are serialized; the key keeps its original position
        payload["risk_breakdown"] = [entry.to_dict() for entry in self.risk_breakdown]

        return payload
256
+
257
+
258
@dataclass
class RiskInterpretation:
    """
    Risk interpretation for a whole contract, including per-clause interpretations
    """
    overall_risk_explanation: str
    key_concerns: List[str]
    negotiation_strategy: str
    market_comparison: str
    clause_interpretations: List[ClauseInterpretation]

    def to_dict(self) -> Dict[str, Any]:
        """
        Return every field as a dict entry; clause interpretations are exported via their own to_dict()
        """
        serialized_clauses = list()

        for interpretation in self.clause_interpretations:
            serialized_clauses.append(interpretation.to_dict())

        return dict(overall_risk_explanation = self.overall_risk_explanation,
                    key_concerns             = self.key_concerns,
                    negotiation_strategy     = self.negotiation_strategy,
                    market_comparison        = self.market_comparison,
                    clause_interpretations   = serialized_clauses,
                   )
279
+
280
+
281
class NegotiationTactic(Enum):
    """
    Closed set of negotiation tactic kinds; each member's value is its lower-case name
    """
    REMOVAL = "removal"
    MODIFICATION = "modification"
    ADDITION = "addition"
    LIMITATION = "limitation"
    MUTUALIZATION = "mutualization"
    CLARIFICATION = "clarification"
291
+
292
+
293
@dataclass
class NegotiationPoint:
    """
    One negotiation talking point with its strategic context
    """
    # 1 = highest, 5 = lowest
    priority: int
    category: str
    issue: str
    current_language: str
    proposed_language: str
    rationale: str
    tactic: NegotiationTactic
    fallback_position: Optional[str] = None
    # "easy", "medium", "hard"
    estimated_difficulty: str = "medium"
    legal_basis: Optional[str] = None
    business_impact: Optional[str] = None
    counterparty_concerns: Optional[str] = None
    timing_suggestion: Optional[str] = None
    # Defaults to None, which to_dict() exports as an empty list
    bargaining_chips: List[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """
        Return every field as a dict entry; tactic is exported as its enum value and a
        falsy bargaining_chips as an empty list
        """
        payload = dict()

        payload["priority"]              = self.priority
        payload["category"]              = self.category
        payload["issue"]                 = self.issue
        payload["current_language"]      = self.current_language
        payload["proposed_language"]     = self.proposed_language
        payload["rationale"]             = self.rationale
        payload["tactic"]                = self.tactic.value
        payload["fallback_position"]     = self.fallback_position
        payload["estimated_difficulty"]  = self.estimated_difficulty
        payload["legal_basis"]           = self.legal_basis
        payload["business_impact"]       = self.business_impact
        payload["counterparty_concerns"] = self.counterparty_concerns
        payload["timing_suggestion"]     = self.timing_suggestion
        payload["bargaining_chips"]      = self.bargaining_chips if self.bargaining_chips else []

        return payload
332
+
333
+
334
@dataclass
class NegotiationPlaybook:
    """
    Full negotiation strategy for a contract
    """
    overall_strategy: str
    critical_points: List[NegotiationPoint]
    walk_away_items: List[str]
    concession_items: List[str]
    timing_guidance: str
    risk_mitigation_plan: str

    def to_dict(self) -> Dict[str, Any]:
        """
        Return every field as a dict entry; critical points are exported via their own to_dict()
        """
        exported = ("overall_strategy", "critical_points", "walk_away_items",
                    "concession_items", "timing_guidance", "risk_mitigation_plan")
        payload  = {name: getattr(self, name) for name in exported}

        # Nested points are serialized; the key keeps its original position
        payload["critical_points"] = [entry.to_dict() for entry in self.critical_points]

        return payload
357
+
358
+
359
@dataclass
class SummaryContext:
    """
    Bundle of inputs handed to summary generation
    """
    # Required inputs
    contract_type: str
    risk_score: int
    risk_level: str
    category_scores: Dict[str, int]
    unfavorable_terms: List[Dict]
    missing_protections: List[Dict]
    clauses: List
    key_findings: List[str]
    # Optional inputs, all defaulting to None
    risk_interpretation: Optional[RiskInterpretation] = None
    negotiation_playbook: Optional[NegotiationPlaybook] = None
    contract_text_preview: Optional[str] = None
    contract_metadata: Optional[Dict[str, Any]] = None
376
+
377
+
378
@dataclass
class ModelInfo:
    """
    Model metadata and state
    """
    name           : str
    type           : str                       # "legal-bert", "embedding", "tokenizer", "classifier"
    status         : str                       # "not_loaded", "loading", "loaded", "error"
    model          : Optional[Any]      = None
    tokenizer      : Optional[Any]      = None
    loaded_at      : Optional[str]      = None
    error_message  : Optional[str]      = None
    memory_size_mb : float              = 0.0
    access_count   : int                = 0
    last_accessed  : Optional[str]      = None # ISO-8601 timestamp written by mark_accessed()
    metadata       : Dict[str, Any]     = field(default_factory = dict)


    def mark_accessed(self):
        """
        Update access statistics: bump the counter and record the access time

        The time is stored as an ISO-8601 string so the field keeps its Optional[str] type
        and get_age_seconds() can compute a real age from it.
        """
        from datetime import datetime

        self.access_count += 1
        self.last_accessed = datetime.now().isoformat()


    def get_age_seconds(self) -> float:
        """
        Get seconds elapsed since the last recorded access

        Returns:
        --------
            { float } : 0.0 when the model was never accessed; otherwise the elapsed seconds
                        (never negative). A stored value that is not an ISO-8601 timestamp
                        yields 3600.0, the constant previously returned after any access.
        """
        from datetime import datetime

        if not self.last_accessed:
            return 0.0

        try:
            accessed_at = datetime.fromisoformat(self.last_accessed)

        except (TypeError, ValueError):
            # Legacy placeholder such as "now": keep the old constant
            return 3600.0

        # Match the stored timestamp's awareness so the subtraction cannot mix naive and aware values
        elapsed = datetime.now(tz = accessed_at.tzinfo) - accessed_at

        return max(elapsed.total_seconds(), 0.0)
services/llm_interpreter.py CHANGED
@@ -7,7 +7,6 @@ from typing import Dict
7
  from typing import Tuple
8
  from pathlib import Path
9
  from typing import Optional
10
- from dataclasses import dataclass
11
 
12
  # Add parent directory to path for imports
13
  sys.path.append(str(Path(__file__).parent.parent))
@@ -18,69 +17,14 @@ from config.risk_rules import RiskRules
18
  from config.risk_rules import ContractType
19
  from utils.logger import ContractAnalyzerLogger
20
  from model_manager.llm_manager import LLMManager
 
21
  from model_manager.llm_manager import LLMProvider
22
- from services.term_analyzer import UnfavorableTerm
 
23
  from services.clause_extractor import ExtractedClause
24
  from services.protection_checker import MissingProtection
25
 
26
 
27
-
28
- @dataclass
29
- class ClauseInterpretation:
30
- """
31
- Plain-English interpretation of a legal clause with risk context
32
- """
33
- clause_reference : str
34
- original_text : str
35
- plain_english_summary : str
36
- key_points : List[str]
37
- potential_risks : List[str]
38
- favorability : str # "favorable", "neutral", "unfavorable"
39
- confidence : float
40
- risk_score : float # 0-100 from RiskAnalyzer
41
- negotiation_priority : str # "high", "medium", "low"
42
- suggested_improvements : List[str]
43
-
44
- def to_dict(self) -> Dict[str, Any]:
45
- """
46
- Convert to dictionary
47
- """
48
- return {"clause_reference" : self.clause_reference,
49
- "original_text" : self.original_text,
50
- "plain_english_summary" : self.plain_english_summary,
51
- "key_points" : self.key_points,
52
- "potential_risks" : self.potential_risks,
53
- "favorability" : self.favorability,
54
- "confidence" : round(self.confidence, 3),
55
- "risk_score" : round(self.risk_score, 2),
56
- "negotiation_priority" : self.negotiation_priority,
57
- "suggested_improvements" : self.suggested_improvements,
58
- }
59
-
60
-
61
- @dataclass
62
- class RiskInterpretation:
63
- """
64
- Comprehensive risk interpretation with LLM-enhanced explanations
65
- """
66
- overall_risk_explanation : str
67
- key_concerns : List[str]
68
- negotiation_strategy : str
69
- market_comparison : str
70
- clause_interpretations : List[ClauseInterpretation]
71
-
72
- def to_dict(self) -> Dict[str, Any]:
73
- """
74
- Convert to dictionary
75
- """
76
- return {"overall_risk_explanation" : self.overall_risk_explanation,
77
- "key_concerns" : self.key_concerns,
78
- "negotiation_strategy" : self.negotiation_strategy,
79
- "market_comparison" : self.market_comparison,
80
- "clause_interpretations" : [ci.to_dict() for ci in self.clause_interpretations],
81
- }
82
-
83
-
84
  class LLMClauseInterpreter:
85
  """
86
  Uses LLM to generate plain-English explanations for legal clauses and integrated with RiskAnalyzer results and RiskRules framework
@@ -92,7 +36,6 @@ class LLMClauseInterpreter:
92
  Arguments:
93
  ----------
94
  llm_manager { LLMManager } : LLMManager instance
95
-
96
  default_provider { LLMProvider } : Default LLM provider to use
97
  """
98
  self.llm_manager = llm_manager
@@ -293,7 +236,7 @@ class LLMClauseInterpreter:
293
  key_points = result.get("key_points", []),
294
  potential_risks = result.get("potential_risks", []),
295
  favorability = result.get("favorability", "neutral"),
296
- confidence = 0.85, # High confidence if LLM succeeded
297
  risk_score = getattr(clause, 'risk_score', 0),
298
  negotiation_priority = negotiation_priority,
299
  suggested_improvements = result.get("suggested_improvements", []),
@@ -316,40 +259,51 @@ class LLMClauseInterpreter:
316
 
317
  def _create_interpretation_prompt(self, clause: ExtractedClause) -> str:
318
  """
319
- Create enhanced prompt with risk context for LLM interpretation
320
  """
321
  risk_context = ""
322
 
323
  if clause.risk_indicators:
324
- risk_context = f"\nRisk indicators detected: {', '.join(clause.risk_indicators)}"
325
 
326
  risk_score_context = ""
327
-
328
  if hasattr(clause, 'risk_score'):
329
- risk_score_context = f"\nRisk score: {clause.risk_score}/100"
 
 
 
 
 
 
 
 
 
330
 
331
  prompt = f"""
332
- You are a legal expert explaining contract clauses to non-lawyers.
333
-
334
- CLAUSE CONTEXT:
335
- - Reference: {clause.reference}
336
- - Category: {clause.category}
337
- - Confidence: {clause.confidence:.2f}{risk_score_context}{risk_context}
338
 
339
- CLAUSE TEXT:
340
- \"\"\"{clause.text}\"\"\"
341
 
342
- Provide a plain-English interpretation suitable for someone without legal training:
343
 
344
- 1. SUMMARY: Explain what this clause means in 1-2 simple sentences
345
- 2. KEY POINTS: List 3-5 key things to understand about this clause
346
- 3. POTENTIAL RISKS: Identify 2-4 potential risks or concerns with this clause
347
- 4. FAVORABILITY: Rate as "favorable", "neutral", or "unfavorable" from the recipient's perspective
348
- 5. SUGGESTED IMPROVEMENTS: Provide 2-3 specific suggestions to improve this clause
 
349
 
350
- Focus on practical implications and business impact. Be clear, concise, and actionable.
351
 
352
- Return ONLY valid JSON.
 
 
 
 
 
 
 
353
  """
354
 
355
  return prompt
@@ -367,6 +321,23 @@ class LLMClauseInterpreter:
367
 
368
  else:
369
  return "low"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
 
371
 
372
  def _fallback_interpretation(self, clause: ExtractedClause) -> ClauseInterpretation:
@@ -424,43 +395,99 @@ class LLMClauseInterpreter:
424
  key_points = key_points,
425
  potential_risks = potential_risks,
426
  favorability = favorability,
427
- confidence = 0.50, # Medium confidence for fallback
428
  risk_score = risk_score,
429
  negotiation_priority = negotiation_priority,
430
  suggested_improvements = suggested_improvements,
431
  )
432
 
433
 
434
- def _generate_overall_risk_explanation(self, overall_risk_score: int, contract_type: ContractType, unfavorable_terms: List[UnfavorableTerm],
435
- missing_protections: List[MissingProtection], provider: LLMProvider) -> str:
436
  """
437
- Generate overall risk explanation using LLM
438
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
439
 
440
  prompt = f"""
441
- As a legal risk analyst, provide a concise overall risk assessment.
442
 
443
- CONTRACT TYPE: {contract_type.value}
444
- OVERALL RISK SCORE: {overall_risk_score}/100
445
- UNFAVORABLE TERMS: {len(unfavorable_terms)}
446
- MISSING PROTECTIONS: {len(missing_protections)}
447
 
448
- Provide a 2-3 sentence plain-English explanation of what this risk score means for someone signing this contract. Focus on practical implications.
449
 
450
- Explanation:
451
- """
452
-
 
 
453
  try:
454
  response = self.llm_manager.complete(prompt = prompt,
455
  provider = provider,
456
  temperature = 0.2,
457
- max_tokens = 300,
458
- )
459
 
460
- return response.text.strip() if response.success else self._fallback_risk_explanation(overall_risk_score)
 
 
 
 
461
 
462
  except Exception as e:
463
- log_error(e, context = {"operation": "generate_overall_risk_explanation"})
464
  return self._fallback_risk_explanation(overall_risk_score)
465
 
466
 
@@ -488,25 +515,57 @@ class LLMClauseInterpreter:
488
  concerns = list()
489
 
490
  # From unfavorable terms
491
- critical_terms = [t for t in unfavorable_terms if (t.get("severity") == "critical")]
 
 
 
 
 
 
 
 
 
492
 
493
  # Top 10 critical terms
494
  for term in critical_terms[:10]:
495
- term_name = term.get('term', 'Unfavorable term')
496
- term_explanation = term.get('explanation', 'Standard risk identified')
 
 
 
 
 
 
 
 
497
 
498
  concerns.append(f"Critical: {term_name} - {term_explanation}")
499
 
500
  # From missing protections
501
- critical_protections = [p for p in missing_protections if (p.get("importance") == "critical")]
502
 
 
 
 
 
 
 
 
 
 
503
  # Top 10 critical protections
504
  for protection in critical_protections[:10]:
505
- protection_name = protection.get('protection', 'Critical protection')
 
 
 
506
 
 
 
 
507
  concerns.append(f"Missing: {protection_name}")
508
 
509
- # From clause interpretations (these are objects, so dot notation is OK here)
510
  high_priority_clauses = [c for c in clause_interpretations if (c.negotiation_priority == "high")]
511
 
512
  # Top 10 high priority clauses
@@ -522,7 +581,6 @@ class LLMClauseInterpreter:
522
  """
523
  Generate negotiation strategy using LLM
524
  """
525
-
526
  prompt = f"""
527
  As a negotiation expert, provide strategic advice for contract negotiations.
528
 
@@ -553,7 +611,6 @@ class LLMClauseInterpreter:
553
  """
554
  Generate market comparison context
555
  """
556
-
557
  prompt = f"""
558
  Provide market context for this contract type.
559
 
 
7
  from typing import Tuple
8
  from pathlib import Path
9
  from typing import Optional
 
10
 
11
  # Add parent directory to path for imports
12
  sys.path.append(str(Path(__file__).parent.parent))
 
17
  from config.risk_rules import ContractType
18
  from utils.logger import ContractAnalyzerLogger
19
  from model_manager.llm_manager import LLMManager
20
+ from services.data_models import UnfavorableTerm
21
  from model_manager.llm_manager import LLMProvider
22
+ from services.data_models import RiskInterpretation
23
+ from services.data_models import ClauseInterpretation
24
  from services.clause_extractor import ExtractedClause
25
  from services.protection_checker import MissingProtection
26
 
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  class LLMClauseInterpreter:
29
  """
30
  Uses LLM to generate plain-English explanations for legal clauses and integrated with RiskAnalyzer results and RiskRules framework
 
36
  Arguments:
37
  ----------
38
  llm_manager { LLMManager } : LLMManager instance
 
39
  default_provider { LLMProvider } : Default LLM provider to use
40
  """
41
  self.llm_manager = llm_manager
 
236
  key_points = result.get("key_points", []),
237
  potential_risks = result.get("potential_risks", []),
238
  favorability = result.get("favorability", "neutral"),
239
+ confidence_score = 0.85, # High confidence if LLM succeeded
240
  risk_score = getattr(clause, 'risk_score', 0),
241
  negotiation_priority = negotiation_priority,
242
  suggested_improvements = result.get("suggested_improvements", []),
 
259
 
260
  def _create_interpretation_prompt(self, clause: ExtractedClause) -> str:
261
  """
262
+ Create concise prompt for clause interpretation
263
  """
264
  risk_context = ""
265
 
266
  if clause.risk_indicators:
267
+ risk_context = f"\nRisk Keywords: {', '.join(clause.risk_indicators[:3])}"
268
 
269
  risk_score_context = ""
270
+
271
  if hasattr(clause, 'risk_score'):
272
+ if (clause.risk_score >= 70):
273
+ risk_level = "CRITICAL RISK"
274
+
275
+ elif (clause.risk_score >= 50):
276
+ risk_level = "HIGH RISK"
277
+
278
+ else:
279
+ risk_level = "Moderate risk"
280
+
281
+ risk_score_context = f"\nRisk Level: {risk_level} ({clause.risk_score}/100)"
282
 
283
  prompt = f"""
284
+ Explain this legal clause in plain English.
 
 
 
 
 
285
 
286
+ CLAUSE: {clause.reference} - {clause.category.replace('_', ' ').title()}{risk_score_context}{risk_context}
 
287
 
288
+ TEXT: "{clause.text}..."
289
 
290
+ Provide:
291
+ 1. SUMMARY: 1-2 sentences explaining what this means
292
+ 2. KEY_POINTS: 3 bullet points of what to know
293
+ 3. POTENTIAL_RISKS: 2-3 specific risks or concerns
294
+ 4. FAVORABILITY: "favorable", "neutral", or "unfavorable"
295
+ 5. IMPROVEMENTS: 2 specific suggestions to fix this
296
 
297
+ Keep each section CONCISE. Total response should be ~150 words.
298
 
299
+ Return ONLY valid JSON:
300
+ {{
301
+ "plain_english_summary": "...",
302
+ "key_points": ["...", "...", "..."],
303
+ "potential_risks": ["...", "..."],
304
+ "favorability": "unfavorable",
305
+ "suggested_improvements": ["...", "..."]
306
+ }}
307
  """
308
 
309
  return prompt
 
321
 
322
  else:
323
  return "low"
324
+
325
+
326
+ def _map_risk_score_to_level(self, risk_score: float) -> str:
327
+ """
328
+ Map numeric risk score to risk level string
329
+ """
330
+ if (risk_score >= 70):
331
+ return "critical"
332
+
333
+ elif (risk_score >= 50):
334
+ return "high"
335
+
336
+ elif (risk_score >= 30):
337
+ return "medium"
338
+
339
+ else:
340
+ return "low"
341
 
342
 
343
  def _fallback_interpretation(self, clause: ExtractedClause) -> ClauseInterpretation:
 
395
  key_points = key_points,
396
  potential_risks = potential_risks,
397
  favorability = favorability,
398
+ confidence_score = 0.50, # Medium confidence for fallback
399
  risk_score = risk_score,
400
  negotiation_priority = negotiation_priority,
401
  suggested_improvements = suggested_improvements,
402
  )
403
 
404
 
405
+ def _generate_overall_risk_explanation(self, overall_risk_score: int, contract_type: ContractType, unfavorable_terms: List[UnfavorableTerm], missing_protections: List[MissingProtection],
406
+ provider: LLMProvider) -> str:
407
  """
408
+ Generate concise overall risk explanation
409
  """
410
+ # Handle both object and dictionary formats for unfavorable_terms
411
+ critical_terms = list()
412
+ high_terms = list()
413
+ issues_summary = list()
414
+ critical_protections = list()
415
+
416
+ for term in unfavorable_terms:
417
+ severity = ""
418
+
419
+ if isinstance(term, UnfavorableTerm):
420
+ severity = term.severity
421
+
422
+ elif isinstance(term, dict):
423
+ severity = term.get('severity', '')
424
+
425
+ else:
426
+ severity = getattr(term, 'severity', '')
427
+
428
+ if (severity == "critical"):
429
+ critical_terms.append(term)
430
+
431
+ elif (severity == "high"):
432
+ high_terms.append(term)
433
+
434
+ # Handle both object and dictionary formats for missing_protections
435
+ for protection in missing_protections:
436
+ importance = ""
437
+
438
+ if isinstance(protection, MissingProtection):
439
+ importance = protection.importance
440
+
441
+ elif isinstance(protection, dict):
442
+ importance = protection.get('importance', '')
443
+
444
+ else:
445
+ importance = getattr(protection, 'importance', '')
446
+
447
+ if (importance == "critical"):
448
+ critical_protections.append(protection)
449
+
450
+ # Create issues summary
451
+ if critical_terms:
452
+ issues_summary.append(f"{len(critical_terms)} CRITICAL unfavorable terms")
453
+
454
+ if high_terms:
455
+ issues_summary.append(f"{len(high_terms)} HIGH-risk unfavorable terms")
456
+
457
+ if critical_protections:
458
+ issues_summary.append(f"{len(critical_protections)} CRITICAL missing protections")
459
+
460
+ if not issues_summary:
461
+ issues_summary = ["Multiple concerning provisions identified"]
462
 
463
  prompt = f"""
464
+ Risk Level: {overall_risk_score}/100 for {contract_type.value} contract
465
 
466
+ Top Issues:
467
+ {chr(10).join(issues_summary)}
 
 
468
 
469
+ Write ONE sentence (max 25 words) explaining what this risk score means for someone signing this contract.
470
 
471
+ Example: "This contract creates severe financial and legal exposure through unlimited liability and one-sided termination rights."
472
+
473
+ Your turn:
474
+ """
475
+
476
  try:
477
  response = self.llm_manager.complete(prompt = prompt,
478
  provider = provider,
479
  temperature = 0.2,
480
+ max_tokens = 100,
481
+ )
482
 
483
+ explanation = response.text.strip() if response.success else self._fallback_risk_explanation(overall_risk_score)
484
+
485
+ # Ensure single sentence
486
+ sentences = explanation.split('.')
487
+ return sentences[0].strip() + '.' if sentences else explanation
488
 
489
  except Exception as e:
490
+ log_error(e, context={"operation": "generate_overall_risk_explanation"})
491
  return self._fallback_risk_explanation(overall_risk_score)
492
 
493
 
 
515
  concerns = list()
516
 
517
  # From unfavorable terms
518
+ critical_terms = list()
519
+
520
+ for term in unfavorable_terms:
521
+ if isinstance(term, UnfavorableTerm):
522
+ if (term.severity == "critical"):
523
+ critical_terms.append(term)
524
+
525
+ elif isinstance(term, dict):
526
+ if (term.get("severity") == "critical"):
527
+ critical_terms.append(term)
528
 
529
  # Top 10 critical terms
530
  for term in critical_terms[:10]:
531
+ term_name = ""
532
+ term_explanation = ""
533
+
534
+ if isinstance(term, UnfavorableTerm):
535
+ term_name = term.term
536
+ term_explanation = term.explanation
537
+
538
+ elif isinstance(term, dict):
539
+ term_name = term.get('term', 'Unfavorable term')
540
+ term_explanation = term.get('explanation', 'Standard risk identified')
541
 
542
  concerns.append(f"Critical: {term_name} - {term_explanation}")
543
 
544
  # From missing protections
545
+ critical_protections = list()
546
 
547
+ for protection in missing_protections:
548
+ if isinstance(protection, MissingProtection):
549
+ if (protection.importance == "critical"):
550
+ critical_protections.append(protection)
551
+
552
+ elif isinstance(protection, dict):
553
+ if (protection.get("importance") == "critical"):
554
+ critical_protections.append(protection)
555
+
556
  # Top 10 critical protections
557
  for protection in critical_protections[:10]:
558
+ protection_name = ""
559
+
560
+ if isinstance(protection, MissingProtection):
561
+ protection_name = protection.protection
562
 
563
+ elif isinstance(protection, dict):
564
+ protection_name = protection.get('protection', 'Critical protection')
565
+
566
  concerns.append(f"Missing: {protection_name}")
567
 
568
+ # From clause interpretations
569
  high_priority_clauses = [c for c in clause_interpretations if (c.negotiation_priority == "high")]
570
 
571
  # Top 10 high priority clauses
 
581
  """
582
  Generate negotiation strategy using LLM
583
  """
 
584
  prompt = f"""
585
  As a negotiation expert, provide strategic advice for contract negotiations.
586
 
 
611
  """
612
  Generate market comparison context
613
  """
 
614
  prompt = f"""
615
  Provide market context for this contract type.
616
 
services/negotiation_engine.py CHANGED
@@ -2,14 +2,12 @@
2
  import re
3
  import sys
4
  import json
5
- from enum import Enum
6
  from typing import Any
7
  from typing import List
8
  from typing import Dict
9
  from typing import Tuple
10
  from pathlib import Path
11
  from typing import Optional
12
- from dataclasses import dataclass
13
 
14
  # Add parent directory to path for imports
15
  sys.path.append(str(Path(__file__).parent.parent))
@@ -21,92 +19,18 @@ from config.risk_rules import ContractType
21
  from services.risk_analyzer import RiskScore
22
  from utils.logger import ContractAnalyzerLogger
23
  from model_manager.llm_manager import LLMManager
 
24
  from model_manager.llm_manager import LLMProvider
25
- from services.term_analyzer import UnfavorableTerm
 
 
 
26
  from services.clause_extractor import ExtractedClause
27
  from services.llm_interpreter import RiskInterpretation
28
  from services.llm_interpreter import ClauseInterpretation
29
  from services.protection_checker import MissingProtection
30
 
31
 
32
- class NegotiationTactic(Enum):
33
- """
34
- Types of negotiation tactics
35
- """
36
- REMOVAL = "removal"
37
- MODIFICATION = "modification"
38
- ADDITION = "addition"
39
- LIMITATION = "limitation"
40
- MUTUALIZATION = "mutualization"
41
- CLARIFICATION = "clarification"
42
-
43
-
44
- @dataclass
45
- class NegotiationPoint:
46
- """
47
- Negotiation talking point with strategic context
48
- """
49
- priority : int # 1=highest, 5=lowest
50
- category : str
51
- issue : str
52
- current_language : str
53
- proposed_language : str
54
- rationale : str
55
- tactic : NegotiationTactic
56
- fallback_position : Optional[str] = None
57
- estimated_difficulty : str = "medium" # "easy", "medium", "hard"
58
- legal_basis : Optional[str] = None
59
- business_impact : Optional[str] = None
60
- counterparty_concerns : Optional[str] = None
61
- timing_suggestion : Optional[str] = None
62
- bargaining_chips : List[str] = None
63
-
64
- def to_dict(self) -> Dict[str, Any]:
65
- """
66
- Convert to dictionary
67
- """
68
- return {"priority" : self.priority,
69
- "category" : self.category,
70
- "issue" : self.issue,
71
- "current_language" : self.current_language,
72
- "proposed_language" : self.proposed_language,
73
- "rationale" : self.rationale,
74
- "tactic" : self.tactic.value,
75
- "fallback_position" : self.fallback_position,
76
- "estimated_difficulty" : self.estimated_difficulty,
77
- "legal_basis" : self.legal_basis,
78
- "business_impact" : self.business_impact,
79
- "counterparty_concerns" : self.counterparty_concerns,
80
- "timing_suggestion" : self.timing_suggestion,
81
- "bargaining_chips" : self.bargaining_chips or [],
82
- }
83
-
84
-
85
- @dataclass
86
- class NegotiationPlaybook:
87
- """
88
- Comprehensive negotiation strategy
89
- """
90
- overall_strategy : str
91
- critical_points : List[NegotiationPoint]
92
- walk_away_items : List[str]
93
- concession_items : List[str]
94
- timing_guidance : str
95
- risk_mitigation_plan : str
96
-
97
- def to_dict(self) -> Dict[str, Any]:
98
- """
99
- Convert to dictionary
100
- """
101
- return {"overall_strategy" : self.overall_strategy,
102
- "critical_points" : [point.to_dict() for point in self.critical_points],
103
- "walk_away_items" : self.walk_away_items,
104
- "concession_items" : self.concession_items,
105
- "timing_guidance" : self.timing_guidance,
106
- "risk_mitigation_plan" : self.risk_mitigation_plan,
107
- }
108
-
109
-
110
  class NegotiationEngine:
111
  """
112
  Generate intelligent negotiation strategy with LLM enhancement integrated with full analysis pipeline and RiskRules framework
@@ -114,28 +38,26 @@ class NegotiationEngine:
114
  def __init__(self, llm_manager: LLMManager, default_provider: LLMProvider = LLMProvider.OLLAMA):
115
  """
116
  Initialize negotiation engine
117
-
118
  Arguments:
119
  ----------
120
  llm_manager { LLMManager } : LLMManager instance
121
-
122
  default_provider { LLMProvider } : Default LLM provider
123
  """
124
  self.llm_manager = llm_manager
125
  self.default_provider = default_provider
126
  self.risk_rules = RiskRules()
127
  self.logger = ContractAnalyzerLogger.get_logger()
128
-
129
  log_info("NegotiationEngine initialized", default_provider = default_provider.value)
130
 
131
 
132
- # Main entry point with full pipeline integration
133
  @ContractAnalyzerLogger.log_execution_time("generate_comprehensive_playbook")
134
  def generate_comprehensive_playbook(self, risk_analysis: RiskScore, risk_interpretation: RiskInterpretation, unfavorable_terms: List[UnfavorableTerm], missing_protections: List[MissingProtection],
135
  clauses: List[ExtractedClause], contract_type: ContractType, max_points: int = 10, provider: Optional[LLMProvider] = None) -> NegotiationPlaybook:
136
  """
137
  Generate comprehensive negotiation playbook using all analysis results
138
-
139
  Arguments:
140
  ----------
141
  risk_analysis : Complete risk analysis
@@ -146,12 +68,12 @@ class NegotiationEngine:
146
 
147
  missing_protections : Missing protections
148
 
149
- clauses : Extracted clauses with risk scores
150
-
151
  contract_type : Contract type for strategy
152
-
153
  max_points : Maximum negotiation points
154
-
155
  provider : LLM provider
156
 
157
  Returns:
@@ -177,7 +99,7 @@ class NegotiationEngine:
177
  contract_type = contract_type,
178
  provider = provider,
179
  )
180
-
181
  # Identify walk-away items
182
  walk_away_items = self._identify_walk_away_items(negotiation_points = negotiation_points,
183
  risk_analysis = risk_analysis,
@@ -207,9 +129,7 @@ class NegotiationEngine:
207
  timing_guidance = timing_guidance,
208
  risk_mitigation_plan = risk_mitigation_plan,
209
  )
210
-
211
  log_info("Comprehensive negotiation playbook generated", critical_points = len(negotiation_points), walk_away_items = len(walk_away_items))
212
-
213
  return playbook
214
 
215
 
@@ -218,19 +138,19 @@ class NegotiationEngine:
218
  clauses: List[ExtractedClause], max_points: int = 10, provider: Optional[LLMProvider] = None) -> List[NegotiationPoint]:
219
  """
220
  Generate prioritized negotiation strategy
221
-
222
  Arguments:
223
  ----------
224
  risk_analysis { RiskScore } : Risk analysis results
225
-
226
  unfavorable_terms { list } : Detected unfavorable terms
227
-
228
  missing_protections { list } : Missing protections
229
-
230
- clauses { list } : Extracted clauses
231
-
232
  max_points { int } : Maximum negotiation points to generate
233
-
234
  provider { LLMProvider } : LLM provider
235
 
236
  Returns:
@@ -241,14 +161,14 @@ class NegotiationEngine:
241
 
242
  # Convert dictionaries to objects if needed
243
  unfavorable_terms, missing_protections = self._ensure_objects(unfavorable_terms, missing_protections)
244
-
245
  log_info("Starting negotiation points generation", max_points = max_points, unfavorable_terms = len(unfavorable_terms), missing_protections = len(missing_protections))
246
 
247
  negotiation_points = list()
248
 
249
  # Critical unfavorable terms (walk-away level)
250
  critical_terms = [t for t in unfavorable_terms if (t.severity == "critical")]
251
-
252
  # Top-10 critical terms
253
  for term in critical_terms[:10]:
254
  point = self._create_enhanced_point_from_term(term, clauses, priority = 1)
@@ -264,7 +184,6 @@ class NegotiationEngine:
264
 
265
  # High unfavorable terms
266
  high_terms = [t for t in unfavorable_terms if (t.severity == "high")]
267
-
268
  for term in high_terms[:10]:
269
  point = self._create_enhanced_point_from_term(term, clauses, priority = 3)
270
  if point:
@@ -272,7 +191,6 @@ class NegotiationEngine:
272
 
273
  # High-risk categories from risk analysis
274
  high_risk_categories = self._get_high_risk_categories(risk_analysis)
275
-
276
  for category in high_risk_categories[:10]:
277
  point = self._create_category_strategy_point(category, risk_analysis, clauses, priority = 4)
278
  if point:
@@ -280,14 +198,12 @@ class NegotiationEngine:
280
 
281
  # Medium unfavorable terms and missing protections
282
  medium_terms = [t for t in unfavorable_terms if (t.severity == "medium")]
283
-
284
  for term in medium_terms[:10]:
285
  point = self._create_enhanced_point_from_term(term, clauses, priority=5)
286
  if point:
287
  negotiation_points.append(point)
288
 
289
  medium_protections = [p for p in missing_protections if (p.importance == "medium")]
290
-
291
  for protection in medium_protections[:10]:
292
  point = self._create_enhanced_point_from_protection(protection, priority = 5)
293
  negotiation_points.append(point)
@@ -297,20 +213,26 @@ class NegotiationEngine:
297
  risk_analysis,
298
  provider,
299
  )
300
-
301
  log_info(f"Negotiation points generation complete", total_points = len(enhanced_points))
302
 
303
  return enhanced_points[:max_points]
304
-
305
 
306
  def _create_enhanced_point_from_term(self, term: UnfavorableTerm, clauses: List[ExtractedClause], priority: int) -> Optional[NegotiationPoint]:
307
  """
308
- Create enhanced negotiation point from unfavorable term
309
  """
 
310
  clause = next((c for c in clauses if (c.reference == term.clause_reference)), None)
 
 
 
 
 
311
  if not clause:
 
312
  return None
313
-
314
  current = clause.text
315
 
316
  # Determine negotiation tactic
@@ -341,15 +263,15 @@ class NegotiationEngine:
341
  counterparty_concerns = counterparty_concerns,
342
  timing_suggestion = timing,
343
  bargaining_chips = self._suggest_bargaining_chips(term, tactic),
344
- )
 
345
 
346
-
347
  def _create_enhanced_point_from_protection(self, protection: MissingProtection, priority: int) -> NegotiationPoint:
348
  """
349
  Create enhanced negotiation point from missing protection
350
  """
351
  difficulty = "medium" if (protection.importance == "critical") else "easy"
352
-
353
  return NegotiationPoint(priority = priority,
354
  category = protection.categories[0] if protection.categories else "general",
355
  issue = f"Add {protection.protection}",
@@ -363,24 +285,28 @@ class NegotiationEngine:
363
  business_impact = f"Missing this protection creates {protection.risk_score}/100 risk exposure",
364
  timing_suggestion = "Early in negotiations - establishes baseline protections",
365
  bargaining_chips = ["Offer to review their standard protections in return"],
366
- )
367
-
368
 
369
  def _create_category_strategy_point(self, category: str, risk_analysis: RiskScore, clauses: List[ExtractedClause], priority: int) -> Optional[NegotiationPoint]:
370
  """
371
- Create strategic negotiation point for high-risk category
372
  """
373
- category_clauses = [c for c in clauses if self._matches_risk_category(c.category, category)]
 
374
  if not category_clauses:
 
375
  return None
376
-
377
  score = risk_analysis.category_scores.get(category, 0)
378
- description = self.risk_rules.CATEGORY_DESCRIPTIONS.get(category, {}).get("high", "")
 
 
379
 
380
  return NegotiationPoint(priority = priority,
381
  category = category,
382
  issue = f"Address {category.replace('_', ' ')} risks (score: {score}/100)",
383
- current_language = f"Multiple clauses in {category} category present elevated risk",
384
  proposed_language = f"Request balanced, market-standard terms for {category.replace('_', ' ')} provisions",
385
  rationale = description,
386
  tactic = NegotiationTactic.MODIFICATION,
@@ -389,28 +315,27 @@ class NegotiationEngine:
389
  timing_suggestion = "Mid-negotiations after establishing rapport",
390
  )
391
 
392
-
393
  def _determine_negotiation_tactic(self, term: UnfavorableTerm, clause: ExtractedClause) -> NegotiationTactic:
394
  """
395
  Determine the best negotiation tactic for this term
396
  """
397
  text_lower = clause.text.lower()
398
-
399
  if (("unlimited" in text_lower) or ("sole discretion" in text_lower)):
400
  return NegotiationTactic.LIMITATION
401
-
402
  elif (("indemnify" in text_lower) and ("mutual" not in text_lower)):
403
  return NegotiationTactic.MUTUALIZATION
404
-
405
  elif (any(word in text_lower for word in ["forfeit", "penalty", "liquidated damages"])):
406
  return NegotiationTactic.REMOVAL
407
-
408
  elif (("vague" in term.explanation.lower()) or ("ambiguous" in term.explanation.lower())):
409
  return NegotiationTactic.CLARIFICATION
410
-
411
  else:
412
  return NegotiationTactic.MODIFICATION
413
-
414
 
415
  def _generate_enhanced_proposed_language(self, term: UnfavorableTerm, clause: ExtractedClause, tactic: NegotiationTactic) -> str:
416
  """
@@ -422,46 +347,46 @@ class NegotiationEngine:
422
  NegotiationTactic.CLARIFICATION : "Clarify: 'For purposes of this section, [TERM] means [CLEAR DEFINITION]'",
423
  NegotiationTactic.MODIFICATION : "Modify to: '[BALANCED, MARKET-STANDARD LANGUAGE]'",
424
  }
425
-
426
  base_template = language_templates.get(tactic, term.suggested_fix or "[Request balanced language]")
427
 
428
  # Enhance with specific examples based on term type
429
  if ("non-compete" in term.term.lower()):
430
  return "Limit to: (a) 6-12 month duration, (b) direct competitors only, (c) reasonable geographic scope"
431
-
432
  elif ("liability" in term.term.lower()):
433
  return "Add: 'Total liability capped at the greater of $[AMOUNT] or fees paid in preceding 12 months'"
434
-
435
  elif ("termination" in term.term.lower()):
436
  return "Modify to provide mutual [30-60] day notice period and clear 'for cause' definition"
437
 
438
  return base_template
439
 
440
-
441
  def _calculate_negotiation_difficulty(self, term: UnfavorableTerm, tactic: NegotiationTactic) -> str:
442
  """
443
  Calculate negotiation difficulty
444
  """
445
  if ((term.severity == "critical") and (tactic == NegotiationTactic.REMOVAL)):
446
  return "hard"
447
-
448
  elif ((term.severity == "high") or (tactic == NegotiationTactic.MUTUALIZATION)):
449
  return "medium"
450
-
451
  else:
452
  return "easy"
453
 
454
-
455
  def _generate_business_impact(self, term: UnfavorableTerm, clause: ExtractedClause) -> str:
456
  """
457
  Generate business impact analysis
458
  """
459
  if (term.severity == "critical"):
460
  return "Could result in significant financial exposure or business restrictions"
461
-
462
  elif (term.severity == "high"):
463
- return "Creatures substantial operational risk or compliance burden"
464
-
465
  else:
466
  return "Standard business risk that should be managed"
467
 
@@ -475,9 +400,9 @@ class NegotiationEngine:
475
  NegotiationTactic.MUTUALIZATION : "They may prefer one-sided advantage",
476
  NegotiationTactic.CLARIFICATION : "They may prefer ambiguity for flexibility",
477
  }
478
-
479
  return concerns.get(tactic, "Standard negotiation resistance expected")
480
-
481
 
482
  def _suggest_timing(self, priority: int, tactic: NegotiationTactic) -> str:
483
  """
@@ -488,30 +413,30 @@ class NegotiationEngine:
488
 
489
  elif (tactic == NegotiationTactic.ADDITION):
490
  return "Early in negotiations - establishes baseline"
491
-
492
  else:
493
  return "Mid-negotiations - after establishing key terms"
494
-
495
 
496
  def _suggest_bargaining_chips(self, term: UnfavorableTerm, tactic: NegotiationTactic) -> List[str]:
497
  """
498
  Suggest bargaining chips
499
  """
500
  chips = list()
501
-
502
  if (tactic == NegotiationTactic.REMOVAL):
503
  chips.append("Offer alternative protection that addresses their underlying concern")
504
-
505
  elif (tactic == NegotiationTactic.LIMITATION):
506
  chips.append("Accept their position with reasonable cap or standard")
507
-
508
  elif (tactic == NegotiationTactic.MUTUALIZATION):
509
  chips.append("Frame as fairness principle benefiting both parties")
510
 
511
  chips.append("Trade for lower priority item they care about")
512
 
513
  return chips
514
-
515
 
516
  def _generate_strategic_fallback(self, term: UnfavorableTerm, tactic: NegotiationTactic) -> str:
517
  """
@@ -519,10 +444,10 @@ class NegotiationEngine:
519
  """
520
  if (term.severity == "critical"):
521
  return "If no compromise, seriously consider walking away - this creates unacceptable risk"
522
-
523
  elif (term.severity == "high"):
524
  return "If they refuse, document objection and consider risk mitigation strategies"
525
-
526
  else:
527
  return "If they won't budge, assess if other favorable terms compensate for this risk"
528
 
@@ -531,12 +456,13 @@ class NegotiationEngine:
531
  """
532
  Convert dictionaries back to proper objects if needed
533
  """
534
- if unfavorable_terms and isinstance(unfavorable_terms[0], dict):
535
- from services.term_analyzer import UnfavorableTerm
536
  unfavorable_terms = [UnfavorableTerm(**term_dict) for term_dict in unfavorable_terms]
537
 
 
538
  if missing_protections and isinstance(missing_protections[0], dict):
539
- from services.protection_checker import MissingProtection
540
  missing_protections = [MissingProtection(**prot_dict) for prot_dict in missing_protections]
541
 
542
  return unfavorable_terms, missing_protections
@@ -548,53 +474,31 @@ class NegotiationEngine:
548
  """
549
  if (protection.importance == "critical"):
550
  return "If they refuse, document this material gap and assess deal viability"
551
-
552
  else:
553
  return "If they refuse, note the gap and consider if other protections compensate"
554
-
555
 
556
  def _get_high_risk_categories(self, risk_analysis: RiskScore) -> List[str]:
557
  """
558
  Get high-risk categories from risk analysis
559
  """
560
- return [cat for cat, score in risk_analysis.category_scores.items() if (score >= self.risk_rules.RISK_THRESHOLDS["high"])]
561
-
562
-
563
- def _matches_risk_category(self, clause_category: str, risk_category: str) -> bool:
564
- """
565
- Category matching
566
- """
567
- mapping = {"restrictive_covenants" : ["non_compete", "confidentiality"],
568
- "termination_rights" : ["termination"],
569
- "penalties_liability" : ["indemnification", "liability"],
570
- "compensation_benefits" : ["compensation"],
571
- "intellectual_property" : ["intellectual_property"],
572
- "confidentiality" : ["confidentiality"],
573
- "liability_indemnity" : ["indemnification", "liability"],
574
- "governing_law" : ["dispute_resolution"],
575
- "payment_terms" : ["compensation"],
576
- "warranties" : ["warranty"],
577
- "dispute_resolution" : ["dispute_resolution"],
578
- "assignment_change" : ["assignment", "amendment"],
579
- "insurance" : ["insurance"],
580
- "force_majeure" : ["force_majeure"],
581
- }
582
 
583
- return clause_category in mapping.get(risk_category, [])
584
 
585
-
586
  def _enhance_with_llm_strategy(self, points: List[NegotiationPoint], risk_analysis: RiskScore, provider: LLMProvider) -> List[NegotiationPoint]:
587
  """
588
  Use LLM to enhance negotiation points with sophisticated strategy
589
  """
590
  if not points:
591
  return points
592
-
593
  log_info(f"Enhancing {len(points)} negotiation points with LLM strategy")
594
 
595
  try:
596
  prompt = self._create_strategic_enhancement_prompt(points, risk_analysis)
597
-
598
  response = self.llm_manager.complete(prompt = prompt,
599
  provider = provider,
600
  temperature = 0.3,
@@ -602,7 +506,6 @@ class NegotiationEngine:
602
  fallback_providers = [LLMProvider.OPENAI],
603
  retry_on_error = True,
604
  )
605
-
606
  if response.success:
607
  enhanced = self._parse_strategic_enhancements(response.text, points)
608
  log_info("LLM strategic enhancement successful")
@@ -615,8 +518,8 @@ class NegotiationEngine:
615
  except Exception as e:
616
  log_error(e, context = {"component": "NegotiationEngine", "operation": "enhance_with_llm_strategy"})
617
  return points
618
-
619
 
 
620
  def _create_strategic_enhancement_prompt(self, points: List[NegotiationPoint], risk_analysis: RiskScore) -> str:
621
  """
622
  Create prompt for strategic LLM enhancement
@@ -626,7 +529,7 @@ class NegotiationEngine:
626
  "points" : [{"priority" : p.priority,
627
  "issue" : p.issue,
628
  "category" : p.category,
629
- "current" : p.current_language[:150],
630
  "proposed" : p.proposed_language,
631
  "tactic" : p.tactic.value,
632
  "difficulty" : p.estimated_difficulty
@@ -634,69 +537,88 @@ class NegotiationEngine:
634
  for p in points
635
  ],
636
  }
637
-
638
  prompt = f"""
639
  As an expert negotiation strategist, enhance these negotiation points with sophisticated strategy.
640
-
641
  CONTRACT RISK: {context['overall_risk']}/100 ({context['risk_level']})
642
-
643
- NEGOTIATION POINTS:
644
  {json.dumps(context['points'], indent=2)}
645
-
646
- For EACH point (keep same numbering 1, 2, 3...), provide:
647
- 1. ENHANCED_PROPOSAL: More specific, legally sound alternative language
648
- 2. STRATEGIC_RATIONALE: Business-focused reasoning emphasizing mutual benefit
649
- 3. COUNTERPARTY_PERSPECTIVE: Their likely concerns and how to address them
650
- 4. TIMING_STRATEGY: When and how to raise this issue
651
- 5. BARGAINING_CHIPS: Specific trade-offs or concessions
652
-
653
- Focus on creating win-win solutions and practical negotiation tactics.
 
 
 
 
 
 
 
654
  """
655
-
656
  return prompt
657
-
658
 
659
  def _parse_strategic_enhancements(self, llm_text: str, original_points: List[NegotiationPoint]) -> List[NegotiationPoint]:
660
  """
661
- Parse LLM strategic enhancements
662
  """
663
  enhanced = list()
664
-
665
  for i, point in enumerate(original_points):
666
- # Extract enhanced proposal
667
- proposal_pattern = rf"{i+1}[.\)].*?ENHANCED_PROPOSAL:\s*(.*?)(?:STRATEGIC_RATIONALE:|COUNTERPARTY_PERSPECTIVE:|TIMING_STRATEGY:|BARGAINING_CHIPS:|{i+2}\.|$)"
668
- proposal_match = re.search(proposal_pattern, llm_text, re.IGNORECASE | re.DOTALL)
669
-
670
- if proposal_match:
671
- enhanced_proposal = proposal_match.group(1).strip()
672
 
673
- if (enhanced_proposal and (len(enhanced_proposal) > 30)):
674
- point.proposed_language = enhanced_proposal[:600]
675
-
676
- # Extract timing strategy
677
- timing_pattern = rf"{i+1}[.\)].*?TIMING_STRATEGY:\s*(.*?)(?:BARGAINING_CHIPS:|{i+2}\.|$)"
678
- timing_match = re.search(timing_pattern, llm_text, re.IGNORECASE | re.DOTALL)
679
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
680
  if timing_match:
681
- timing = timing_match.group(1).strip()
682
- if (timing and (len(timing) > 10)):
683
- point.timing_suggestion = timing[:200]
684
-
685
- # Extract bargaining chips
686
- chips_pattern = rf"{i+1}[.\)].*?BARGAINING_CHIPS:\s*(.*?)(?:{i+2}\.|$)"
687
- chips_match = re.search(chips_pattern, llm_text, re.IGNORECASE | re.DOTALL)
688
-
689
  if chips_match:
690
- chips_text = chips_match.group(1).strip()
691
- if chips_text:
692
- # Parse chips as list items or comma-separated
693
- chips = [chip.strip() for chip in re.split(r'[,-•]', chips_text) if chip.strip()]
694
- point.bargaining_chips = chips[:3] # Keep top 3
695
-
 
 
 
696
  enhanced.append(point)
697
-
698
  return enhanced
699
-
700
 
701
  def _generate_overall_strategy(self, risk_analysis: RiskScore, risk_interpretation: RiskInterpretation, contract_type: ContractType, provider: LLMProvider) -> str:
702
  """
@@ -704,20 +626,16 @@ class NegotiationEngine:
704
  """
705
  prompt = f"""
706
  As a negotiation expert, provide overall strategy for this contract.
707
-
708
  CONTRACT TYPE: {contract_type.value}
709
  RISK LEVEL: {risk_analysis.overall_score}/100 ({risk_analysis.risk_level})
710
  KEY CONCERNS: {risk_interpretation.key_concerns}
711
-
712
  Provide a concise 3-4 sentence negotiation strategy focusing on:
713
  - Overall approach (collaborative vs. firm)
714
  - Key priorities
715
  - Risk management
716
  - Success metrics
717
-
718
  Strategy:
719
  """
720
-
721
  try:
722
  response = self.llm_manager.complete(prompt = prompt,
723
  provider = provider,
@@ -726,44 +644,40 @@ class NegotiationEngine:
726
  )
727
 
728
  return response.text.strip() if response.success else "Focus on addressing critical risks while maintaining collaborative negotiation tone."
729
-
730
  except Exception as e:
731
  log_error(e, context = {"operation": "generate_overall_strategy"})
732
-
733
  return "Prioritize critical risk items while seeking balanced, market-standard terms."
734
-
735
 
736
  def _identify_walk_away_items(self, negotiation_points: List[NegotiationPoint], risk_analysis: RiskScore) -> List[str]:
737
  """
738
  Identify non-negotiable walk-away items
739
  """
740
  walk_away = list()
741
-
742
  critical_points = [p for p in negotiation_points if (p.priority == 1)]
743
 
744
  for point in critical_points:
745
  if ((point.estimated_difficulty == "hard") and (risk_analysis.overall_score >= 70)):
746
  walk_away.append(f"{point.issue} - critical risk that cannot be mitigated")
747
-
748
  # Max 5 walk-away items
749
  return walk_away[:5]
750
-
751
 
752
- def _identify_concession_items(self, negotiation_points: List[NegotiationPoint],
753
- risk_analysis: RiskScore) -> List[str]:
754
  """
755
  Identify items that can be conceded
756
  """
757
  concessions = list()
758
-
759
- low_priority = [p for p in negotiation_points if p.priority >= 4]
760
 
761
  for point in low_priority[:2]:
762
  if (point.estimated_difficulty == "hard"):
763
  concessions.append(f"{point.issue} - lower priority, high difficulty")
764
-
765
  return concessions
766
-
767
 
768
  def _generate_timing_guidance(self, negotiation_points: List[NegotiationPoint], contract_type: ContractType, provider: LLMProvider) -> str:
769
  """
@@ -779,7 +693,7 @@ class NegotiationEngine:
779
 
780
  else:
781
  return "Progressive approach: start with easier wins to build momentum"
782
-
783
 
784
  def _generate_risk_mitigation_plan(self, risk_analysis: RiskScore, negotiation_points: List[NegotiationPoint], provider: LLMProvider) -> str:
785
  """
@@ -787,135 +701,9 @@ class NegotiationEngine:
787
  """
788
  if (risk_analysis.overall_score >= 70):
789
  return "High risk level - focus on critical term resolution. Have fallback positions ready."
790
-
791
  elif (risk_analysis.overall_score >= 50):
792
  return "Moderate risk - prioritize 2-3 key improvements. Document remaining risks."
793
-
794
  else:
795
  return "Manageable risk level - focus on most impactful improvements."
796
-
797
-
798
- # Keep existing utility methods for backward compatibility
799
- def generate_negotiation_strategy_document(self, playbook: NegotiationPlaybook) -> str:
800
- """
801
- Generate a formatted negotiation strategy document
802
-
803
- Returns:
804
- -------
805
- Formatted markdown document
806
- """
807
- doc = ["# Comprehensive Negotiation Playbook",
808
- "",
809
- f"## Overall Strategy",
810
- f"{playbook.overall_strategy}",
811
- "",
812
- "## Critical Negotiation Points",
813
- ""
814
- ]
815
-
816
- # Group by priority with enhanced labels
817
- by_priority = dict()
818
-
819
- for point in playbook.critical_points:
820
- if point.priority not in by_priority:
821
- by_priority[point.priority] = []
822
-
823
- by_priority[point.priority].append(point)
824
-
825
- priority_labels = {1: "🔴 CRITICAL PRIORITY - Deal Breakers",
826
- 2: "🟠 HIGH PRIORITY - Essential Items",
827
- 3: "🟡 MEDIUM PRIORITY - Important Improvements",
828
- 4: "🟢 STANDARD PRIORITY - Recommended Changes",
829
- 5: "⚪ LOW PRIORITY - Optional Improvements"
830
- }
831
-
832
- for priority in sorted(by_priority.keys()):
833
- doc.append(f"### {priority_labels.get(priority, f'Priority {priority}')}")
834
- doc.append("")
835
-
836
- for point in by_priority[priority]:
837
- doc.append(f"#### {point.issue}")
838
- doc.append(f"**Category:** {point.category} | **Tactic:** {point.tactic.value} | **Difficulty:** {point.estimated_difficulty}")
839
- doc.append("")
840
- doc.append("**Current Language:**")
841
- doc.append(f"> {point.current_language}")
842
- doc.append("")
843
- doc.append("**Proposed Language:**")
844
- doc.append(f"{point.proposed_language}")
845
- doc.append("")
846
- doc.append("**Rationale:**")
847
- doc.append(f"{point.rationale}")
848
- doc.append("")
849
-
850
- if point.business_impact:
851
- doc.append("**Business Impact:**")
852
- doc.append(f"{point.business_impact}")
853
- doc.append("")
854
-
855
- if point.timing_suggestion:
856
- doc.append("**Timing:**")
857
- doc.append(f"{point.timing_suggestion}")
858
- doc.append("")
859
-
860
- if point.bargaining_chips:
861
- doc.append("**Bargaining Chips:**")
862
- for chip in point.bargaining_chips:
863
- doc.append(f"- {chip}")
864
- doc.append("")
865
-
866
- if point.fallback_position:
867
- doc.append("**Fallback Position:**")
868
- doc.append(f"{point.fallback_position}")
869
- doc.append("")
870
-
871
- doc.append("---")
872
- doc.append("")
873
-
874
- # Add strategy sections
875
- if playbook.walk_away_items:
876
- doc.append("## 🚫 Walk-Away Items")
877
- doc.append("Do not proceed if these cannot be resolved:")
878
-
879
- for item in playbook.walk_away_items:
880
- doc.append(f"- {item}")
881
-
882
- doc.append("")
883
-
884
- if playbook.concession_items:
885
- doc.append("## 💰 Concession Items")
886
- doc.append("Consider conceding these if needed:")
887
-
888
- for item in playbook.concession_items:
889
- doc.append(f"- {item}")
890
-
891
- doc.append("")
892
-
893
- doc.append("## ⏰ Timing Guidance")
894
- doc.append(playbook.timing_guidance)
895
- doc.append("")
896
-
897
- doc.append("## Risk Mitigation Plan")
898
- doc.append(playbook.risk_mitigation_plan)
899
-
900
- return "\n".join(doc)
901
-
902
-
903
- def get_critical_points(self, points: List[NegotiationPoint]) -> List[NegotiationPoint]:
904
- """
905
- Filter to only priority 1-2 points
906
- """
907
- critical = [p for p in points if p.priority <= 2]
908
- log_info(f"Found {len(critical)} critical negotiation points")
909
-
910
- return critical
911
-
912
-
913
- def get_points_by_category(self, points: List[NegotiationPoint],
914
- category: str) -> List[NegotiationPoint]:
915
- """
916
- Filter points by category
917
- """
918
- filtered = [p for p in points if (p.category == category)]
919
- log_info(f"Found {len(filtered)} negotiation points in category '{category}'")
920
-
921
- return filtered
 
2
  import re
3
  import sys
4
  import json
 
5
  from typing import Any
6
  from typing import List
7
  from typing import Dict
8
  from typing import Tuple
9
  from pathlib import Path
10
  from typing import Optional
 
11
 
12
  # Add parent directory to path for imports
13
  sys.path.append(str(Path(__file__).parent.parent))
 
19
  from services.risk_analyzer import RiskScore
20
  from utils.logger import ContractAnalyzerLogger
21
  from model_manager.llm_manager import LLMManager
22
+ from services.data_models import UnfavorableTerm
23
  from model_manager.llm_manager import LLMProvider
24
+ from services.data_models import NegotiationPoint
25
+ from services.data_models import NegotiationTactic
26
+ from services.data_models import MissingProtection
27
+ from services.data_models import NegotiationPlaybook
28
  from services.clause_extractor import ExtractedClause
29
  from services.llm_interpreter import RiskInterpretation
30
  from services.llm_interpreter import ClauseInterpretation
31
  from services.protection_checker import MissingProtection
32
 
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  class NegotiationEngine:
35
  """
36
  Generate intelligent negotiation strategy with LLM enhancement integrated with full analysis pipeline and RiskRules framework
 
38
  def __init__(self, llm_manager: LLMManager, default_provider: LLMProvider = LLMProvider.OLLAMA):
      """
      Initialize negotiation engine

      Stores the given manager and provider, creates a RiskRules instance, obtains the logger
      returned by ContractAnalyzerLogger.get_logger() and logs the initialization.

      Arguments:
      ----------
          llm_manager      { LLMManager }  : LLMManager instance

          default_provider { LLMProvider } : Default LLM provider (LLMProvider.OLLAMA when omitted)
      """
      self.llm_manager = llm_manager
      self.default_provider = default_provider
      self.risk_rules = RiskRules()
      self.logger = ContractAnalyzerLogger.get_logger()

      # Record which provider the engine was configured with
      log_info("NegotiationEngine initialized", default_provider = default_provider.value)
53
 
54
 
 
55
  @ContractAnalyzerLogger.log_execution_time("generate_comprehensive_playbook")
56
  def generate_comprehensive_playbook(self, risk_analysis: RiskScore, risk_interpretation: RiskInterpretation, unfavorable_terms: List[UnfavorableTerm], missing_protections: List[MissingProtection],
57
  clauses: List[ExtractedClause], contract_type: ContractType, max_points: int = 10, provider: Optional[LLMProvider] = None) -> NegotiationPlaybook:
58
  """
59
  Generate comprehensive negotiation playbook using all analysis results
60
+
61
  Arguments:
62
  ----------
63
  risk_analysis : Complete risk analysis
 
68
 
69
  missing_protections : Missing protections
70
 
71
+ clauses : Extracted clauses with risk scores (should be risk-category based)
72
+
73
  contract_type : Contract type for strategy
74
+
75
  max_points : Maximum negotiation points
76
+
77
  provider : LLM provider
78
 
79
  Returns:
 
99
  contract_type = contract_type,
100
  provider = provider,
101
  )
102
+
103
  # Identify walk-away items
104
  walk_away_items = self._identify_walk_away_items(negotiation_points = negotiation_points,
105
  risk_analysis = risk_analysis,
 
129
  timing_guidance = timing_guidance,
130
  risk_mitigation_plan = risk_mitigation_plan,
131
  )
 
132
  log_info("Comprehensive negotiation playbook generated", critical_points = len(negotiation_points), walk_away_items = len(walk_away_items))
 
133
  return playbook
134
 
135
 
 
138
  clauses: List[ExtractedClause], max_points: int = 10, provider: Optional[LLMProvider] = None) -> List[NegotiationPoint]:
139
  """
140
  Generate prioritized negotiation strategy
141
+
142
  Arguments:
143
  ----------
144
  risk_analysis { RiskScore } : Risk analysis results
145
+
146
  unfavorable_terms { list } : Detected unfavorable terms
147
+
148
  missing_protections { list } : Missing protections
149
+
150
+ clauses { list } : Extracted clauses (ideally with risk categories)
151
+
152
  max_points { int } : Maximum negotiation points to generate
153
+
154
  provider { LLMProvider } : LLM provider
155
 
156
  Returns:
 
161
 
162
  # Convert dictionaries to objects if needed
163
  unfavorable_terms, missing_protections = self._ensure_objects(unfavorable_terms, missing_protections)
164
+
165
  log_info("Starting negotiation points generation", max_points = max_points, unfavorable_terms = len(unfavorable_terms), missing_protections = len(missing_protections))
166
 
167
  negotiation_points = list()
168
 
169
  # Critical unfavorable terms (walk-away level)
170
  critical_terms = [t for t in unfavorable_terms if (t.severity == "critical")]
171
+
172
  # Top-10 critical terms
173
  for term in critical_terms[:10]:
174
  point = self._create_enhanced_point_from_term(term, clauses, priority = 1)
 
184
 
185
  # High unfavorable terms
186
  high_terms = [t for t in unfavorable_terms if (t.severity == "high")]
 
187
  for term in high_terms[:10]:
188
  point = self._create_enhanced_point_from_term(term, clauses, priority = 3)
189
  if point:
 
191
 
192
  # High-risk categories from risk analysis
193
  high_risk_categories = self._get_high_risk_categories(risk_analysis)
 
194
  for category in high_risk_categories[:10]:
195
  point = self._create_category_strategy_point(category, risk_analysis, clauses, priority = 4)
196
  if point:
 
198
 
199
  # Medium unfavorable terms and missing protections
200
  medium_terms = [t for t in unfavorable_terms if (t.severity == "medium")]
 
201
  for term in medium_terms[:10]:
202
  point = self._create_enhanced_point_from_term(term, clauses, priority=5)
203
  if point:
204
  negotiation_points.append(point)
205
 
206
  medium_protections = [p for p in missing_protections if (p.importance == "medium")]
 
207
  for protection in medium_protections[:10]:
208
  point = self._create_enhanced_point_from_protection(protection, priority = 5)
209
  negotiation_points.append(point)
 
213
  risk_analysis,
214
  provider,
215
  )
 
216
  log_info(f"Negotiation points generation complete", total_points = len(enhanced_points))
217
 
218
  return enhanced_points[:max_points]
219
+
220
 
221
  def _create_enhanced_point_from_term(self, term: UnfavorableTerm, clauses: List[ExtractedClause], priority: int) -> Optional[NegotiationPoint]:
222
  """
223
+ Create an enhanced negotiation point from an unfavorable term, using the clauses extracted by RiskClauseExtractor (which carry risk categories)
224
  """
225
+ # Find clause by reference
226
  clause = next((c for c in clauses if (c.reference == term.clause_reference)), None)
227
+
228
+ # If not found by reference, try finding by matching risk category (if term.category is a risk category)
229
+ if not clause:
230
+ clause = next((c for c in clauses if (c.category == term.category)), None) # term.category should be a risk category from TermAnalyzer
231
+
232
  if not clause:
233
+ log_info(f"Could not find clause for term: {term.term} in category: {term.category}", clause_reference=term.clause_reference)
234
  return None
235
+
236
  current = clause.text
237
 
238
  # Determine negotiation tactic
 
263
  counterparty_concerns = counterparty_concerns,
264
  timing_suggestion = timing,
265
  bargaining_chips = self._suggest_bargaining_chips(term, tactic),
266
+ )
267
+
268
 
 
269
  def _create_enhanced_point_from_protection(self, protection: MissingProtection, priority: int) -> NegotiationPoint:
270
  """
271
  Create enhanced negotiation point from missing protection
272
  """
273
  difficulty = "medium" if (protection.importance == "critical") else "easy"
274
+
275
  return NegotiationPoint(priority = priority,
276
  category = protection.categories[0] if protection.categories else "general",
277
  issue = f"Add {protection.protection}",
 
285
  business_impact = f"Missing this protection creates {protection.risk_score}/100 risk exposure",
286
  timing_suggestion = "Early in negotiations - establishes baseline protections",
287
  bargaining_chips = ["Offer to review their standard protections in return"],
288
+ )
289
+
290
 
291
  def _create_category_strategy_point(self, category: str, risk_analysis: RiskScore, clauses: List[ExtractedClause], priority: int) -> Optional[NegotiationPoint]:
292
  """
293
+ Create strategic negotiation point for high-risk category, where clauses are from RiskClauseExtractor and have risk categories
294
  """
295
+ # Find clauses that belong to this *risk* category
296
+ category_clauses = [c for c in clauses if c.category == category] # Direct match on risk category
297
  if not category_clauses:
298
+ log_info(f"No clauses found for high-risk category: {category}", available_categories=[c.category for c in clauses])
299
  return None
300
+
301
  score = risk_analysis.category_scores.get(category, 0)
302
+
303
+ # Use high description as default for high-risk
304
+ description = self.risk_rules.CATEGORY_DESCRIPTIONS.get(category, {}).get("high", "")
305
 
306
  return NegotiationPoint(priority = priority,
307
  category = category,
308
  issue = f"Address {category.replace('_', ' ')} risks (score: {score}/100)",
309
+ current_language = f"Multiple clauses in {category} category present elevated risk (e.g., {category_clauses[0].reference}).",
310
  proposed_language = f"Request balanced, market-standard terms for {category.replace('_', ' ')} provisions",
311
  rationale = description,
312
  tactic = NegotiationTactic.MODIFICATION,
 
315
  timing_suggestion = "Mid-negotiations after establishing rapport",
316
  )
317
 
318
+
319
  def _determine_negotiation_tactic(self, term: UnfavorableTerm, clause: ExtractedClause) -> NegotiationTactic:
320
  """
321
  Determine the best negotiation tactic for this term
322
  """
323
  text_lower = clause.text.lower()
 
324
  if (("unlimited" in text_lower) or ("sole discretion" in text_lower)):
325
  return NegotiationTactic.LIMITATION
326
+
327
  elif (("indemnify" in text_lower) and ("mutual" not in text_lower)):
328
  return NegotiationTactic.MUTUALIZATION
329
+
330
  elif (any(word in text_lower for word in ["forfeit", "penalty", "liquidated damages"])):
331
  return NegotiationTactic.REMOVAL
332
+
333
  elif (("vague" in term.explanation.lower()) or ("ambiguous" in term.explanation.lower())):
334
  return NegotiationTactic.CLARIFICATION
335
+
336
  else:
337
  return NegotiationTactic.MODIFICATION
338
+
339
 
340
  def _generate_enhanced_proposed_language(self, term: UnfavorableTerm, clause: ExtractedClause, tactic: NegotiationTactic) -> str:
341
  """
 
347
  NegotiationTactic.CLARIFICATION : "Clarify: 'For purposes of this section, [TERM] means [CLEAR DEFINITION]'",
348
  NegotiationTactic.MODIFICATION : "Modify to: '[BALANCED, MARKET-STANDARD LANGUAGE]'",
349
  }
350
+
351
  base_template = language_templates.get(tactic, term.suggested_fix or "[Request balanced language]")
352
 
353
  # Enhance with specific examples based on term type
354
  if ("non-compete" in term.term.lower()):
355
  return "Limit to: (a) 6-12 month duration, (b) direct competitors only, (c) reasonable geographic scope"
356
+
357
  elif ("liability" in term.term.lower()):
358
  return "Add: 'Total liability capped at the greater of $[AMOUNT] or fees paid in preceding 12 months'"
359
+
360
  elif ("termination" in term.term.lower()):
361
  return "Modify to provide mutual [30-60] day notice period and clear 'for cause' definition"
362
 
363
  return base_template
364
 
365
+
366
  def _calculate_negotiation_difficulty(self, term: UnfavorableTerm, tactic: NegotiationTactic) -> str:
367
  """
368
  Calculate negotiation difficulty
369
  """
370
  if ((term.severity == "critical") and (tactic == NegotiationTactic.REMOVAL)):
371
  return "hard"
372
+
373
  elif ((term.severity == "high") or (tactic == NegotiationTactic.MUTUALIZATION)):
374
  return "medium"
375
+
376
  else:
377
  return "easy"
378
 
379
+
380
  def _generate_business_impact(self, term: UnfavorableTerm, clause: ExtractedClause) -> str:
381
  """
382
  Generate business impact analysis
383
  """
384
  if (term.severity == "critical"):
385
  return "Could result in significant financial exposure or business restrictions"
386
+
387
  elif (term.severity == "high"):
388
+ return "Creates substantial operational risk or compliance burden"
389
+
390
  else:
391
  return "Standard business risk that should be managed"
392
 
 
400
  NegotiationTactic.MUTUALIZATION : "They may prefer one-sided advantage",
401
  NegotiationTactic.CLARIFICATION : "They may prefer ambiguity for flexibility",
402
  }
403
+
404
  return concerns.get(tactic, "Standard negotiation resistance expected")
405
+
406
 
407
  def _suggest_timing(self, priority: int, tactic: NegotiationTactic) -> str:
408
  """
 
413
 
414
  elif (tactic == NegotiationTactic.ADDITION):
415
  return "Early in negotiations - establishes baseline"
416
+
417
  else:
418
  return "Mid-negotiations - after establishing key terms"
419
+
420
 
421
  def _suggest_bargaining_chips(self, term: UnfavorableTerm, tactic: NegotiationTactic) -> List[str]:
422
  """
423
  Suggest bargaining chips
424
  """
425
  chips = list()
426
+
427
  if (tactic == NegotiationTactic.REMOVAL):
428
  chips.append("Offer alternative protection that addresses their underlying concern")
429
+
430
  elif (tactic == NegotiationTactic.LIMITATION):
431
  chips.append("Accept their position with reasonable cap or standard")
432
+
433
  elif (tactic == NegotiationTactic.MUTUALIZATION):
434
  chips.append("Frame as fairness principle benefiting both parties")
435
 
436
  chips.append("Trade for lower priority item they care about")
437
 
438
  return chips
439
+
440
 
441
  def _generate_strategic_fallback(self, term: UnfavorableTerm, tactic: NegotiationTactic) -> str:
442
  """
 
444
  """
445
  if (term.severity == "critical"):
446
  return "If no compromise, seriously consider walking away - this creates unacceptable risk"
447
+
448
  elif (term.severity == "high"):
449
  return "If they refuse, document objection and consider risk mitigation strategies"
450
+
451
  else:
452
  return "If they won't budge, assess if other favorable terms compensate for this risk"
453
 
 
456
  """
457
  Convert dictionaries back to proper objects if needed
458
  """
459
+ if (unfavorable_terms and isinstance(unfavorable_terms[0], dict)):
460
+
461
  unfavorable_terms = [UnfavorableTerm(**term_dict) for term_dict in unfavorable_terms]
462
 
463
+
464
  if missing_protections and isinstance(missing_protections[0], dict):
465
+
466
  missing_protections = [MissingProtection(**prot_dict) for prot_dict in missing_protections]
467
 
468
  return unfavorable_terms, missing_protections
 
474
  """
475
  if (protection.importance == "critical"):
476
  return "If they refuse, document this material gap and assess deal viability"
477
+
478
  else:
479
  return "If they refuse, note the gap and consider if other protections compensate"
480
+
481
 
482
  def _get_high_risk_categories(self, risk_analysis: RiskScore) -> List[str]:
483
  """
484
  Get high-risk categories from risk analysis
485
  """
486
+ # Use the risk thresholds defined in RiskRules
487
+ high_threshold = self.risk_rules.RISK_THRESHOLDS.get("high", 60)
488
+ return [cat for cat, score in risk_analysis.category_scores.items() if (score >= high_threshold)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
489
 
 
490
 
 
491
  def _enhance_with_llm_strategy(self, points: List[NegotiationPoint], risk_analysis: RiskScore, provider: LLMProvider) -> List[NegotiationPoint]:
492
  """
493
  Use LLM to enhance negotiation points with sophisticated strategy
494
  """
495
  if not points:
496
  return points
497
+
498
  log_info(f"Enhancing {len(points)} negotiation points with LLM strategy")
499
 
500
  try:
501
  prompt = self._create_strategic_enhancement_prompt(points, risk_analysis)
 
502
  response = self.llm_manager.complete(prompt = prompt,
503
  provider = provider,
504
  temperature = 0.3,
 
506
  fallback_providers = [LLMProvider.OPENAI],
507
  retry_on_error = True,
508
  )
 
509
  if response.success:
510
  enhanced = self._parse_strategic_enhancements(response.text, points)
511
  log_info("LLM strategic enhancement successful")
 
518
  except Exception as e:
519
  log_error(e, context = {"component": "NegotiationEngine", "operation": "enhance_with_llm_strategy"})
520
  return points
 
521
 
522
+
523
  def _create_strategic_enhancement_prompt(self, points: List[NegotiationPoint], risk_analysis: RiskScore) -> str:
524
  """
525
  Create prompt for strategic LLM enhancement
 
529
  "points" : [{"priority" : p.priority,
530
  "issue" : p.issue,
531
  "category" : p.category,
532
+ "current" : p.current_language,
533
  "proposed" : p.proposed_language,
534
  "tactic" : p.tactic.value,
535
  "difficulty" : p.estimated_difficulty
 
537
  for p in points
538
  ],
539
  }
540
+
541
  prompt = f"""
542
  As an expert negotiation strategist, enhance these negotiation points with sophisticated strategy.
 
543
  CONTRACT RISK: {context['overall_risk']}/100 ({context['risk_level']})
544
+ NEGOTIATION POINTS (format: [{{'priority': int, 'issue': str, 'category': str, 'current': str, 'proposed': str, 'tactic': str, 'difficulty': str}}]):
 
545
  {json.dumps(context['points'], indent=2)}
546
+ For EACH point (match the order and priority), provide:
547
+ 1. ENHANCED_PROPOSAL: More specific, legally sound alternative language (only return the enhanced text).
548
+ 2. STRATEGIC_RATIONALE: Business-focused reasoning emphasizing mutual benefit (only return the rationale).
549
+ 3. COUNTERPARTY_PERSPECTIVE: Their likely concerns and how to address them (only return the perspective).
550
+ 4. TIMING_STRATEGY: When and how to raise this issue (only return the timing).
551
+ 5. BARGAINING_CHIPS: Specific trade-offs or concessions (only return the chips, as a comma-separated string).
552
+ Focus on creating win-win solutions and practical negotiation tactics. Respond in the exact format below for each point:
553
+ Point 1:
554
+ ENHANCED_PROPOSAL: [text]
555
+ STRATEGIC_RATIONALE: [text]
556
+ COUNTERPARTY_PERSPECTIVE: [text]
557
+ TIMING_STRATEGY: [text]
558
+ BARGAINING_CHIPS: [chip1, chip2, ...]
559
+ Point 2:
560
+ ENHANCED_PROPOSAL: [text]
561
+ ...
562
  """
563
+
564
  return prompt
565
+
566
 
567
  def _parse_strategic_enhancements(self, llm_text: str, original_points: List[NegotiationPoint]) -> List[NegotiationPoint]:
568
  """
569
+ Parse LLM strategic enhancements, assuming a structured format.
570
  """
571
  enhanced = list()
572
+
573
  for i, point in enumerate(original_points):
574
+ point_identifier = f"Point {i+1}:"
575
+ start_idx = llm_text.find(point_identifier)
576
+ if (start_idx == -1):
577
+ log_info(f"LLM response did not contain expected identifier for Point {i+1}. Keeping original.")
578
+ enhanced.append(point)
 
579
 
580
+ continue
581
+
582
+ # Find the start of the next point or end of string
583
+ next_point_idx = llm_text.find(f"Point {i+2}:", start_idx)
584
+ if next_point_idx == -1:
585
+ section_text = llm_text[start_idx:]
586
 
587
+ else:
588
+ section_text = llm_text[start_idx:next_point_idx]
589
+
590
+ # Extract fields using regex within the section_text
591
+ proposal_match = re.search(r"ENHANCED_PROPOSAL:\s*(.*?)(?:\n|$)", section_text, re.DOTALL)
592
+ if proposal_match:
593
+ new_proposal = proposal_match.group(1).strip()
594
+
595
+ # Basic sanity check
596
+ if (new_proposal and (len(new_proposal) > 10)):
597
+ point.proposed_language = new_proposal
598
+
599
+ # TIMING_STRATEGY
600
+ timing_match = re.search(r"TIMING_STRATEGY:\s*(.*?)(?:\n|$)", section_text, re.DOTALL)
601
  if timing_match:
602
+ new_timing = timing_match.group(1).strip()
603
+ if (new_timing and (len(new_timing) > 5)):
604
+ point.timing_suggestion = new_timing
605
+
606
+ # BARGAINING_CHIPS
607
+ chips_match = re.search(r"BARGAINING_CHIPS:\s*\[(.*?)\]", section_text, re.DOTALL)
 
 
608
  if chips_match:
609
+ chips_str = chips_match.group(1).strip()
610
+
611
+ if chips_str:
612
+ # Split by comma and strip whitespace
613
+ chips_list = [chip.strip().strip('"\'') for chip in chips_str.split(',') if chip.strip()]
614
+
615
+ # Keep top 3
616
+ point.bargaining_chips = chips_list[:3]
617
+
618
  enhanced.append(point)
619
+
620
  return enhanced
621
+
622
 
623
  def _generate_overall_strategy(self, risk_analysis: RiskScore, risk_interpretation: RiskInterpretation, contract_type: ContractType, provider: LLMProvider) -> str:
624
  """
 
626
  """
627
  prompt = f"""
628
  As a negotiation expert, provide overall strategy for this contract.
 
629
  CONTRACT TYPE: {contract_type.value}
630
  RISK LEVEL: {risk_analysis.overall_score}/100 ({risk_analysis.risk_level})
631
  KEY CONCERNS: {risk_interpretation.key_concerns}
 
632
  Provide a concise 3-4 sentence negotiation strategy focusing on:
633
  - Overall approach (collaborative vs. firm)
634
  - Key priorities
635
  - Risk management
636
  - Success metrics
 
637
  Strategy:
638
  """
 
639
  try:
640
  response = self.llm_manager.complete(prompt = prompt,
641
  provider = provider,
 
644
  )
645
 
646
  return response.text.strip() if response.success else "Focus on addressing critical risks while maintaining collaborative negotiation tone."
647
+
648
  except Exception as e:
649
  log_error(e, context = {"operation": "generate_overall_strategy"})
 
650
  return "Prioritize critical risk items while seeking balanced, market-standard terms."
651
+
652
 
653
  def _identify_walk_away_items(self, negotiation_points: List[NegotiationPoint], risk_analysis: RiskScore) -> List[str]:
654
  """
655
  Identify non-negotiable walk-away items
656
  """
657
  walk_away = list()
 
658
  critical_points = [p for p in negotiation_points if (p.priority == 1)]
659
 
660
  for point in critical_points:
661
  if ((point.estimated_difficulty == "hard") and (risk_analysis.overall_score >= 70)):
662
  walk_away.append(f"{point.issue} - critical risk that cannot be mitigated")
663
+
664
  # Max 5 walk-away items
665
  return walk_away[:5]
 
666
 
667
+
668
+ def _identify_concession_items(self, negotiation_points: List[NegotiationPoint], risk_analysis: RiskScore) -> List[str]:
669
  """
670
  Identify items that can be conceded
671
  """
672
  concessions = list()
673
+ low_priority = [p for p in negotiation_points if (p.priority >= 4)]
 
674
 
675
  for point in low_priority[:2]:
676
  if (point.estimated_difficulty == "hard"):
677
  concessions.append(f"{point.issue} - lower priority, high difficulty")
678
+
679
  return concessions
680
+
681
 
682
  def _generate_timing_guidance(self, negotiation_points: List[NegotiationPoint], contract_type: ContractType, provider: LLMProvider) -> str:
683
  """
 
693
 
694
  else:
695
  return "Progressive approach: start with easier wins to build momentum"
696
+
697
 
698
  def _generate_risk_mitigation_plan(self, risk_analysis: RiskScore, negotiation_points: List[NegotiationPoint], provider: LLMProvider) -> str:
699
  """
 
701
  """
702
  if (risk_analysis.overall_score >= 70):
703
  return "High risk level - focus on critical term resolution. Have fallback positions ready."
704
+
705
  elif (risk_analysis.overall_score >= 50):
706
  return "Moderate risk - prioritize 2-3 key improvements. Document remaining risks."
707
+
708
  else:
709
  return "Manageable risk level - focus on most impactful improvements."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
services/protection_checker.py CHANGED
@@ -6,7 +6,6 @@ from typing import Dict
6
  from typing import Tuple
7
  from pathlib import Path
8
  from typing import Optional
9
- from dataclasses import dataclass
10
 
11
  # Add parent directory to path for imports
12
  sys.path.append(str(Path(__file__).parent.parent))
@@ -16,41 +15,8 @@ from utils.logger import log_error
16
  from config.risk_rules import RiskRules
17
  from config.risk_rules import ContractType
18
  from utils.logger import ContractAnalyzerLogger
19
- from services.clause_extractor import ExtractedClause
20
-
21
-
22
- @dataclass
23
- class MissingProtection:
24
- """
25
- Missing protection item with comprehensive risk analysis
26
- """
27
- protection : str
28
- importance : str # "critical", "high", "medium", "low"
29
- risk_score : float # 0-100 from risk_rules
30
- explanation : str
31
- recommendation : str
32
- categories : List[str]
33
- contract_type : Optional[str] = None
34
- suggested_language: Optional[str] = None
35
- legal_basis : Optional[str] = None
36
- affected_clauses : Optional[List[str]] = None
37
-
38
- def to_dict(self) -> Dict:
39
- """
40
- Convert to dictionary
41
- """
42
- return {"protection" : self.protection,
43
- "importance" : self.importance,
44
- "risk_score" : round(self.risk_score, 2),
45
- "explanation" : self.explanation,
46
- "recommendation" : self.recommendation,
47
- "categories" : self.categories,
48
- "contract_type" : self.contract_type,
49
- "suggested_language": self.suggested_language,
50
- "legal_basis" : self.legal_basis,
51
- "affected_clauses" : self.affected_clauses or [],
52
- }
53
-
54
 
55
 
56
  class ProtectionChecker:
@@ -60,208 +26,155 @@ class ProtectionChecker:
60
  def __init__(self, contract_type: ContractType = ContractType.GENERAL):
61
  """
62
  Initialize protection checker with contract-type specific analysis
63
-
64
  Arguments:
65
  ----------
66
  contract_type { ContractType } : Contract type for protection prioritization
67
  """
68
- self.contract_type = contract_type
69
- self.rules = RiskRules()
70
- self.logger = ContractAnalyzerLogger.get_logger()
71
-
72
  # Contract-type specific protection priorities
73
  self.protection_priorities = self._get_contract_type_priorities()
74
-
75
- log_info("ProtectionChecker initialized",
76
- contract_type = contract_type.value,
77
  protection_count = len(self.rules.PROTECTION_CHECKLIST),
78
  )
79
-
80
 
81
  def _get_contract_type_priorities(self) -> Dict[str, List[str]]:
82
  """
83
  Get protection priorities by contract type
84
  """
85
- priorities = {ContractType.EMPLOYMENT.value : ['for_cause_definition', 'severance_provision', 'prior_ip_exclusion', 'confidentiality_duration'],
86
- ContractType.SOFTWARE.value : ['liability_cap', 'prior_ip_exclusion', 'mutual_indemnification', 'dispute_resolution'],
87
- ContractType.CONSULTING.value : ['liability_cap', 'mutual_indemnification', 'payment_terms', 'change_control_process'],
88
- ContractType.NDA.value : ['confidentiality_duration', 'prior_ip_exclusion', 'dispute_resolution'],
89
- ContractType.LEASE.value : ['dispute_resolution', 'change_control_process', 'insurance_requirements'],
90
- ContractType.PURCHASE.value : ['liability_cap', 'warranty_protection', 'dispute_resolution'],
91
- ContractType.GENERAL.value : ['liability_cap', 'mutual_indemnification', 'dispute_resolution'],
92
  }
93
-
94
  return priorities.get(self.contract_type.value, [])
95
-
96
 
97
  @ContractAnalyzerLogger.log_execution_time("check_missing_protections")
98
  def check_missing_protections(self, contract_text: str, clauses: List[ExtractedClause], contract_type: Optional[ContractType] = None) -> List[MissingProtection]:
99
  """
100
  Identify all missing protections using comprehensive RiskRules framework
101
-
102
  Arguments:
103
  ----------
104
  contract_text { str } : Full contract text
105
 
106
  clauses { list } : Extracted clauses
107
-
108
  contract_type { ContractType } : Override contract type
109
-
110
  Returns:
111
  --------
112
  { list } : List of MissingProtection objects
113
  """
114
-
115
  # Update contract type if provided
116
  if contract_type:
117
  self.contract_type = contract_type
118
  self.protection_priorities = self._get_contract_type_priorities()
119
-
120
  log_info("Starting missing protections analysis",
121
- text_length = len(contract_text),
122
- num_clauses = len(clauses),
123
- contract_type = self.contract_type.value,
124
  )
125
-
126
  missing = list()
127
  text_lower = contract_text.lower()
128
-
129
  # Check each protection in RiskRules PROTECTION_CHECKLIST
130
  for protection_id, config in self.rules.PROTECTION_CHECKLIST.items():
131
  is_present, found_in_clauses = self._check_protection_comprehensive(protection_id = protection_id,
132
- text_lower = text_lower,
133
- clauses = clauses,
134
  )
135
-
136
  if not is_present:
137
  missing_protection = self._create_missing_protection(protection_id = protection_id,
138
  config = config,
139
  found_in_clauses = found_in_clauses,
140
  )
141
-
142
  missing.append(missing_protection)
143
-
144
  # Prioritize by contract type and risk score
145
  final_missing = self._prioritize_missing_protections(missing_protections = missing)
146
-
147
  log_info("Missing protections analysis complete",
148
  total_missing = len(final_missing),
149
  critical = sum(1 for p in final_missing if (p.importance == "critical")),
150
  high = sum(1 for p in final_missing if (p.importance == "high")),
151
  )
152
-
153
  return final_missing
154
-
155
 
156
  def _check_protection_comprehensive(self, protection_id: str, text_lower: str, clauses: List[ExtractedClause]) -> Tuple[bool, List[str]]:
157
  """
158
  Comprehensive protection detection using multiple methods
159
-
160
  Returns:
161
  --------
162
  { tuple } : (is_present, list of clause references where protection was found)
163
  """
164
  found_in_clauses = list()
165
-
166
  # Enhanced protection patterns with regex for better matching
167
  protection_patterns = self._get_protection_patterns(protection_id = protection_id)
168
-
169
  # Check in full text with regex patterns
170
  for pattern in protection_patterns:
171
  if re.search(pattern, text_lower, re.IGNORECASE):
172
  return True, found_in_clauses
173
-
174
  # Check in relevant clauses with context awareness
175
  relevant_categories = self.rules.PROTECTION_CHECKLIST[protection_id]["categories"]
176
  relevant_clauses = [c for c in clauses if c.category in relevant_categories]
177
-
178
  for clause in relevant_clauses:
179
  clause_text_lower = clause.text.lower()
180
-
181
  for pattern in protection_patterns:
182
  if re.search(pattern, clause_text_lower, re.IGNORECASE):
183
  found_in_clauses.append(clause.reference)
 
184
  return True, found_in_clauses
185
-
186
  # Additional semantic checks for complex protections
187
- if self._check_protection_semantic(protection_id = protection_id, text_lower = text_lower, clauses = clauses):
188
  return True, found_in_clauses
189
-
190
  return False, found_in_clauses
191
-
192
 
193
  def _get_protection_patterns(self, protection_id: str) -> List[str]:
194
  """
195
  Get comprehensive regex patterns for each protection
196
  """
197
- patterns = {"for_cause_definition" : [r'for\s+cause\s+means',
198
- r'cause\s+defined\s+as',
199
- r'grounds?\s+for\s+termination',
200
- r'termination\s+for\s+cause',
201
- r'just\s+cause\s+definition',
202
- ],
203
- "severance_provision" : [r'severance\s+(pay|compensation|benefits)',
204
- r'separation\s+(pay|package|compensation)',
205
- r'termination\s+(pay|benefits)',
206
- r'upon\s+termination.*pay',
207
- r'severance.*equal\s+to',
208
- ],
209
- "mutual_indemnification" : [r'mutual\s+indemnification',
210
- r'each\s+party\s+shall\s+indemnify',
211
- r'both\s+parties\s+indemnify',
212
- r'reciprocal\s+indemnification',
213
- r'indemnification.*mutual',
214
- ],
215
- "liability_cap" : [r'liability.*cap',
216
- r'maximum\s+liability',
217
- r'limited\s+to.*\$?\d+',
218
- r'not\s+exceed.*\$?\d+',
219
- r'liability\s+shall\s+not\s+exceed',
220
- r'cap.*liability',
221
- ],
222
- "prior_ip_exclusion" : [r'prior\s+intellectual\s+property',
223
- r'existing\s+ip',
224
- r'background\s+ip',
225
- r'pre-existing',
226
- r'prior\s+inventions',
227
- r'personal\s+projects',
228
- ],
229
- "confidentiality_duration" : [r'confidentiality.*period\s+of',
230
- r'for\s+\d+\s+years\s+from',
231
- r'confidentiality.*expire',
232
- r'confidentiality.*term',
233
- r'duration.*confidentiality',
234
- ],
235
- "dispute_resolution" : [r'arbitration',
236
- r'mediation',
237
- r'dispute\s+resolution',
238
- r'resolution\s+of\s+disputes',
239
- r'alternative\s+dispute',
240
- r'adr',
241
- ],
242
- "change_control_process" : [r'change\s+order',
243
- r'change\s+request',
244
- r'amendment.*writing',
245
- r'modification.*writing',
246
- r'written\s+consent',
247
- r'change\s+control',
248
- ],
249
- "insurance_requirements" : [r'insurance\s+requirements',
250
- r'maintain\s+insurance',
251
- r'proof\s+of\s+insurance',
252
- r'coverage.*\$?\d+',
253
- r'liability\s+insurance',
254
- ],
255
- "force_majeure" : [r'force\s+majeure',
256
- r'act\s+of\s+god',
257
- r'unforeseeable',
258
- r'beyond\s+control',
259
- r'natural\s+disaster',
260
- ],
261
  }
262
-
263
  return patterns.get(protection_id, [rf'\b{protection_id}\b'])
264
-
265
 
266
  def _check_protection_semantic(self, protection_id: str, text_lower: str, clauses: List[ExtractedClause]) -> bool:
267
  """
@@ -271,33 +184,35 @@ class ProtectionChecker:
271
  # Check if there's any indemnification that's not mutual
272
  has_indemnification = bool(re.search(r'indemnif', text_lower))
273
  has_mutual_language = bool(re.search(r'mutual|each party|both parties', text_lower))
274
-
275
  return has_indemnification and has_mutual_language
276
-
277
  elif (protection_id == "liability_cap"):
278
  # Check if there's liability language but no cap
279
  has_liability = bool(re.search(r'liability|liable', text_lower))
280
  has_cap = bool(re.search(r'cap|limit|maximum|not exceed', text_lower))
281
-
282
  return has_liability and has_cap
283
-
284
  elif (protection_id == "prior_ip_exclusion"):
285
  # Check if there's IP assignment but no exclusion
286
  has_ip_assignment = bool(re.search(r'intellectual property|work product|inventions', text_lower))
287
  has_exclusion = bool(re.search(r'prior|existing|background|exclude', text_lower))
288
-
289
  return has_ip_assignment and has_exclusion
290
-
291
  return False
292
-
293
 
294
  def _create_missing_protection(self, protection_id: str, config: Dict, found_in_clauses: List[str]) -> MissingProtection:
295
  """
296
  Create comprehensive MissingProtection object
297
  """
298
- protection_name = self._format_protection_name(protection_id = protection_id)
299
-
300
- return MissingProtection(protection = protection_name,
 
 
301
  importance = config["importance"],
302
  risk_score = config["risk_if_missing"],
303
  explanation = self._get_comprehensive_explanation(protection_id = protection_id),
@@ -308,103 +223,104 @@ class ProtectionChecker:
308
  legal_basis = self._get_legal_basis(protection_id = protection_id),
309
  affected_clauses = found_in_clauses,
310
  )
311
-
312
-
313
- def _format_protection_name(self, protection_id: str) -> str:
314
- """
315
- Convert protection_id to readable name
316
- """
317
- names = {"for_cause_definition" : "For Cause Definition",
318
- "severance_provision" : "Severance Provision",
319
- "mutual_indemnification" : "Mutual Indemnification",
320
- "liability_cap" : "Liability Cap",
321
- "prior_ip_exclusion" : "Prior IP Exclusion",
322
- "confidentiality_duration" : "Confidentiality Duration Limit",
323
- "dispute_resolution" : "Dispute Resolution Process",
324
- "change_control_process" : "Change Control Process",
325
- "insurance_requirements" : "Insurance Requirements",
326
- "force_majeure" : "Force Majeure Protection",
327
- }
328
-
329
- return names.get(protection_id, protection_id.replace("_", " ").title())
330
-
331
 
332
  def _get_comprehensive_explanation(self, protection_id: str) -> str:
333
  """
334
  Get detailed explanation for why this protection matters
335
  """
336
  explanations = {"for_cause_definition" : ("Without a clear 'for cause' definition, termination grounds remain ambiguous and subject to interpretation abuse. "
337
- "This creates significant job insecurity and potential for arbitrary termination without proper recourse."),
 
338
  "severance_provision" : ("Missing severance provision means zero financial protection if terminated without cause. "
339
- "Industry standards provide 2-3 months salary to support transition and mitigate sudden income loss."),
 
340
  "mutual_indemnification" : ("One-sided indemnification creates asymmetric liability exposure. Mutual protection ensures both parties share "
341
- "responsibility for their respective breaches, negligence, or misconduct."),
 
342
  "liability_cap" : ("Unlimited liability exposes you to catastrophic financial risk beyond reasonable business expectations. "
343
- "Standard practice caps liability at fees paid or a reasonable multiple of contract value."),
 
344
  "prior_ip_exclusion" : ("Without prior IP exclusion, your existing intellectual property and personal projects could be claimed by the other party. "
345
- "This protection preserves ownership of work created before and outside this engagement."),
 
346
  "confidentiality_duration" : ("Indefinite confidentiality obligations unreasonably restrict future business activities indefinitely. "
347
- "Industry standards limit confidentiality to 3-5 years post-termination for most information."),
 
348
  "dispute_resolution" : ("Without formal dispute resolution, conflicts escalate directly to costly litigation. Mediation and arbitration "
349
- "provide efficient, cost-effective alternatives with specialized expertise."),
 
350
  "change_control_process" : ("Lack of change control enables scope creep and verbal modifications that create ambiguity. Formal processes "
351
- "ensure all changes are documented, approved, and properly scoped."),
352
- "insurance_requirements" : ("Missing insurance requirements leave you exposed to uncovered liabilities. Proper coverage transfers "
353
- "risk and provides financial protection for both parties."),
 
 
354
  "force_majeure" : ("Without force majeure protection, you remain liable for performance during unforeseeable events beyond control. "
355
- "This clause provides reasonable relief during extraordinary circumstances."),
 
356
  }
357
-
358
  return explanations.get(protection_id, "This protection is critical for balanced risk allocation and legal fairness.")
359
-
360
 
361
  def _get_detailed_recommendation(self, protection_id: str) -> str:
362
  """
363
  Get detailed recommendation for adding this protection
364
  """
365
  recommendations = {"for_cause_definition" : ("Add clear 'For Cause' definition including: gross negligence, willful misconduct, material breach after "
366
- "30-day cure period, conviction of felony, or fraud. Require written notice specifying grounds."),
 
367
  "severance_provision" : ("Include severance equal to 2-3 months base salary for termination without cause, payable within 30 days. "
368
- "Add pro-rated bonus calculation and continuation of benefits during severance period."),
 
369
  "mutual_indemnification" : ("Replace one-sided language with: 'Each party shall indemnify, defend, and hold harmless the other party "
370
- "from claims arising from their respective breach, negligence, or willful misconduct.'"),
 
371
  "liability_cap" : ("Add: 'Total liability of either party under this Agreement shall not exceed the greater of (a) fees paid "
372
- "in the 12 months preceding the claim, or (b) $[reasonable amount]. Exclude liability for indirect damages.'"),
 
373
  "prior_ip_exclusion" : ("Include: 'Work Product excludes Employee's prior intellectual property, existing inventions, personal projects "
374
- "unrelated to Company business, and open source contributions. Attach prior IP list as Exhibit A.'"),
 
375
  "confidentiality_duration" : ("Specify: 'Confidentiality obligations shall survive termination for 3-5 years. Trade secrets protected "
376
- "indefinitely but must be specifically identified. Publicly available information excluded.'"),
 
377
  "dispute_resolution" : ("Add: 'Disputes shall first be subject to 30-day good faith mediation. If unresolved, binding arbitration "
378
- "under [rules] in [neutral location]. Each party bears own costs, arbitrator may award fees to prevailing party.'"),
 
379
  "change_control_process" : ("Include: 'All amendments require written change orders signed by both parties. Change orders must specify "
380
- "scope, timeline, cost, and acceptance criteria. Verbal agreements are not binding.'"),
 
381
  "insurance_requirements" : ("Specify: 'Contractor shall maintain general liability insurance of $1M per occurrence, professional liability "
382
- "insurance of $2M, and workers' compensation. Provide certificates of insurance before commencement.'"),
 
383
  "force_majeure" : ("Add: 'Neither party liable for failure to perform due to causes beyond reasonable control including acts of God, "
384
- "war, strikes, or natural disasters. Performance suspended during event, resume when practicable.'"),
 
385
  }
386
-
387
  return recommendations.get(protection_id, "Negotiate to include this standard protection for balanced risk allocation.")
388
-
389
 
390
  def _get_suggested_language(self, protection_id: str) -> str:
391
  """
392
  Get actual suggested clause language
393
  """
394
- language_library = {"for_cause_definition" : "\"For Cause\" means: (a) gross negligence or willful misconduct; (b) material breach of this Agreement after 30-day written notice and cure period; (c) conviction of a felony; or (d) fraud, dishonesty, or embezzlement.",
395
- "severance_provision" : "Upon termination without cause, Company shall pay Employee severance equal to three months of base salary, payable within 30 days of termination. Employee shall also receive pro-rated annual bonus and continuation of health benefits during severance period.",
396
- "mutual_indemnification" : "Each party shall indemnify, defend, and hold harmless the other party from and against any and all claims, damages, losses, and expenses arising from the indemnifying party's breach of this Agreement, negligence, or willful misconduct.",
397
- "liability_cap" : "Notwithstanding anything to the contrary, the total liability of either party under this Agreement shall not exceed the greater of (a) the fees paid by Customer to Provider in the twelve months preceding the claim, or (b) $500,000. Neither party shall be liable for any indirect, special, incidental, or consequential damages.",
398
- "prior_ip_exclusion" : "Work Product excludes any intellectual property, inventions, or creative works developed by Employee prior to this Agreement or developed outside the scope of employment without using Company resources. Employee has listed prior IP in Exhibit A. Background IP remains the property of its respective owner.",
399
- "confidentiality_duration" : "The obligations of confidentiality shall survive termination of this Agreement for a period of five years. Trade secrets shall be protected indefinitely. Confidential Information shall not include information that is or becomes publicly available through no fault of Receiving Party.",
400
- "dispute_resolution" : "Any dispute arising under this Agreement shall first be submitted to mediation with a mutually acceptable mediator. If mediation fails after 30 days, either party may initiate binding arbitration under the rules of the American Arbitration Association. The prevailing party in any dispute shall be entitled to recover reasonable attorneys' fees and costs.",
401
- "change_control_process" : "No amendment, modification, or waiver of any provision of this Agreement shall be effective unless in writing and signed by both parties. All change requests must be submitted in writing as Change Orders, specifying the changes, associated costs, timeline impacts, and acceptance criteria.",
402
- "insurance_requirements" : "Contractor shall maintain at its own expense: (a) Commercial General Liability insurance with limits of $1,000,000 per occurrence; (b) Professional Liability insurance with limits of $2,000,000 per claim; and (c) Workers' Compensation insurance as required by law. Certificates of insurance shall be provided to Client upon request.",
403
- "force_majeure" : "Neither party shall be liable for any failure or delay in performance under this Agreement due to causes beyond its reasonable control, including acts of God, war, terrorism, labor disputes, or governmental actions. The affected party shall notify the other party promptly and resume performance as soon as practicable.",
404
  }
405
-
406
  return language_library.get(protection_id, "Standard protection clause appropriate for this contract type.")
407
-
408
 
409
  def _get_legal_basis(self, protection_id: str) -> str:
410
  """
@@ -412,7 +328,7 @@ class ProtectionChecker:
412
  """
413
  legal_bases = {"for_cause_definition" : "Employment protection statutes and doctrine of good faith and fair dealing",
414
  "severance_provision" : "Industry standards and reasonable notice requirements",
415
- "mutual_indemnification" : "Principle of mutuality and unconscionability doctrine",
416
  "liability_cap" : "Commercial reasonableness and risk allocation principles",
417
  "prior_ip_exclusion" : "Intellectual property rights and prior ownership protection",
418
  "confidentiality_duration" : "Reasonableness standard for restrictive covenants",
@@ -421,9 +337,9 @@ class ProtectionChecker:
421
  "insurance_requirements" : "Risk management and liability transfer principles",
422
  "force_majeure" : "Impossibility of performance and commercial impracticability",
423
  }
424
-
425
  return legal_bases.get(protection_id, "Standard contractual protection for balanced risk allocation")
426
-
427
 
428
  def _prioritize_missing_protections(self, missing_protections: List[MissingProtection]) -> List[MissingProtection]:
429
  """
@@ -431,45 +347,48 @@ class ProtectionChecker:
431
  """
432
  if not missing_protections:
433
  return []
434
-
435
  # Sort by risk score (descending)
436
  missing_protections.sort(key = lambda p: p.risk_score, reverse = True)
437
-
438
  # Boost priority for contract-type specific critical protections
439
  for protection in missing_protections:
440
- protection_key = protection.protection.lower().replace(" ", "_")
441
- if protection_key in self.protection_priorities:
442
  # Boost for contract relevance
443
- protection.risk_score += 10
444
-
445
  # Re-sort with boosted scores
446
  missing_protections.sort(key = lambda p: p.risk_score, reverse = True)
447
 
 
448
  # Return top 15 most critical missing protections
449
- return missing_protections[:15]
450
-
 
 
451
 
452
  def get_critical_missing(self, protections: List[MissingProtection]) -> List[MissingProtection]:
453
  """
454
  Filter to only critical missing protections
455
  """
456
  critical = [p for p in protections if (p.importance == "critical")]
457
-
458
  log_info(f"Found {len(critical)} critical missing protections")
459
-
460
  return critical
461
-
462
 
463
  def get_by_category(self, protections: List[MissingProtection], category: str) -> List[MissingProtection]:
464
  """
465
  Filter protections by category
466
  """
467
- filtered = [p for p in protections if (category in p.categories)]
468
-
469
  log_info(f"Found {len(filtered)} missing protections in category '{category}'")
470
-
471
  return filtered
472
-
473
 
474
  def get_importance_distribution(self, protections: List[MissingProtection]) -> Dict[str, int]:
475
  """
@@ -477,35 +396,38 @@ class ProtectionChecker:
477
  """
478
  distribution = {"critical" : 0,
479
  "high" : 0,
480
- "medium" : 0,
481
  "low" : 0,
482
  }
483
-
484
  for protection in protections:
485
  distribution[protection.importance] = distribution.get(protection.importance, 0) + 1
486
-
487
  log_info("Missing protections importance distribution", **distribution)
488
-
489
  return distribution
490
-
491
 
492
  def get_risk_score_summary(self, protections: List[MissingProtection]) -> Dict[str, float]:
493
  """
494
  Get risk score summary statistics
495
  """
496
  if not protections:
497
- return {"total_risk": 0, "average_risk": 0, "max_risk": 0}
498
-
499
- scores = [p.risk_score for p in protections]
500
- total_risk = sum(scores)
501
- average_risk = total_risk / len(scores)
502
- max_risk = max(scores)
503
-
504
- summary = {"total_risk" : round(total_risk, 2),
505
- "average_risk" : round(average_risk, 2),
506
- "max_risk" : round(max_risk, 2),
507
- }
508
-
 
 
 
509
  log_info("Missing protections risk score summary", **summary)
510
-
511
- return summary
 
6
  from typing import Tuple
7
  from pathlib import Path
8
  from typing import Optional
 
9
 
10
  # Add parent directory to path for imports
11
  sys.path.append(str(Path(__file__).parent.parent))
 
15
  from config.risk_rules import RiskRules
16
  from config.risk_rules import ContractType
17
  from utils.logger import ContractAnalyzerLogger
18
+ from services.data_models import ExtractedClause
19
+ from services.data_models import MissingProtection
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
 
22
  class ProtectionChecker:
 
26
  def __init__(self, contract_type: ContractType = ContractType.GENERAL):
27
  """
28
  Initialize protection checker with contract-type specific analysis
29
+
30
  Arguments:
31
  ----------
32
  contract_type { ContractType } : Contract type for protection prioritization
33
  """
34
+ self.contract_type = contract_type
35
+ self.rules = RiskRules()
36
+ self.logger = ContractAnalyzerLogger.get_logger()
37
+
38
  # Contract-type specific protection priorities
39
  self.protection_priorities = self._get_contract_type_priorities()
40
+
41
+ log_info("ProtectionChecker initialized",
42
+ contract_type = self.contract_type.value,
43
  protection_count = len(self.rules.PROTECTION_CHECKLIST),
44
  )
45
+
46
 
47
  def _get_contract_type_priorities(self) -> Dict[str, List[str]]:
48
  """
49
  Get protection priorities by contract type
50
  """
51
+ priorities = {ContractType.EMPLOYMENT.value : ['for_cause_definition', 'severance_provision', 'prior_ip_exclusion', 'confidentiality_duration'],
52
+ ContractType.SOFTWARE.value : ['liability_cap', 'prior_ip_exclusion', 'mutual_indemnification', 'dispute_resolution'],
53
+ ContractType.CONSULTING.value : ['liability_cap', 'mutual_indemnification', 'payment_terms', 'change_control_process'],
54
+ ContractType.NDA.value : ['confidentiality_duration', 'prior_ip_exclusion', 'dispute_resolution'],
55
+ ContractType.LEASE.value : ['dispute_resolution', 'change_control_process', 'insurance_requirements'],
56
+ ContractType.PURCHASE.value : ['liability_cap', 'warranty_protection', 'dispute_resolution'],
57
+ ContractType.GENERAL.value : ['liability_cap', 'mutual_indemnification', 'dispute_resolution'],
58
  }
59
+
60
  return priorities.get(self.contract_type.value, [])
61
+
62
 
63
  @ContractAnalyzerLogger.log_execution_time("check_missing_protections")
64
  def check_missing_protections(self, contract_text: str, clauses: List[ExtractedClause], contract_type: Optional[ContractType] = None) -> List[MissingProtection]:
65
  """
66
  Identify all missing protections using comprehensive RiskRules framework
67
+
68
  Arguments:
69
  ----------
70
  contract_text { str } : Full contract text
71
 
72
  clauses { list } : Extracted clauses
73
+
74
  contract_type { ContractType } : Override contract type
75
+
76
  Returns:
77
  --------
78
  { list } : List of MissingProtection objects
79
  """
80
+
81
  # Update contract type if provided
82
  if contract_type:
83
  self.contract_type = contract_type
84
  self.protection_priorities = self._get_contract_type_priorities()
85
+
86
  log_info("Starting missing protections analysis",
87
+ text_length = len(contract_text),
88
+ num_clauses = len(clauses),
89
+ contract_type = self.contract_type.value,
90
  )
91
+
92
  missing = list()
93
  text_lower = contract_text.lower()
94
+
95
  # Check each protection in RiskRules PROTECTION_CHECKLIST
96
  for protection_id, config in self.rules.PROTECTION_CHECKLIST.items():
97
  is_present, found_in_clauses = self._check_protection_comprehensive(protection_id = protection_id,
98
+ text_lower = text_lower,
99
+ clauses = clauses,
100
  )
101
+
102
  if not is_present:
103
  missing_protection = self._create_missing_protection(protection_id = protection_id,
104
  config = config,
105
  found_in_clauses = found_in_clauses,
106
  )
107
+
108
  missing.append(missing_protection)
109
+
110
  # Prioritize by contract type and risk score
111
  final_missing = self._prioritize_missing_protections(missing_protections = missing)
112
+
113
  log_info("Missing protections analysis complete",
114
  total_missing = len(final_missing),
115
  critical = sum(1 for p in final_missing if (p.importance == "critical")),
116
  high = sum(1 for p in final_missing if (p.importance == "high")),
117
  )
118
+
119
  return final_missing
120
+
121
 
122
  def _check_protection_comprehensive(self, protection_id: str, text_lower: str, clauses: List[ExtractedClause]) -> Tuple[bool, List[str]]:
123
  """
124
  Comprehensive protection detection using multiple methods
125
+
126
  Returns:
127
  --------
128
  { tuple } : (is_present, list of clause references where protection was found)
129
  """
130
  found_in_clauses = list()
131
+
132
  # Enhanced protection patterns with regex for better matching
133
  protection_patterns = self._get_protection_patterns(protection_id = protection_id)
134
+
135
  # Check in full text with regex patterns
136
  for pattern in protection_patterns:
137
  if re.search(pattern, text_lower, re.IGNORECASE):
138
  return True, found_in_clauses
139
+
140
  # Check in relevant clauses with context awareness
141
  relevant_categories = self.rules.PROTECTION_CHECKLIST[protection_id]["categories"]
142
  relevant_clauses = [c for c in clauses if c.category in relevant_categories]
143
+
144
  for clause in relevant_clauses:
145
  clause_text_lower = clause.text.lower()
146
+
147
  for pattern in protection_patterns:
148
  if re.search(pattern, clause_text_lower, re.IGNORECASE):
149
  found_in_clauses.append(clause.reference)
150
+
151
  return True, found_in_clauses
152
+
153
  # Additional semantic checks for complex protections
154
+ if self._check_protection_semantic(protection_id=protection_id, text_lower=text_lower, clauses=clauses):
155
  return True, found_in_clauses
156
+
157
  return False, found_in_clauses
158
+
159
 
160
  def _get_protection_patterns(self, protection_id: str) -> List[str]:
161
  """
162
  Get comprehensive regex patterns for each protection
163
  """
164
+ patterns = {"for_cause_definition" : [r'for\s+cause\s+means', r'cause\s+defined\s+as', r'grounds?\s+for\s+termination', r'termination\s+for\s+cause', r'just\s+cause\s+definition',],
165
+ "severance_provision" : [r'severance\s+(pay|compensation|benefits)', r'separation\s+(pay|package|compensation)', r'termination\s+(pay|benefits)', r'upon\s+termination.*pay', r'severance.*equal\s+to',],
166
+ "mutual_indemnification" : [r'mutual\s+indemnification', r'each\s+party\s+shall\s+indemnify', r'both\s+parties\s+indemnify', r'reciprocal\s+indemnification', r'indemnification.*mutual',],
167
+ "liability_cap" : [r'liability.*cap', r'maximum\s+liability', r'limited\s+to.*\$?\d+', r'not\s+exceed.*\$?\d+', r'liability\s+shall\s+not\s+exceed', r'cap.*liability',],
168
+ "prior_ip_exclusion" : [r'prior\s+intellectual\s+property', r'existing\s+ip', r'background\s+ip', r'pre-existing', r'prior\s+inventions', r'personal\s+projects',],
169
+ "confidentiality_duration" : [r'confidentiality.*period\s+of', r'for\s+\d+\s+years\s+from', r'confidentiality.*expire', r'confidentiality.*term', r'duration.*confidentiality',],
170
+ "dispute_resolution" : [r'arbitration', r'mediation', r'dispute\s+resolution', r'resolution\s+of\s+disputes', r'alternative\s+dispute', r'adr',],
171
+ "change_control_process" : [r'change\s+order', r'change\s+request', r'amendment.*writing', r'modification.*writing', r'written\s+consent', r'change\s+control',],
172
+ "insurance_requirements" : [r'insurance\s+requirements', r'maintain\s+insurance', r'proof\s+of\s+insurance', r'coverage.*\$?\d+', r'liability\s+insurance',],
173
+ "force_majeure" : [ r'force\s+majeure', r'act\s+of\s+god', r'unforeseeable', r'beyond\s+control', r'natural\s+disaster',],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  }
175
+
176
  return patterns.get(protection_id, [rf'\b{protection_id}\b'])
177
+
178
 
179
  def _check_protection_semantic(self, protection_id: str, text_lower: str, clauses: List[ExtractedClause]) -> bool:
180
  """
 
184
  # Check if there's any indemnification that's not mutual
185
  has_indemnification = bool(re.search(r'indemnif', text_lower))
186
  has_mutual_language = bool(re.search(r'mutual|each party|both parties', text_lower))
187
+
188
  return has_indemnification and has_mutual_language
189
+
190
  elif (protection_id == "liability_cap"):
191
  # Check if there's liability language but no cap
192
  has_liability = bool(re.search(r'liability|liable', text_lower))
193
  has_cap = bool(re.search(r'cap|limit|maximum|not exceed', text_lower))
194
+
195
  return has_liability and has_cap
196
+
197
  elif (protection_id == "prior_ip_exclusion"):
198
  # Check if there's IP assignment but no exclusion
199
  has_ip_assignment = bool(re.search(r'intellectual property|work product|inventions', text_lower))
200
  has_exclusion = bool(re.search(r'prior|existing|background|exclude', text_lower))
201
+
202
  return has_ip_assignment and has_exclusion
203
+
204
  return False
205
+
206
 
207
  def _create_missing_protection(self, protection_id: str, config: Dict, found_in_clauses: List[str]) -> MissingProtection:
208
  """
209
  Create comprehensive MissingProtection object
210
  """
211
+ # Use centralized map for display name
212
+ protection_name = self.rules.get_protection_display_name(protection_id)
213
+
214
+ return MissingProtection(protection_id = protection_id,
215
+ protection = protection_name,
216
  importance = config["importance"],
217
  risk_score = config["risk_if_missing"],
218
  explanation = self._get_comprehensive_explanation(protection_id = protection_id),
 
223
  legal_basis = self._get_legal_basis(protection_id = protection_id),
224
  affected_clauses = found_in_clauses,
225
  )
226
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
 
228
  def _get_comprehensive_explanation(self, protection_id: str) -> str:
229
  """
230
  Get detailed explanation for why this protection matters
231
  """
232
  explanations = {"for_cause_definition" : ("Without a clear 'for cause' definition, termination grounds remain ambiguous and subject to interpretation abuse. "
233
+ "This creates significant job insecurity and potential for arbitrary termination without proper recourse."
234
+ ),
235
  "severance_provision" : ("Missing severance provision means zero financial protection if terminated without cause. "
236
+ "Industry standards provide 2-3 months salary to support transition and mitigate sudden income loss."
237
+ ),
238
  "mutual_indemnification" : ("One-sided indemnification creates asymmetric liability exposure. Mutual protection ensures both parties share "
239
+ "responsibility for their respective breaches, negligence, or misconduct."
240
+ ),
241
  "liability_cap" : ("Unlimited liability exposes you to catastrophic financial risk beyond reasonable business expectations. "
242
+ "Standard practice caps liability at fees paid or a reasonable multiple of contract value."
243
+ ),
244
  "prior_ip_exclusion" : ("Without prior IP exclusion, your existing intellectual property and personal projects could be claimed by the other party. "
245
+ "This protection preserves ownership of work created before and outside this engagement."
246
+ ),
247
  "confidentiality_duration" : ("Indefinite confidentiality obligations unreasonably restrict future business activities indefinitely. "
248
+ "Industry standards limit confidentiality to 3-5 years post-termination for most information."
249
+ ),
250
  "dispute_resolution" : ("Without formal dispute resolution, conflicts escalate directly to costly litigation. Mediation and arbitration "
251
+ "provide efficient, cost-effective alternatives with specialized expertise."
252
+ ),
253
  "change_control_process" : ("Lack of change control enables scope creep and verbal modifications that create ambiguity. Formal processes "
254
+ "ensure all changes are documented, approved, and properly scoped."
255
+ ),
256
+ "insurance_requirements" : ("Missing insurance requirements leave you exposed to uncovered liabilities. "
257
+ "Proper coverage transfers risk and provides financial protection for both parties."
258
+ ),
259
  "force_majeure" : ("Without force majeure protection, you remain liable for performance during unforeseeable events beyond control. "
260
+ "This clause provides reasonable relief during extraordinary circumstances."
261
+ ),
262
  }
263
+
264
  return explanations.get(protection_id, "This protection is critical for balanced risk allocation and legal fairness.")
265
+
266
 
267
  def _get_detailed_recommendation(self, protection_id: str) -> str:
268
  """
269
  Get detailed recommendation for adding this protection
270
  """
271
  recommendations = {"for_cause_definition" : ("Add clear 'For Cause' definition including: gross negligence, willful misconduct, material breach after "
272
+ "30-day cure period, conviction of felony, or fraud. Require written notice specifying grounds."
273
+ ),
274
  "severance_provision" : ("Include severance equal to 2-3 months base salary for termination without cause, payable within 30 days. "
275
+ "Add pro-rated bonus calculation and continuation of benefits during severance period."
276
+ ),
277
  "mutual_indemnification" : ("Replace one-sided language with: 'Each party shall indemnify, defend, and hold harmless the other party "
278
+ "from claims arising from their respective breach, negligence, or willful misconduct.'"
279
+ ),
280
  "liability_cap" : ("Add: 'Total liability of either party under this Agreement shall not exceed the greater of (a) fees paid "
281
+ "in the 12 months preceding the claim, or (b) $[reasonable amount]. Exclude liability for indirect damages.'"
282
+ ),
283
  "prior_ip_exclusion" : ("Include: 'Work Product excludes Employee's prior intellectual property, existing inventions, personal projects "
284
+ "unrelated to Company business, and open source contributions. Attach prior IP list as Exhibit A.'"
285
+ ),
286
  "confidentiality_duration" : ("Specify: 'Confidentiality obligations shall survive termination for 3-5 years. Trade secrets protected "
287
+ "indefinitely but must be specifically identified. Publicly available information excluded.'"
288
+ ),
289
  "dispute_resolution" : ("Add: 'Disputes shall first be subject to 30-day good faith mediation. If unresolved, binding arbitration "
290
+ "under [rules] in [neutral location]. Each party bears own costs, arbitrator may award fees to prevailing party.'"
291
+ ),
292
  "change_control_process" : ("Include: 'All amendments require written change orders signed by both parties. Change orders must specify "
293
+ "scope, timeline, cost, and acceptance criteria. Verbal agreements are not binding.'"
294
+ ),
295
  "insurance_requirements" : ("Specify: 'Contractor shall maintain general liability insurance of $1M per occurrence, professional liability "
296
+ "insurance of $2M, and workers' compensation. Provide certificates of insurance before commencement.'"
297
+ ),
298
  "force_majeure" : ("Add: 'Neither party liable for failure to perform due to causes beyond reasonable control including acts of God, "
299
+ "war, strikes, or natural disasters. Performance suspended during event, resume when practicable.'"
300
+ ),
301
  }
302
+
303
  return recommendations.get(protection_id, "Negotiate to include this standard protection for balanced risk allocation.")
304
+
305
 
306
  def _get_suggested_language(self, protection_id: str) -> str:
307
  """
308
  Get actual suggested clause language
309
  """
310
+ language_library = {"for_cause_definition" : ("\"For Cause\" means: (a) gross negligence or willful misconduct; (b) material breach of this Agreement after 30-day written notice and cure period; (c) conviction of a felony; or (d) fraud, dishonesty, or embezzlement."),
311
+ "severance_provision" : ("Upon termination without cause, Company shall pay Employee severance equal to three months of base salary, payable within 30 days of termination. Employee shall also receive pro-rated annual bonus and continuation of health benefits during severance period."),
312
+ "mutual_indemnification" : ("Each party shall indemnify, defend, and hold harmless the other party from and against any and all claims, damages, losses, and expenses arising from the indemnifying party's breach of this Agreement, negligence, or willful misconduct."),
313
+ "liability_cap" : ("Notwithstanding anything to the contrary, the total liability of either party under this Agreement shall not exceed the greater of (a) the fees paid by Customer to Provider in the twelve months preceding the claim, or (b) $500,000. Neither party shall be liable for any indirect, special, incidental, or consequential damages."),
314
+ "prior_ip_exclusion" : ("Work Product excludes any intellectual property, inventions, or creative works developed by Employee prior to this Agreement or developed outside the scope of employment without using Company resources. Employee has listed prior IP in Exhibit A. Background IP remains the property of its respective owner."),
315
+ "confidentiality_duration" : ("The obligations of confidentiality shall survive termination of this Agreement for a period of five years. Trade secrets shall be protected indefinitely. Confidential Information shall not include information that is or becomes publicly available through no fault of Receiving Party."),
316
+ "dispute_resolution" : ("Any dispute arising under this Agreement shall first be submitted to mediation with a mutually acceptable mediator. If mediation fails after 30 days, either party may initiate binding arbitration under the rules of the American Arbitration Association. The prevailing party in any dispute shall be entitled to recover reasonable attorneys' fees and costs."),
317
+ "change_control_process" : ("No amendment, modification, or waiver of any provision of this Agreement shall be effective unless in writing and signed by both parties. All change requests must be submitted in writing as Change Orders, specifying the changes, associated costs, timeline impacts, and acceptance criteria."),
318
+ "insurance_requirements" : ("Contractor shall maintain at its own expense: (a) Commercial General Liability insurance with limits of $1,000,000 per occurrence; (b) Professional Liability insurance with limits of $2,000,000 per claim; and (c) Workers' Compensation insurance as required by law. Certificates of insurance shall be provided to Client upon request."),
319
+ "force_majeure" : ("Neither party shall be liable for any failure or delay in performance under this Agreement due to causes beyond its reasonable control, including acts of God, war, terrorism, labor disputes, or governmental actions. The affected party shall notify the other party promptly and resume performance as soon as practicable."),
320
  }
321
+
322
  return language_library.get(protection_id, "Standard protection clause appropriate for this contract type.")
323
+
324
 
325
  def _get_legal_basis(self, protection_id: str) -> str:
326
  """
 
328
  """
329
  legal_bases = {"for_cause_definition" : "Employment protection statutes and doctrine of good faith and fair dealing",
330
  "severance_provision" : "Industry standards and reasonable notice requirements",
331
+ "mutual_indemnification" : "Principle of mutuality and unconscionability doctrine",
332
  "liability_cap" : "Commercial reasonableness and risk allocation principles",
333
  "prior_ip_exclusion" : "Intellectual property rights and prior ownership protection",
334
  "confidentiality_duration" : "Reasonableness standard for restrictive covenants",
 
337
  "insurance_requirements" : "Risk management and liability transfer principles",
338
  "force_majeure" : "Impossibility of performance and commercial impracticability",
339
  }
340
+
341
  return legal_bases.get(protection_id, "Standard contractual protection for balanced risk allocation")
342
+
343
 
344
  def _prioritize_missing_protections(self, missing_protections: List[MissingProtection]) -> List[MissingProtection]:
345
  """
 
347
  """
348
  if not missing_protections:
349
  return []
350
+
351
  # Sort by risk score (descending)
352
  missing_protections.sort(key = lambda p: p.risk_score, reverse = True)
353
+
354
  # Boost priority for contract-type specific critical protections
355
  for protection in missing_protections:
356
+ # Use the protection_id for the check
357
+ if protection.protection_id in self.protection_priorities:
358
  # Boost for contract relevance
359
+ protection.risk_score += 10
360
+
361
  # Re-sort with boosted scores
362
  missing_protections.sort(key = lambda p: p.risk_score, reverse = True)
363
 
364
+
365
  # Return top 15 most critical missing protections
366
+ top_missing_protections = missing_protections[:15]
367
+
368
+ return top_missing_protections
369
+
370
 
371
  def get_critical_missing(self, protections: List[MissingProtection]) -> List[MissingProtection]:
372
  """
373
  Filter to only critical missing protections
374
  """
375
  critical = [p for p in protections if (p.importance == "critical")]
376
+
377
  log_info(f"Found {len(critical)} critical missing protections")
378
+
379
  return critical
380
+
381
 
382
  def get_by_category(self, protections: List[MissingProtection], category: str) -> List[MissingProtection]:
383
  """
384
  Filter protections by category
385
  """
386
+ filtered = [p for p in protections if category in p.categories]
387
+
388
  log_info(f"Found {len(filtered)} missing protections in category '{category}'")
389
+
390
  return filtered
391
+
392
 
393
  def get_importance_distribution(self, protections: List[MissingProtection]) -> Dict[str, int]:
394
  """
 
396
  """
397
  distribution = {"critical" : 0,
398
  "high" : 0,
399
+ "medium" : 0,
400
  "low" : 0,
401
  }
402
+
403
  for protection in protections:
404
  distribution[protection.importance] = distribution.get(protection.importance, 0) + 1
405
+
406
  log_info("Missing protections importance distribution", **distribution)
407
+
408
  return distribution
409
+
410
 
411
  def get_risk_score_summary(self, protections: List[MissingProtection]) -> Dict[str, float]:
412
  """
413
  Get risk score summary statistics
414
  """
415
  if not protections:
416
+ return {"total_risk" : 0,
417
+ "average_risk" : 0,
418
+ "max_risk" : 0,
419
+ }
420
+
421
+ scores = [p.risk_score for p in protections]
422
+ total_risk = sum(scores)
423
+ average_risk = total_risk / len(scores)
424
+ max_risk = max(scores)
425
+
426
+ summary = {"total_risk" : round(total_risk, 2),
427
+ "average_risk" : round(average_risk, 2),
428
+ "max_risk" : round(max_risk, 2),
429
+ }
430
+
431
  log_info("Missing protections risk score summary", **summary)
432
+
433
+ return summary
services/risk_analyzer.py CHANGED
@@ -7,7 +7,6 @@ from typing import Tuple
7
  from pathlib import Path
8
  from typing import Optional
9
  from dataclasses import field
10
- from dataclasses import dataclass
11
  from collections import defaultdict
12
 
13
  # Add parent directory to path for imports
@@ -17,74 +16,20 @@ from utils.logger import log_info
17
  from utils.logger import log_error
18
  from config.risk_rules import RiskRules
19
  from config.risk_rules import ContractType
20
- from utils.logger import ContractAnalyzerLogger
21
  from services.term_analyzer import TermAnalyzer
22
- from services.term_analyzer import UnfavorableTerm
23
- from services.clause_extractor import RiskClauseExtractor
 
 
 
24
  from services.protection_checker import ProtectionChecker
25
- from services.protection_checker import MissingProtection
26
  from services.contract_classifier import ContractCategory
27
  from services.contract_classifier import ContractClassifier
28
  from services.clause_extractor import ComprehensiveClauseExtractor
29
 
30
 
31
-
32
- @dataclass
33
- class RiskBreakdownItem:
34
- """
35
- Individual risk category breakdown
36
- """
37
- category : str
38
- score : int # 0-100
39
- summary : str
40
- findings : List[str] = field(default_factory = list)
41
-
42
-
43
- def to_dict(self) -> Dict[str, Any]:
44
- """
45
- Convert to dictionary
46
- """
47
- return {"category" : self.category,
48
- "score" : self.score,
49
- "summary" : self.summary,
50
- "findings" : self.findings,
51
- }
52
-
53
-
54
- @dataclass
55
- class RiskScore:
56
- """
57
- Comprehensive risk score with detailed breakdown
58
- """
59
- overall_score : int # 0-100
60
- risk_level : str # "CRITICAL", "HIGH", "MEDIUM", "LOW"
61
- category_scores : Dict[str, int] # Scores for each risk category
62
- risk_factors : List[str] # Top risk factors
63
- detailed_findings : Dict[str, List[str]] # Findings by category
64
- benchmark_comparison : Dict[str, str] # Market comparisons
65
- risk_breakdown : List[RiskBreakdownItem] # Detailed breakdown
66
- contract_type : str # Detected contract type
67
- unfavorable_terms : List[Dict] # Unfavorable terms found
68
- missing_protections : List[Dict] # Missing critical protections
69
-
70
-
71
- def to_dict(self) -> Dict[str, Any]:
72
- """
73
- Convert to dictionary for serialization
74
- """
75
- return {"overall_score" : self.overall_score,
76
- "risk_level" : self.risk_level,
77
- "category_scores" : self.category_scores,
78
- "risk_factors" : self.risk_factors,
79
- "detailed_findings" : self.detailed_findings,
80
- "benchmark_comparison" : self.benchmark_comparison,
81
- "risk_breakdown" : [item.to_dict() for item in self.risk_breakdown],
82
- "contract_type" : self.contract_type,
83
- "unfavorable_terms" : self.unfavorable_terms,
84
- "missing_protections" : self.missing_protections,
85
- }
86
-
87
-
88
  class RiskAnalyzer:
89
  """
90
  Orchestrates all analysis components and calculates comprehensive risk scores
@@ -92,7 +37,7 @@ class RiskAnalyzer:
92
  Analysis Pipeline:
93
  1. Contract Classification
94
  2. Clause Extraction
95
- 3.Term Analysis
96
  4. Protection Checking
97
  5. Risk Scoring
98
  """
@@ -110,8 +55,7 @@ class RiskAnalyzer:
110
 
111
  # Initialize all analysis components
112
  self.contract_classifier = ContractClassifier(model_loader = model_loader)
113
- self.clause_extractor = ComprehensiveClauseExtractor(model_loader = model_loader)
114
- self.risk_clause_extractor = None # Lazy initialization
115
  self.term_analyzer = TermAnalyzer()
116
  self.protection_checker = ProtectionChecker()
117
 
@@ -121,7 +65,7 @@ class RiskAnalyzer:
121
  @ContractAnalyzerLogger.log_execution_time("analyze_contract_risk")
122
  def analyze_contract_risk(self, contract_text: str) -> RiskScore:
123
  """
124
- MAIN ENTRY POINT: Comprehensive contract risk analysis
125
 
126
  Arguments:
127
  ----------
@@ -134,24 +78,24 @@ class RiskAnalyzer:
134
 
135
  log_info("Starting Comprehensive Contract Risk Analysis...", text_length = len(contract_text))
136
 
137
- # CONTRACT CLASSIFICATION
138
  contract_category = self._classify_contract(contract_text = contract_text)
139
- log_info("Phase 1 complete: Contract classified", contract_type = contract_category.category)
140
 
141
- # CLAUSE EXTRACTION
142
  clauses = self._extract_clauses(contract_text = contract_text,
143
  contract_category = contract_category,
144
  )
145
 
146
- log_info("Phase 2 complete: Clauses extracted", num_clauses = len(clauses))
147
 
148
- # UNFAVORABLE TERM ANALYSIS
149
  unfavorable_terms = self._analyze_unfavorable_terms(contract_text = contract_text,
150
  clauses = clauses,
151
  contract_category = contract_category,
152
  )
153
 
154
- log_info("Phase 3 complete: Unfavorable terms analyzed", num_unfavorable_terms = len(unfavorable_terms))
155
 
156
  # MISSING PROTECTIONS ANALYSIS
157
  missing_protections = self._analyze_missing_protections(contract_text = contract_text,
@@ -159,7 +103,7 @@ class RiskAnalyzer:
159
  contract_category = contract_category,
160
  )
161
 
162
- log_info("Phase 4 complete: Missing protections analyzed", num_missing_protections = len(missing_protections))
163
 
164
  # RISK SCORING & AGGREGATION
165
  risk_score = self._calculate_comprehensive_risk(contract_category = contract_category,
@@ -208,16 +152,25 @@ class RiskAnalyzer:
208
 
209
  def _extract_clauses(self, contract_text: str, contract_category) -> List:
210
  """
211
- Extract clauses from contract
212
  """
213
- log_info("Extracting clauses from contract...")
214
 
215
  try:
216
- # Use comprehensive extractor for broad coverage
217
- clauses = self.clause_extractor.extract_clauses(contract_text = contract_text,
218
- max_clauses = 25)
 
 
 
 
219
 
220
- log_info("Clause extraction successful",
 
 
 
 
 
221
  total_clauses = len(clauses),
222
  categories = [c.category for c in clauses])
223
 
@@ -230,13 +183,13 @@ class RiskAnalyzer:
230
 
231
  def _analyze_unfavorable_terms(self, contract_text: str, clauses: List, contract_category) -> List[UnfavorableTerm]:
232
  """
233
- Phase 3: Analyze for unfavorable terms
234
  """
235
  log_info("Analyzing unfavorable terms...")
236
 
237
  try:
238
  # Initialize term analyzer with contract type
239
- contract_type_enum = self._get_contract_type_enum(contract_category.category)
240
  self.term_analyzer = TermAnalyzer(contract_type = contract_type_enum)
241
 
242
  unfavorable_terms = self.term_analyzer.analyze_unfavorable_terms(contract_text = contract_text,
@@ -244,7 +197,7 @@ class RiskAnalyzer:
244
 
245
  log_info("Unfavorable terms analysis successful",
246
  total_terms = len(unfavorable_terms),
247
- critical = sum(1 for t in unfavorable_terms if t.severity == "critical"))
248
 
249
  return unfavorable_terms
250
 
@@ -255,21 +208,21 @@ class RiskAnalyzer:
255
 
256
  def _analyze_missing_protections(self, contract_text: str, clauses: List, contract_category) -> List[MissingProtection]:
257
  """
258
- Phase 4: Analyze for missing protections
259
  """
260
  log_info("Analyzing missing protections...")
261
 
262
  try:
263
  # Initialize protection checker with contract type
264
- contract_type_enum = self._get_contract_type_enum(contract_category.category)
265
  self.protection_checker = ProtectionChecker(contract_type = contract_type_enum)
266
 
267
- missing_protections = self.protection_checker.check_missing_protections(contract_text = contract_text,
268
- clauses = clauses)
269
 
270
  log_info("Missing protections analysis successful",
271
  total_missing = len(missing_protections),
272
- critical = sum(1 for p in missing_protections if p.importance == "critical"))
273
 
274
  return missing_protections
275
 
@@ -281,12 +234,12 @@ class RiskAnalyzer:
281
  def _calculate_comprehensive_risk(self, contract_category, clauses: List, unfavorable_terms: List[UnfavorableTerm], missing_protections: List[MissingProtection],
282
  contract_text: str) -> RiskScore:
283
  """
284
- Phase 5: Calculate comprehensive risk score using all analysis results
285
  """
286
  log_info("Calculating comprehensive risk score...")
287
 
288
  # Get contract type for risk rule adjustments
289
- contract_type_enum = self._get_contract_type_enum(contract_category.category)
290
  adjusted_weights = self.rules.get_adjusted_weights(contract_type_enum)
291
 
292
  # Initialize scoring containers
@@ -296,13 +249,13 @@ class RiskAnalyzer:
296
 
297
  # Calculate risk for each category
298
  for risk_category in adjusted_weights.keys():
299
- category_risk = self._calculate_category_risk(risk_category = risk_category,
300
- contract_type = contract_type_enum,
301
- clauses = clauses,
302
- unfavorable_terms = unfavorable_terms,
303
- missing_protections = missing_protections,
304
- contract_text = contract_text,
305
- )
306
 
307
  category_scores[risk_category] = category_risk["score"]
308
  detailed_findings[risk_category] = category_risk["findings"]
@@ -355,7 +308,7 @@ class RiskAnalyzer:
355
 
356
  for term in category_terms:
357
  # Scale appropriately
358
- base_score += term.risk_score * 0.1
359
 
360
  findings.append(f"{term.term}: {term.explanation}")
361
 
@@ -363,7 +316,8 @@ class RiskAnalyzer:
363
  category_protections = [p for p in missing_protections if risk_category in p.categories]
364
 
365
  for protection in category_protections:
366
- base_score += protection.risk_score * 0.15
 
367
  findings.append(f"Missing: {protection.protection}")
368
 
369
  # Score from clauses in this category
@@ -376,7 +330,8 @@ class RiskAnalyzer:
376
  risk_category = risk_category,
377
  contract_type = contract_type,
378
  )
379
- base_score += clause_risk["score"]
 
380
 
381
  findings.extend(clause_risk["findings"])
382
 
@@ -395,57 +350,41 @@ class RiskAnalyzer:
395
 
396
  def _get_clauses_for_risk_category(self, clauses: List, risk_category: str) -> List:
397
  """
398
- Map clauses to risk categories
399
  """
400
- category_mapping = {"restrictive_covenants" : ["non_compete", "confidentiality"],
401
- "termination_rights" : ["termination"],
402
- "penalties_liability" : ["indemnification", "liability"],
403
- "compensation_benefits" : ["compensation"],
404
- "intellectual_property" : ["intellectual_property"],
405
- "confidentiality" : ["confidentiality"],
406
- "liability_indemnity" : ["indemnification", "liability"],
407
- "governing_law" : ["dispute_resolution"],
408
- "payment_terms" : ["compensation"],
409
- "warranties" : ["warranty"],
410
- "dispute_resolution" : ["dispute_resolution"],
411
- "assignment_change" : ["assignment", "amendment"],
412
- "insurance" : ["insurance"],
413
- "force_majeure" : ["force_majeure"],
414
- }
415
-
416
- target_categories = category_mapping.get(risk_category, [])
417
-
418
- return [c for c in clauses if c.category in target_categories]
419
-
420
 
421
  def _analyze_clause_risk(self, clause, risk_category: str, contract_type: ContractType) -> Dict:
422
  """
423
  Analyze individual clause risk using RiskRules factors
424
  """
425
- risk_factors = self.rules.CLAUSE_RISK_FACTORS
426
-
427
- # Map clause category to risk factors
428
- factor_mapping = {"non_compete" : "restrictive_covenants",
429
- "termination" : "termination_rights",
430
- "indemnification" : "liability_indemnity",
431
- "compensation" : "compensation_benefits",
432
- "intellectual_property" : "intellectual_property",
433
- "confidentiality" : "confidentiality",
434
- "liability" : "penalties_liability",
435
- "warranty" : "warranties",
436
- "dispute_resolution" : "dispute_resolution",
437
- "assignment" : "assignment_change",
438
- "amendment" : "assignment_change",
439
- "insurance" : "insurance",
440
- "force_majeure" : "force_majeure",
441
- }
442
-
443
- risk_factor_key = factor_mapping.get(clause.category)
444
-
445
- if not risk_factor_key or risk_factor_key not in risk_factors:
446
  return {"score": 0, "findings": []}
447
 
448
- factor_config = risk_factors[risk_factor_key]
449
  base_risk = factor_config.get("base_risk", 50)
450
  text_lower = clause.text.lower()
451
 
@@ -461,13 +400,14 @@ class RiskAnalyzer:
461
  findings.append(f"Red flag: '{red_flag}' ({severity} risk by {abs(adjustment)})")
462
 
463
  # Apply contract-type specific multiplier
464
- type_adjustments = self.rules.CONTRACT_TYPE_ADJUSTMENTS.get(contract_type.value, {})
465
  category_multiplier = type_adjustments.get(risk_category, 1.0)
466
 
467
- risk_score *= category_multiplier
468
 
469
  return {"score" : max(0, min(100, risk_score)),
470
- "findings" : findings}
 
471
 
472
 
473
  def _calculate_weighted_score(self, category_scores: Dict[str, int], adjusted_weights: Dict[str, float]) -> int:
 
7
  from pathlib import Path
8
  from typing import Optional
9
  from dataclasses import field
 
10
  from collections import defaultdict
11
 
12
  # Add parent directory to path for imports
 
16
  from utils.logger import log_error
17
  from config.risk_rules import RiskRules
18
  from config.risk_rules import ContractType
19
+ from services.data_models import RiskScore
20
  from services.term_analyzer import TermAnalyzer
21
+ from utils.logger import ContractAnalyzerLogger
22
+ from services.data_models import ExtractedClause
23
+ from services.data_models import UnfavorableTerm
24
+ from services.data_models import MissingProtection
25
+ from services.data_models import RiskBreakdownItem
26
  from services.protection_checker import ProtectionChecker
27
+ from services.clause_extractor import RiskClauseExtractor
28
  from services.contract_classifier import ContractCategory
29
  from services.contract_classifier import ContractClassifier
30
  from services.clause_extractor import ComprehensiveClauseExtractor
31
 
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  class RiskAnalyzer:
34
  """
35
  Orchestrates all analysis components and calculates comprehensive risk scores
 
37
  Analysis Pipeline:
38
  1. Contract Classification
39
  2. Clause Extraction
40
+ 3. Term Analysis
41
  4. Protection Checking
42
  5. Risk Scoring
43
  """
 
55
 
56
  # Initialize all analysis components
57
  self.contract_classifier = ContractClassifier(model_loader = model_loader)
58
+ self.risk_clause_extractor = None # Will be initialized with contract type
 
59
  self.term_analyzer = TermAnalyzer()
60
  self.protection_checker = ProtectionChecker()
61
 
 
65
  @ContractAnalyzerLogger.log_execution_time("analyze_contract_risk")
66
  def analyze_contract_risk(self, contract_text: str) -> RiskScore:
67
  """
68
+ Comprehensive contract risk analysis
69
 
70
  Arguments:
71
  ----------
 
78
 
79
  log_info("Starting Comprehensive Contract Risk Analysis...", text_length = len(contract_text))
80
 
81
+ # Contract Classification
82
  contract_category = self._classify_contract(contract_text = contract_text)
83
+ log_info("Contract classified", contract_type = contract_category.category)
84
 
85
+ # Clause Extraction: RiskClauseExtractor
86
  clauses = self._extract_clauses(contract_text = contract_text,
87
  contract_category = contract_category,
88
  )
89
 
90
+ log_info("Clauses extracted", num_clauses = len(clauses))
91
 
92
+ # Unfavourable Term Analysis
93
  unfavorable_terms = self._analyze_unfavorable_terms(contract_text = contract_text,
94
  clauses = clauses,
95
  contract_category = contract_category,
96
  )
97
 
98
+ log_info("Unfavorable terms analyzed", num_unfavorable_terms = len(unfavorable_terms))
99
 
100
  # MISSING PROTECTIONS ANALYSIS
101
  missing_protections = self._analyze_missing_protections(contract_text = contract_text,
 
103
  contract_category = contract_category,
104
  )
105
 
106
+ log_info("Missing protections analyzed", num_missing_protections = len(missing_protections))
107
 
108
  # RISK SCORING & AGGREGATION
109
  risk_score = self._calculate_comprehensive_risk(contract_category = contract_category,
 
152
 
153
  def _extract_clauses(self, contract_text: str, contract_category) -> List:
154
  """
155
+ Extract clauses from contract using RiskClauseExtractor
156
  """
157
+ log_info("Extracting RISK-FOCUSED clauses from contract...")
158
 
159
  try:
160
+ # Get contract type enum
161
+ contract_type_enum = self._get_contract_type_enum(category_str = contract_category.category)
162
+
163
+ # Initialize RiskClauseExtractor (NOT ComprehensiveClauseExtractor)
164
+ self.risk_clause_extractor = RiskClauseExtractor(model_loader = self.model_loader,
165
+ contract_type = contract_type_enum,
166
+ )
167
 
168
+ # Use RiskClauseExtractor which outputs risk categories
169
+ clauses = self.risk_clause_extractor.extract_risk_clauses(contract_text = contract_text,
170
+ max_clauses = 50,
171
+ )
172
+
173
+ log_info("Risk-focused clause extraction successful",
174
  total_clauses = len(clauses),
175
  categories = [c.category for c in clauses])
176
 
 
183
 
184
  def _analyze_unfavorable_terms(self, contract_text: str, clauses: List, contract_category) -> List[UnfavorableTerm]:
185
  """
186
+ Analyze for unfavorable terms (using risk categories from RiskClauseExtractor)
187
  """
188
  log_info("Analyzing unfavorable terms...")
189
 
190
  try:
191
  # Initialize term analyzer with contract type
192
+ contract_type_enum = self._get_contract_type_enum(category_str = contract_category.category)
193
  self.term_analyzer = TermAnalyzer(contract_type = contract_type_enum)
194
 
195
  unfavorable_terms = self.term_analyzer.analyze_unfavorable_terms(contract_text = contract_text,
 
197
 
198
  log_info("Unfavorable terms analysis successful",
199
  total_terms = len(unfavorable_terms),
200
+ critical = sum(1 for t in unfavorable_terms if (t.severity == "critical")))
201
 
202
  return unfavorable_terms
203
 
 
208
 
209
  def _analyze_missing_protections(self, contract_text: str, clauses: List, contract_category) -> List[MissingProtection]:
210
  """
211
+ Analyze for missing protections
212
  """
213
  log_info("Analyzing missing protections...")
214
 
215
  try:
216
  # Initialize protection checker with contract type
217
+ contract_type_enum = self._get_contract_type_enum(category_str = contract_category.category)
218
  self.protection_checker = ProtectionChecker(contract_type = contract_type_enum)
219
 
220
+ missing_protections = self.protection_checker.check_missing_protections(contract_text = contract_text,
221
+ clauses = clauses)
222
 
223
  log_info("Missing protections analysis successful",
224
  total_missing = len(missing_protections),
225
+ critical = sum(1 for p in missing_protections if (p.importance == "critical")))
226
 
227
  return missing_protections
228
 
 
234
  def _calculate_comprehensive_risk(self, contract_category, clauses: List, unfavorable_terms: List[UnfavorableTerm], missing_protections: List[MissingProtection],
235
  contract_text: str) -> RiskScore:
236
  """
237
+ Calculate comprehensive risk score using all analysis results
238
  """
239
  log_info("Calculating comprehensive risk score...")
240
 
241
  # Get contract type for risk rule adjustments
242
+ contract_type_enum = self._get_contract_type_enum(category_str = contract_category.category)
243
  adjusted_weights = self.rules.get_adjusted_weights(contract_type_enum)
244
 
245
  # Initialize scoring containers
 
249
 
250
  # Calculate risk for each category
251
  for risk_category in adjusted_weights.keys():
252
+ category_risk = self._calculate_category_risk(risk_category = risk_category,
253
+ contract_type = contract_type_enum,
254
+ clauses = clauses,
255
+ unfavorable_terms = unfavorable_terms,
256
+ missing_protections = missing_protections,
257
+ contract_text = contract_text,
258
+ )
259
 
260
  category_scores[risk_category] = category_risk["score"]
261
  detailed_findings[risk_category] = category_risk["findings"]
 
308
 
309
  for term in category_terms:
310
  # Scale appropriately
311
+ base_score += term.risk_score * 0.4
312
 
313
  findings.append(f"{term.term}: {term.explanation}")
314
 
 
316
  category_protections = [p for p in missing_protections if risk_category in p.categories]
317
 
318
  for protection in category_protections:
319
+ base_score += protection.risk_score * 0.3
320
+
321
  findings.append(f"Missing: {protection.protection}")
322
 
323
  # Score from clauses in this category
 
330
  risk_category = risk_category,
331
  contract_type = contract_type,
332
  )
333
+
334
+ base_score += clause_risk["score"] * 0.3
335
 
336
  findings.extend(clause_risk["findings"])
337
 
 
350
 
351
  def _get_clauses_for_risk_category(self, clauses: List, risk_category: str) -> List:
352
  """
353
+ Map clauses to risk categories (now clauses are already in risk categories)
354
  """
355
+ # clauses.category is already a risk category from RiskClauseExtractor
356
+ clauses_for_risk_category = [c for c in clauses if (c.category == risk_category)]
357
+
358
+ return clauses_for_risk_category
359
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
360
 
361
  def _analyze_clause_risk(self, clause, risk_category: str, contract_type: ContractType) -> Dict:
362
  """
363
  Analyze individual clause risk using RiskRules factors
364
  """
365
+ risk_factors = self.rules.CLAUSE_RISK_FACTORS
366
+
367
+ # Map RISK category (e.g., "restrictive_covenants") to CLAUSE category (e.g., "non_compete")
368
+ factor_mapping = {"restrictive_covenants" : "non_compete",
369
+ "termination_rights" : "termination",
370
+ "liability_indemnity" : "indemnification",
371
+ "compensation_benefits" : "compensation",
372
+ "intellectual_property" : "intellectual_property",
373
+ "confidentiality" : "confidentiality",
374
+ "penalties_liability" : "liability",
375
+ "warranties" : "warranty",
376
+ "dispute_resolution" : "dispute_resolution",
377
+ "assignment_change" : "assignment",
378
+ "insurance" : "insurance",
379
+ "force_majeure" : "force_majeure",
380
+ }
381
+
382
+ clause_category_key = factor_mapping.get(risk_category)
383
+
384
+ if not clause_category_key or clause_category_key not in risk_factors:
 
385
  return {"score": 0, "findings": []}
386
 
387
+ factor_config = risk_factors[clause_category_key]
388
  base_risk = factor_config.get("base_risk", 50)
389
  text_lower = clause.text.lower()
390
 
 
400
  findings.append(f"Red flag: '{red_flag}' ({severity} risk by {abs(adjustment)})")
401
 
402
  # Apply contract-type specific multiplier
403
+ type_adjustments = self.rules.CONTRACT_TYPE_ADJUSTMENTS.get(contract_type.value, {})
404
  category_multiplier = type_adjustments.get(risk_category, 1.0)
405
 
406
+ risk_score *= category_multiplier
407
 
408
  return {"score" : max(0, min(100, risk_score)),
409
+ "findings" : findings,
410
+ }
411
 
412
 
413
  def _calculate_weighted_score(self, category_scores: Dict[str, int], adjusted_weights: Dict[str, float]) -> int:
services/summary_generator.py CHANGED
@@ -1,417 +1,337 @@
1
  # DEPENDENCIES
2
- import logging
3
- from typing import Dict, List, Optional, Any
4
- from dataclasses import dataclass
5
-
6
- from utils.logger import ContractAnalyzerLogger
7
- from model_manager.llm_manager import LLMManager, LLMProvider
8
- from services.risk_analyzer import RiskScore
9
- from services.llm_interpreter import RiskInterpretation
10
- from services.negotiation_engine import NegotiationPlaybook
11
- from services.contract_classifier import ContractCategory
12
-
13
- logger = ContractAnalyzerLogger.get_logger()
14
 
 
 
15
 
16
- @dataclass
17
- class SummaryContext:
18
- """
19
- Context data for comprehensive summary generation
20
- """
21
- contract_type: str
22
- risk_score: int
23
- risk_level: str
24
- category_scores: Dict[str, int]
25
- unfavorable_terms: List[Dict]
26
- missing_protections: List[Dict]
27
- clauses: List
28
- key_findings: List[str]
29
- # NEW: Full pipeline integration
30
- risk_interpretation: Optional[RiskInterpretation] = None
31
- negotiation_playbook: Optional[NegotiationPlaybook] = None
32
- contract_text_preview: Optional[str] = None
33
- contract_metadata: Optional[Dict[str, Any]] = None
34
 
35
 
36
  class SummaryGenerator:
37
  """
38
- LLM-powered executive summary generator for contract analysis
39
- Generates professional, detailed executive summaries using ALL pipeline outputs
40
  """
41
-
42
  def __init__(self, llm_manager: Optional[LLMManager] = None):
43
  """
44
  Initialize the summary generator
45
 
46
- Args:
47
- llm_manager: LLM manager instance (if None, creates one with default settings)
 
48
  """
49
  self.llm_manager = llm_manager or LLMManager()
50
- self.logger = ContractAnalyzerLogger.get_logger()
51
-
52
- logger.info("Summary generator initialized")
53
-
54
- # ENHANCED: Main entry point with full pipeline integration
55
- def generate_comprehensive_summary(self,
56
- contract_text: str,
57
- classification: ContractCategory,
58
- risk_analysis: RiskScore,
59
- risk_interpretation: RiskInterpretation,
60
- negotiation_playbook: NegotiationPlaybook,
61
- unfavorable_terms: List[Dict],
62
- missing_protections: List[Dict],
63
- clauses: List) -> str:
64
- """
65
- Generate comprehensive executive summary using ALL pipeline outputs
66
-
67
- Args:
68
- contract_text: Original contract text (for context)
69
- classification: Contract classification results
70
- risk_analysis: Complete risk analysis
71
- risk_interpretation: LLM-enhanced risk explanations
72
- negotiation_playbook: Comprehensive negotiation strategy
73
- unfavorable_terms: Detected unfavorable terms
74
- missing_protections: Missing protections
75
- clauses: Extracted clauses
 
 
76
 
77
  Returns:
78
- Generated executive summary string
 
79
  """
80
  try:
81
- # Prepare enhanced context with ALL pipeline data
82
- context = self._prepare_comprehensive_context(
83
- contract_text=contract_text,
84
- classification=classification,
85
- risk_analysis=risk_analysis,
86
- risk_interpretation=risk_interpretation,
87
- negotiation_playbook=negotiation_playbook,
88
- unfavorable_terms=unfavorable_terms,
89
- missing_protections=missing_protections,
90
- clauses=clauses
91
- )
92
 
93
- # Generate enhanced summary using LLM
94
- summary = self._generate_enhanced_summary(context)
95
 
96
- logger.info(f"Comprehensive executive summary generated - Risk: {context.risk_score}/100 ({context.risk_level})")
97
 
98
  return summary
99
 
100
  except Exception as e:
101
- logger.error(f"Failed to generate comprehensive summary: {e}")
102
 
103
- # Enhanced fallback with available data
104
- return self._generate_enhanced_fallback_summary(
105
- contract_text=contract_text,
106
- classification=classification,
107
- risk_analysis=risk_analysis,
108
- unfavorable_terms=unfavorable_terms,
109
- missing_protections=missing_protections
110
- )
111
 
112
- def _prepare_comprehensive_context(self,
113
- contract_text: str,
114
- classification: ContractCategory,
115
- risk_analysis: RiskScore,
116
- risk_interpretation: RiskInterpretation,
117
- negotiation_playbook: NegotiationPlaybook,
118
- unfavorable_terms: List[Dict],
119
- missing_protections: List[Dict],
120
- clauses: List) -> SummaryContext:
121
- """Prepare comprehensive context with ALL pipeline data"""
122
-
123
- # Extract contract preview
124
- contract_preview = contract_text[:500] + "..." if len(contract_text) > 500 else contract_text
125
-
126
- # Extract key findings from ALL sources
127
- key_findings = self._extract_comprehensive_findings(
128
- risk_analysis=risk_analysis,
129
- risk_interpretation=risk_interpretation,
130
- negotiation_playbook=negotiation_playbook,
131
- unfavorable_terms=unfavorable_terms,
132
- missing_protections=missing_protections,
133
- clauses=clauses
134
- )
135
-
136
  # Prepare metadata
137
- metadata = {
138
- "contract_length": len(contract_text),
139
- "clauses_analyzed": len(clauses),
140
- "critical_issues": len([t for t in unfavorable_terms if self._get_severity(t) == "critical"]),
141
- "walk_away_items": len(negotiation_playbook.walk_away_items) if negotiation_playbook else 0
142
- }
143
-
144
- return SummaryContext(
145
- contract_type=classification.category,
146
- risk_score=risk_analysis.overall_score,
147
- risk_level=risk_analysis.risk_level,
148
- category_scores=risk_analysis.category_scores,
149
- unfavorable_terms=unfavorable_terms,
150
- missing_protections=missing_protections,
151
- clauses=clauses,
152
- key_findings=key_findings,
153
- risk_interpretation=risk_interpretation,
154
- negotiation_playbook=negotiation_playbook,
155
- contract_text_preview=contract_preview,
156
- contract_metadata=metadata
157
- )
158
 
159
- def _extract_comprehensive_findings(self,
160
- risk_analysis: RiskScore,
161
- risk_interpretation: RiskInterpretation,
162
- negotiation_playbook: NegotiationPlaybook,
163
- unfavorable_terms: List[Dict],
164
- missing_protections: List[Dict],
165
- clauses: List) -> List[str]:
166
- """Extract comprehensive findings from ALL analysis components"""
167
-
168
- findings = []
169
-
170
- # 1. Overall risk context
171
- if risk_analysis.overall_score >= 80:
172
  findings.append("CRITICAL RISK LEVEL: Contract presents unacceptable risk requiring immediate attention")
173
- elif risk_analysis.overall_score >= 60:
 
174
  findings.append("HIGH RISK LEVEL: Significant concerns requiring substantial negotiation")
175
 
176
- # 2. Critical unfavorable terms
177
- critical_terms = [t for t in unfavorable_terms if self._get_severity(t) == "critical"]
 
178
  if critical_terms:
179
  findings.append(f"{len(critical_terms)} CRITICAL unfavorable terms identified")
180
  for term in critical_terms[:2]:
181
- term_name = self._get_term_name(term)
 
182
  findings.append(f"Critical: {term_name}")
183
 
184
- # 3. Critical missing protections
185
- critical_protections = [p for p in missing_protections if self._get_importance(p) == "critical"]
 
186
  if critical_protections:
187
  findings.append(f"{len(critical_protections)} CRITICAL protections missing")
188
  for prot in critical_protections[:2]:
189
- prot_name = self._get_protection_name(prot)
 
190
  findings.append(f"Missing: {prot_name}")
191
 
192
- # 4. High-risk categories
193
- high_risk_categories = [cat for cat, score in risk_analysis.category_scores.items()
194
- if score >= 70]
195
  if high_risk_categories:
196
  findings.append(f"High-risk categories: {', '.join(high_risk_categories)}")
197
 
198
- # 5. Walk-away items from negotiation playbook
199
  if negotiation_playbook and negotiation_playbook.walk_away_items:
200
  findings.append(f"{len(negotiation_playbook.walk_away_items)} potential deal-breakers identified")
201
 
202
- # 6. Key concerns from risk interpretation
203
  if risk_interpretation and risk_interpretation.key_concerns:
204
  top_concerns = risk_interpretation.key_concerns[:2]
205
  for concern in top_concerns:
206
  findings.append(f"Key concern: {concern}")
207
 
208
- return findings[:8] # Return top 8 findings
209
 
210
- def _generate_enhanced_summary(self, context: SummaryContext) -> str:
211
- """Generate enhanced summary using comprehensive context"""
212
-
213
- prompt = self._build_enhanced_summary_prompt(context)
214
- system_prompt = self._build_enhanced_system_prompt()
 
 
215
 
216
  try:
217
- response = self.llm_manager.complete(
218
- prompt=prompt,
219
- system_prompt=system_prompt,
220
- temperature=0.3,
221
- max_tokens=500, # Increased for comprehensive summary
222
- json_mode=False
223
- )
224
-
225
  if response.success and response.text.strip():
226
- return self._clean_summary_response(response.text)
 
227
  else:
228
  raise ValueError(f"LLM generation failed: {response.error_message}")
229
 
230
  except Exception as e:
231
- logger.error(f"Enhanced LLM summary generation failed: {e}")
232
  # Fallback to basic summary
233
- return self._generate_enhanced_fallback_summary_from_context(context)
234
 
235
- def _build_enhanced_system_prompt(self) -> str:
236
- """Build enhanced system prompt for comprehensive summary generation"""
237
-
238
- return """You are a senior legal analyst and strategic advisor specializing in contract risk assessment.
239
 
240
- YOUR ROLE:
241
- Generate comprehensive, professional executive summaries that synthesize technical risk analysis with practical business implications.
 
 
 
 
242
 
243
- KEY REQUIREMENTS:
244
- 1. Write in formal, professional business language suitable for executives
245
- 2. Synthesize ALL analysis components into cohesive narrative
246
- 3. Focus on strategic implications and decision-making
247
- 4. Maintain objective, factual tone while highlighting critical risks
248
- 5. Keep summary length between 150-300 words
249
- 6. Structure: Overall assessment → Critical risks → Strategic implications → Recommended approach
250
 
251
- CONTENT FOCUS:
252
- - Start with overall risk assessment and contract type context
253
- - Highlight 2-3 most critical risks with practical consequences
254
- - Mention key missing protections and their business impact
255
- - Reference negotiation strategy and deal-breakers
256
- - Conclude with clear recommended next steps
257
 
258
- WRITING STYLE:
259
- - Executive-level business language
260
- - Focus on decision-making implications
261
- - Avoid markdown formatting
262
- - Be direct, actionable, and strategic
263
- - Connect legal risks to business outcomes
264
 
265
- OUTPUT FORMAT:
266
- Return only the executive summary text, no headings, no bullet points, no role rescription, just clean paragraph text. Also write the summary in passive voice only."""
 
 
 
 
267
 
268
- def _build_enhanced_summary_prompt(self, context: SummaryContext) -> str:
269
- """Build detailed prompt for comprehensive summary generation"""
270
-
271
- # Build comprehensive context sections
272
- risk_context = self._build_enhanced_risk_context(context)
273
- critical_issues = self._build_critical_issues_context(context)
274
- strategic_context = self._build_strategic_context(context)
275
- negotiation_context = self._build_negotiation_context(context)
276
 
277
- prompt = f"""
278
- COMPREHENSIVE CONTRACT ANALYSIS:
279
-
280
- {risk_context}
281
-
282
- {critical_issues}
283
-
284
- {strategic_context}
285
-
286
- {negotiation_context}
287
 
288
- GENERATION INSTRUCTIONS:
289
- Based on the comprehensive analysis above, write a professional executive summary that:
290
 
291
- 1. Starts with overall risk assessment for this {context.contract_type} agreement
292
- 2. Highlights the most critical risks and their business implications
293
- 3. Mentions key missing protections and unfavorable terms
294
- 4. References the negotiation strategy and potential deal-breakers
295
- 5. Provides clear, actionable recommendations for next steps
296
-
297
- Focus on synthesizing all analysis components into a cohesive strategic assessment that supports executive decision-making.
298
- """
299
- return prompt
300
-
301
- def _build_enhanced_risk_context(self, context: SummaryContext) -> str:
302
- """Build enhanced risk assessment context"""
303
-
304
- risk_level_descriptions = {
305
- "CRITICAL": "CRITICAL level of risk requiring immediate executive attention",
306
- "HIGH": "HIGH level of risk requiring significant review and negotiation",
307
- "MEDIUM": "MODERATE level of risk with specific concerns to address",
308
- "LOW": "LOW level of risk, generally favorable with minor improvements needed"
309
- }
310
-
311
- risk_desc = risk_level_descriptions.get(context.risk_level, "Requires professional review")
312
-
313
- text = f"OVERALL RISK ASSESSMENT:\n"
314
- text += f"- Risk Score: {context.risk_score}/100 ({risk_desc})\n"
315
- text += f"- Contract Type: {context.contract_type.replace('_', ' ').title()}\n"
316
- text += f"- Analysis Scope: {context.contract_metadata.get('clauses_analyzed', 0)} clauses analyzed\n"
317
-
318
- # Top risk categories
319
- if context.category_scores:
320
- high_risk_categories = [(cat, score) for cat, score in context.category_scores.items()
321
- if score >= 60]
322
- if high_risk_categories:
323
- text += "- Highest Risk Categories:\n"
324
- for category, score in sorted(high_risk_categories, key=lambda x: x[1], reverse=True)[:3]:
325
- category_name = category.replace('_', ' ').title()
326
- text += f" * {category_name}: {score}/100\n"
327
-
328
- return text
329
-
330
- def _build_critical_issues_context(self, context: SummaryContext) -> str:
331
- """Build context about critical issues"""
332
 
333
- text = "CRITICAL ISSUES IDENTIFIED:\n"
 
 
 
334
 
335
- # Critical unfavorable terms
336
- critical_terms = [t for t in context.unfavorable_terms if self._get_severity(t) == "critical"]
337
  if critical_terms:
338
- text += f"- Critical Unfavorable Terms: {len(critical_terms)}\n"
339
- for term in critical_terms[:2]:
340
- term_name = self._get_term_name(term)
341
- explanation = self._get_explanation(term)
342
- text += f" * {term_name}: {explanation}\n"
 
343
 
344
- # Critical missing protections
345
- critical_protections = [p for p in context.missing_protections if self._get_importance(p) == "critical"]
346
  if critical_protections:
347
- text += f"- Critical Missing Protections: {len(critical_protections)}\n"
348
- for prot in critical_protections[:2]:
349
- prot_name = self._get_protection_name(prot)
350
- explanation = self._get_explanation(prot)
351
- text += f" * {prot_name}: {explanation}\n"
352
-
353
- # Key concerns from risk interpretation
354
- if context.risk_interpretation and context.risk_interpretation.key_concerns:
355
- text += f"- Key Strategic Concerns: {len(context.risk_interpretation.key_concerns)}\n"
356
- for concern in context.risk_interpretation.key_concerns[:2]:
357
- text += f" * {concern}\n"
358
-
359
- if not critical_terms and not critical_protections:
360
- text += "- No critical issues identified\n"
361
-
362
- return text
363
-
364
- def _build_strategic_context(self, context: SummaryContext) -> str:
365
- """Build strategic context from risk interpretation"""
366
-
367
- text = "STRATEGIC ASSESSMENT:\n"
368
-
369
- if context.risk_interpretation:
370
- text += f"- Overall Risk Explanation: {context.risk_interpretation.overall_risk_explanation}\n"
371
 
372
- if context.risk_interpretation.market_comparison:
373
- text += f"- Market Context: {context.risk_interpretation.market_comparison}\n"
374
-
375
- # Contract complexity context
376
- if context.contract_metadata:
377
- if context.contract_metadata['contract_length'] > 10000:
378
- text += "- Complex Agreement: Extensive contract requiring detailed review\n"
379
- elif context.contract_metadata['critical_issues'] > 0:
380
- text += "- High Attention Required: Contains critical issues needing resolution\n"
381
 
382
- return text
383
-
384
- def _build_negotiation_context(self, context: SummaryContext) -> str:
385
- """Build negotiation strategy context"""
386
 
387
- text = "NEGOTIATION STRATEGY:\n"
388
-
389
- if context.negotiation_playbook:
390
- text += f"- Overall Approach: {context.negotiation_playbook.overall_strategy}\n"
391
-
392
- if context.negotiation_playbook.walk_away_items:
393
- text += f"- Deal-Breakers: {len(context.negotiation_playbook.walk_away_items)} critical items\n"
394
- for item in context.negotiation_playbook.walk_away_items[:2]:
395
- text += f" * {item}\n"
396
-
397
- if context.negotiation_playbook.critical_points:
398
- text += f"- Priority Negotiation Points: {len(context.negotiation_playbook.critical_points)}\n"
399
-
400
- text += f"- Timing Guidance: {context.negotiation_playbook.timing_guidance}\n"
401
  else:
402
- text += "- Standard negotiation approach recommended\n"
403
-
404
- return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
 
 
406
  def _clean_summary_response(self, text: str) -> str:
407
- """Clean and format the LLM response"""
408
-
 
409
  # Remove any markdown formatting
410
- text = text.replace('**', '').replace('*', '').replace('#', '')
411
 
412
  # Remove common LLM artifacts and empty lines
413
- lines = text.split('\n')
414
- cleaned_lines = []
415
 
416
  for line in lines:
417
  line = line.strip()
@@ -431,150 +351,166 @@ Focus on synthesizing all analysis components into a cohesive strategic assessme
431
 
432
  return summary
433
 
434
- def _generate_enhanced_fallback_summary(self,
435
- contract_text: str,
436
- classification: ContractCategory,
437
- risk_analysis: RiskScore,
438
- unfavorable_terms: List[Dict],
439
- missing_protections: List[Dict]) -> str:
440
- """Generate enhanced fallback summary"""
441
-
442
  contract_type_display = classification.category.replace('_', ' ').title()
443
 
444
  # Count critical items
445
- critical_terms = len([t for t in unfavorable_terms if self._get_severity(t) == "critical"])
446
- critical_protections = len([p for p in missing_protections if self._get_importance(p) == "critical"])
447
 
448
- # Enhanced risk assessment
449
- if risk_analysis.overall_score >= 80:
450
  risk_assessment = f"This {contract_type_display} presents a CRITICAL level of risk"
451
- action = "requires immediate executive attention and significant revision before consideration"
452
- elif risk_analysis.overall_score >= 60:
 
453
  risk_assessment = f"This {contract_type_display} presents a HIGH level of risk"
454
- action = "requires careful legal review and substantial negotiation to mitigate key concerns"
455
- elif risk_analysis.overall_score >= 40:
 
456
  risk_assessment = f"This {contract_type_display} presents a MODERATE level of risk"
457
- action = "requires professional review and selective negotiation on specific provisions"
 
458
  else:
459
  risk_assessment = f"This {contract_type_display} presents a LOW level of risk"
460
  action = "appears generally reasonable but should undergo standard legal review"
461
 
462
- summary = f"{risk_assessment} with an overall risk score of {risk_analysis.overall_score}/100. "
463
  summary += f"The agreement {action}. "
464
 
465
  # Add critical items context
466
- if critical_terms > 0:
467
  summary += f"Analysis identified {critical_terms} critical unfavorable terms "
 
468
  if critical_protections > 0:
469
  summary += f"and {critical_protections} critical missing protections. "
 
470
  else:
471
  summary += f"and {len(missing_protections)} missing standard protections. "
 
472
  else:
473
  summary += f"Review identified {len(unfavorable_terms)} areas for improvement. "
474
 
475
  # Add high-risk categories context
476
- high_risk_categories = [cat for cat, score in risk_analysis.category_scores.items() if score >= 60]
 
477
  if high_risk_categories:
478
  category_names = [cat.replace('_', ' ').title() for cat in high_risk_categories[:2]]
479
- summary += f"Particular attention should be given to {', '.join(category_names)} provisions. "
480
 
481
  summary += "Proceed with the detailed negotiation strategy and risk mitigation recommendations provided in the full analysis."
482
 
483
  return summary
 
484
 
485
- def _generate_enhanced_fallback_summary_from_context(self, context: SummaryContext) -> str:
486
- """Generate fallback summary from context object"""
487
- return self._generate_enhanced_fallback_summary(
488
- contract_text=context.contract_text_preview or "",
489
- classification=type('MockClassification', (), {'category': context.contract_type})(),
490
- risk_analysis=type('MockRiskAnalysis', (), {
491
- 'overall_score': context.risk_score,
492
- 'risk_level': context.risk_level,
493
- 'category_scores': context.category_scores
494
- })(),
495
- unfavorable_terms=context.unfavorable_terms,
496
- missing_protections=context.missing_protections
497
- )
 
 
498
 
499
- # Helper methods for safe attribute access
500
  def _get_severity(self, term) -> str:
501
- """Safely get severity from term object or dict"""
 
 
502
  try:
503
- if hasattr(term, 'severity'):
504
  return term.severity
 
505
  else:
506
  return term.get('severity', 'unknown')
 
507
  except (AttributeError, KeyError):
508
  return 'unknown'
509
 
 
510
  def _get_importance(self, protection) -> str:
511
- """Safely get importance from protection object or dict"""
 
 
512
  try:
513
  if hasattr(protection, 'importance'):
514
  return protection.importance
 
515
  else:
516
  return protection.get('importance', 'unknown')
 
517
  except (AttributeError, KeyError):
518
  return 'unknown'
519
 
 
520
  def _get_term_name(self, term) -> str:
521
- """Safely get term name"""
 
 
522
  try:
523
  if hasattr(term, 'term'):
524
  return term.term
 
525
  else:
526
  return term.get('term', 'Unknown Term')
 
527
  except (AttributeError, KeyError):
528
  return 'Unknown Term'
529
 
 
530
  def _get_protection_name(self, protection) -> str:
531
- """Safely get protection name"""
 
 
532
  try:
533
  if hasattr(protection, 'protection'):
534
  return protection.protection
 
535
  else:
536
  return protection.get('protection', 'Unknown Protection')
 
537
  except (AttributeError, KeyError):
538
  return 'Unknown Protection'
539
 
 
540
  def _get_explanation(self, item) -> str:
541
- """Safely get explanation"""
 
 
542
  try:
543
  if hasattr(item, 'explanation'):
544
  return item.explanation
 
545
  else:
546
  return item.get('explanation', 'No explanation available')
 
547
  except (AttributeError, KeyError):
548
  return 'No explanation available'
549
 
550
- # Keep original method for backward compatibility
551
- def generate_executive_summary(self,
552
- classification: Dict,
553
- risk_analysis: Dict,
554
- unfavorable_terms: List[Dict],
555
- missing_protections: List[Dict],
556
- clauses: List) -> str:
557
- """
558
- Original method for backward compatibility
559
- """
560
- # Convert dict inputs to appropriate types for the new method
561
- contract_category = type('ContractCategory', (), {
562
- 'category': classification.get('category', 'contract')
563
- })()
564
-
565
- risk_score_obj = type('RiskScore', (), {
566
- 'overall_score': risk_analysis.get('overall_score', 0),
567
- 'risk_level': risk_analysis.get('risk_level', 'unknown'),
568
- 'category_scores': risk_analysis.get('category_scores', {})
569
- })()
570
-
571
- return self.generate_comprehensive_summary(
572
- contract_text="", # Not available in original method
573
- classification=contract_category,
574
- risk_analysis=risk_score_obj,
575
- risk_interpretation=None,
576
- negotiation_playbook=None,
577
- unfavorable_terms=unfavorable_terms,
578
- missing_protections=missing_protections,
579
- clauses=clauses
580
- )
 
1
  # DEPENDENCIES
2
+ import sys
3
+ from typing import Any
4
+ from typing import Dict
5
+ from typing import List
6
+ from pathlib import Path
7
+ from typing import Optional
 
 
 
 
 
 
8
 
9
+ # Add parent directory to path for imports
10
+ sys.path.append(str(Path(__file__).parent.parent))
11
 
12
+ from services.risk_analyzer import RiskScore
13
+ from services.data_models import SummaryContext
14
+ from utils.logger import ContractAnalyzerLogger
15
+ from model_manager.llm_manager import LLMManager
16
+ from model_manager.llm_manager import LLMProvider
17
+ from services.data_models import ContractCategory
18
+ from services.data_models import RiskInterpretation
19
+ from services.data_models import NegotiationPlaybook
 
 
 
 
 
 
 
 
 
 
20
 
21
 
22
  class SummaryGenerator:
23
  """
24
+ LLM-powered executive summary generator for contract analysis : Generates professional, detailed executive summaries using ALL pipeline outputs
 
25
  """
 
26
def __init__(self, llm_manager: Optional[LLMManager] = None):
    """
    Wire up the collaborators used by the summary generator

    Arguments:
    ----------
        llm_manager { LLMManager } : LLM manager instance to use; when omitted (or falsy) a default-constructed LLMManager is created
    """
    # Resolve the LLM manager first so construction order matches the original
    if llm_manager:
        resolved_manager = llm_manager
    else:
        resolved_manager = LLMManager()

    self.llm_manager = resolved_manager
    self.logger      = ContractAnalyzerLogger.get_logger()

    self.logger.info("Summary generator initialized")
38
+
39
+
40
+ # Main entry point with full pipeline integration
41
def generate_executive_summary(self, contract_text: str, classification: ContractCategory, risk_analysis: RiskScore, risk_interpretation: RiskInterpretation,
                               negotiation_playbook: NegotiationPlaybook, unfavorable_terms: List, missing_protections: List, clauses: List) -> str:
    """
    Generate executive summary using all the pipeline outputs

    Arguments:
    ----------
        contract_text        { str }                 : Original contract text (for context); None is treated as ""

        classification       { ContractCategory }    : Contract classification results

        risk_analysis        { RiskScore }           : Complete risk analysis

        risk_interpretation  { RiskInterpretation }  : LLM-enhanced risk explanations

        negotiation_playbook { NegotiationPlaybook } : Comprehensive negotiation strategy

        unfavorable_terms    { List }                : Detected unfavorable terms; None is treated as empty

        missing_protections  { List }                : Missing protections; None is treated as empty

        clauses              { List }                : Extracted clauses; None is treated as empty

    Returns:
    --------
        { str } : Generated executive summary string (template-based fallback text when the main path fails)
    """
    # Normalise optional inputs once, up front: otherwise a None list breaks the main
    # path AND the fallback below, so the "fallback" would re-raise from inside `except`
    contract_text       = contract_text or ""
    unfavorable_terms   = unfavorable_terms if unfavorable_terms is not None else []
    missing_protections = missing_protections if missing_protections is not None else []
    clauses             = clauses if clauses is not None else []

    try:
        # Prepare context with all pipeline data
        context = self._prepare_summary_context(contract_text        = contract_text,
                                                classification       = classification,
                                                risk_analysis        = risk_analysis,
                                                risk_interpretation  = risk_interpretation,
                                                negotiation_playbook = negotiation_playbook,
                                                unfavorable_terms    = unfavorable_terms,
                                                missing_protections  = missing_protections,
                                                clauses              = clauses,
                                                )

        # Generate summary using LLM
        summary = self._generate_summary(context = context)

        self.logger.info(f"Executive summary generated - Risk: {context.risk_score}/100 ({context.risk_level})")

        return summary

    except Exception as e:
        # Deliberate best-effort boundary: any failure degrades to the template summary
        self.logger.error(f"Failed to generate comprehensive summary: {repr(e)}")

        # Fallback with available data
        return self._generate_fallback_summary(contract_text       = contract_text,
                                               classification      = classification,
                                               risk_analysis       = risk_analysis,
                                               unfavorable_terms   = unfavorable_terms,
                                               missing_protections = missing_protections,
                                               )
 
97
 
98
+
99
def _prepare_summary_context(self, contract_text: str, classification: ContractCategory, risk_analysis: RiskScore, risk_interpretation: RiskInterpretation,
                             negotiation_playbook: NegotiationPlaybook, unfavorable_terms: List[Dict], missing_protections: List[Dict], clauses: List) -> SummaryContext:
    """
    Prepare summary context with all pipeline data

    Arguments:
    ----------
        contract_text        { str }                 : Contract text; stored unmodified as `contract_text_preview` (no truncation happens here)

        classification       { ContractCategory }    : Only `.category` is read

        risk_analysis        { RiskScore }           : `.overall_score`, `.risk_level` and `.category_scores` are copied into the context

        risk_interpretation  { RiskInterpretation }  : Passed through as-is (may be None)

        negotiation_playbook { NegotiationPlaybook } : Passed through as-is (may be None); its `walk_away_items` are counted

        unfavorable_terms    { List }                : Term objects or dicts; None is treated as empty

        missing_protections  { List }                : Protection objects or dicts; None is treated as empty

        clauses              { List }                : Extracted clauses; None is treated as empty

    Returns:
    --------
        { SummaryContext } : Context object consumed by the prompt builders and the fallback summary
    """
    contract_text       = contract_text or ""
    unfavorable_terms   = unfavorable_terms if unfavorable_terms is not None else []
    missing_protections = missing_protections if missing_protections is not None else []
    clauses             = clauses if clauses is not None else []

    # A missing playbook, a missing attribute and a None/empty list all count as zero walk-away items
    walk_away_items = getattr(negotiation_playbook, 'walk_away_items', None) if negotiation_playbook else None
    walk_away_count = len(walk_away_items) if walk_away_items else 0

    # Extract key findings from all sources
    key_findings = self._extract_findings(risk_analysis        = risk_analysis,
                                          risk_interpretation  = risk_interpretation,
                                          negotiation_playbook = negotiation_playbook,
                                          unfavorable_terms    = unfavorable_terms,
                                          missing_protections  = missing_protections,
                                          clauses              = clauses,
                                          )

    # Prepare metadata
    critical_issue_count = sum(1 for t in unfavorable_terms if (self._get_severity(t) == "critical"))

    metadata = {"contract_length"  : len(contract_text),
                "clauses_analyzed" : len(clauses),
                "critical_issues"  : critical_issue_count,
                "walk_away_items"  : walk_away_count,
                }

    return SummaryContext(contract_type         = classification.category,
                          risk_score            = risk_analysis.overall_score,
                          risk_level            = risk_analysis.risk_level,
                          category_scores       = risk_analysis.category_scores,
                          unfavorable_terms     = unfavorable_terms,
                          missing_protections   = missing_protections,
                          clauses               = clauses,
                          key_findings          = key_findings,
                          risk_interpretation   = risk_interpretation,
                          negotiation_playbook  = negotiation_playbook,
                          contract_text_preview = contract_text,
                          contract_metadata     = metadata,
                          )
142
+
 
143
 
144
+ def _extract_findings(self, risk_analysis: RiskScore, risk_interpretation: RiskInterpretation, negotiation_playbook: NegotiationPlaybook,
145
+ unfavorable_terms: List[Dict], missing_protections: List[Dict], clauses: List) -> List[str]:
146
+ """
147
+ Extract findings from all analysis components
148
+ """
149
+ findings = list()
150
+
151
+ # Overall risk context
152
+ if (risk_analysis.overall_score >= 80):
 
 
 
 
153
  findings.append("CRITICAL RISK LEVEL: Contract presents unacceptable risk requiring immediate attention")
154
+
155
+ elif (risk_analysis.overall_score >= 60):
156
  findings.append("HIGH RISK LEVEL: Significant concerns requiring substantial negotiation")
157
 
158
+ # Critical unfavorable terms
159
+ critical_terms = [t for t in unfavorable_terms if (self._get_severity(t) == "critical")]
160
+
161
  if critical_terms:
162
  findings.append(f"{len(critical_terms)} CRITICAL unfavorable terms identified")
163
  for term in critical_terms[:2]:
164
+ term_name = self._get_term_name(term = term)
165
+
166
  findings.append(f"Critical: {term_name}")
167
 
168
+ # Critical missing protections
169
+ critical_protections = [p for p in missing_protections if (self._get_importance(p) == "critical")]
170
+
171
  if critical_protections:
172
  findings.append(f"{len(critical_protections)} CRITICAL protections missing")
173
  for prot in critical_protections[:2]:
174
+ prot_name = self._get_protection_name(protection = prot)
175
+
176
  findings.append(f"Missing: {prot_name}")
177
 
178
+ # High-risk categories
179
+ high_risk_categories = [cat for cat, score in risk_analysis.category_scores.items() if (score >= 70)]
 
180
  if high_risk_categories:
181
  findings.append(f"High-risk categories: {', '.join(high_risk_categories)}")
182
 
183
+ # Walk-away items from negotiation playbook
184
  if negotiation_playbook and negotiation_playbook.walk_away_items:
185
  findings.append(f"{len(negotiation_playbook.walk_away_items)} potential deal-breakers identified")
186
 
187
+ # Key concerns from risk interpretation
188
  if risk_interpretation and risk_interpretation.key_concerns:
189
  top_concerns = risk_interpretation.key_concerns[:2]
190
  for concern in top_concerns:
191
  findings.append(f"Key concern: {concern}")
192
 
193
+ return findings
194
 
195
+
196
+ def _generate_summary(self, context: SummaryContext) -> str:
197
+ """
198
+ Generate enhanced summary using comprehensive context
199
+ """
200
+ prompt = self._build_summary_prompt(context)
201
+ system_prompt = self._build_system_prompt()
202
 
203
  try:
204
+ response = self.llm_manager.complete(prompt = prompt,
205
+ system_prompt = system_prompt,
206
+ temperature = 0.3,
207
+ max_tokens = 500,
208
+ json_mode = False,
209
+ )
210
+
 
211
  if response.success and response.text.strip():
212
+ return self._clean_summary_response(text = response.text)
213
+
214
  else:
215
  raise ValueError(f"LLM generation failed: {response.error_message}")
216
 
217
  except Exception as e:
218
+ self.logger.error(f"Enhanced LLM summary generation failed: {e}")
219
  # Fallback to basic summary
220
+ return self._generate_fallback_summary_from_context(context = context)
221
 
 
 
 
 
222
 
223
+ def _build_system_prompt(self) -> str:
224
+ """
225
+ Build system prompt for executive summary generation
226
+ """
227
+ system_prompt = """
228
+ You are a senior contract risk analyst. Generate CONCISE executive summaries.
229
 
230
+ CRITICAL REQUIREMENTS:
231
+ 1. Maximum 120 words (strict limit)
232
+ 2. Must mention SPECIFIC clause numbers (e.g., Clause 8.2, Clause 9.5)
233
+ 3. Direct, urgent tone - no hedging or academic language
234
+ 4. Focus ONLY on top 3 critical risks
 
 
235
 
236
+ STRUCTURE (3-4 sentences total):
237
+ Sentence 1: Overall risk assessment with contract type
238
+ Sentence 2-3: Top 2-3 critical risks with SPECIFIC clause references
239
+ Sentence 4: Brief actionable conclusion
 
 
240
 
241
+ TONE EXAMPLES:
242
+ GOOD: "This employment agreement is heavily skewed in favor of the Employer. Clause 8.2 fails to define post-probation salary. Clause 11.2 allows illegal wage forfeiture."
243
+ BAD: "The comprehensive analysis indicates that there are several concerns that require attention. It is essential to carefully review..."
 
 
 
244
 
245
+ FORBIDDEN PHRASES:
246
+ - "comprehensive analysis"
247
+ - "it is essential to"
248
+ - "requires attention"
249
+ - "should be reviewed"
250
+ - "it is recommended"
251
 
252
+ OUTPUT: Pure paragraph text only. No formatting, no bullets, no headers.
253
+ """
 
 
 
 
 
 
254
 
255
+ return system_prompt
 
 
 
 
 
 
 
 
 
256
 
 
 
257
 
258
+ def _build_summary_prompt(self, context: SummaryContext) -> str:
259
+ """
260
+ Build prompt for executive summary generation
261
+ """
262
+ # Extract top critical issues only
263
+ critical_terms = [t for t in context.unfavorable_terms if self._get_severity(t) == "critical"][:10]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
 
265
+ critical_protections = [p for p in context.missing_protections if self._get_importance(p) == "critical"][:10]
266
+
267
+ # Build concise context
268
+ critical_issues_text = ""
269
 
 
 
270
  if critical_terms:
271
+ critical_issues_text += "CRITICAL UNFAVORABLE TERMS:\n"
272
+
273
+ for term in critical_terms:
274
+ clause_reference = self._get_clause_reference(term = term)
275
+ term_name = self._get_term_name(term = term)
276
+ critical_issues_text += f"- {clause_reference}: {term_name}\n"
277
 
 
 
278
  if critical_protections:
279
+ critical_issues_text += "\nCRITICAL MISSING PROTECTIONS:\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
 
281
+ for protection in critical_protections:
282
+ protection_name = self._get_protection_name(protection = protection)
283
+ critical_issues_text += f"- {protection_name}\n"
 
 
 
 
 
 
284
 
285
+ # Determine risk tone
286
+ if (context.risk_score >= 80):
287
+ risk_tone = "heavily skewed/very high risk/presents unacceptable risk"
 
288
 
289
+ elif (context.risk_score >= 60):
290
+ risk_tone = "significantly unfavorable/high risk/substantial concerns"
291
+
292
+ elif (context.risk_score >= 40):
293
+ risk_tone = "moderately concerning/notable risk/requires negotiation"
294
+
 
 
 
 
 
 
 
 
295
  else:
296
+ risk_tone = "generally reasonable/manageable risk/standard concerns"
297
+
298
+ summary_prompt = f"""
299
+ CONTRACT ANALYSIS DATA:
300
+
301
+ - Type: {context.contract_type.replace('_', ' ').title()}
302
+ - Risk Score: {context.risk_score}/100
303
+ - Risk Level: {context.risk_level}
304
+ - Appropriate Tone: {risk_tone}
305
+
306
+ {critical_issues_text}
307
+
308
+ TASK:
309
+ Write a 100-120 word executive summary following this EXACT structure:
310
+
311
+ 1. First sentence: "This [contract type] [risk assessment with tone matching score]"
312
+ 2. Second sentence: State top critical risk with SPECIFIC clause number
313
+ 3. Third sentence: State second critical risk with SPECIFIC clause number
314
+ 4. Fourth sentence: Brief conclusion about action needed
315
+
316
+ EXAMPLE (for 85/100 risk employment contract):
317
+ "This employment agreement is heavily skewed in favor of the Employer, presenting a very high risk to the Employee. Key concerns include Clause 9.5's extremely broad 24-month non-compete against the entire industry, and Clause 11.2's punitive penalty allowing forfeiture of earned wages. The termination clauses in Clause 17 are highly asymmetrical, giving the employer unilateral power. Significant negotiation is required before signing."
318
+
319
+ YOUR TURN - Generate summary for THIS contract:
320
+ """
321
+
322
+ return summary_prompt
323
 
324
+
325
  def _clean_summary_response(self, text: str) -> str:
326
+ """
327
+ Clean and format the LLM response
328
+ """
329
  # Remove any markdown formatting
330
+ text = text.replace('**', '').replace('*', '').replace('#', '')
331
 
332
  # Remove common LLM artifacts and empty lines
333
+ lines = text.split('\n')
334
+ cleaned_lines = list()
335
 
336
  for line in lines:
337
  line = line.strip()
 
351
 
352
  return summary
353
 
354
+
355
+ def _generate_fallback_summary(self, contract_text: str, classification: ContractCategory, risk_analysis: RiskScore, unfavorable_terms: List[Dict], missing_protections: List[Dict]) -> str:
356
+ """
357
+ Generate enhanced fallback summary
358
+ """
 
 
 
359
  contract_type_display = classification.category.replace('_', ' ').title()
360
 
361
  # Count critical items
362
+ critical_terms = len([t for t in unfavorable_terms if (self._get_severity(t) == "critical")])
363
+ critical_protections = len([p for p in missing_protections if (self._get_importance(p) == "critical")])
364
 
365
+ # Risk assessment
366
+ if (risk_analysis.overall_score >= 80):
367
  risk_assessment = f"This {contract_type_display} presents a CRITICAL level of risk"
368
+ action = "requires immediate executive attention and significant revision before consideration"
369
+
370
+ elif (risk_analysis.overall_score >= 60):
371
  risk_assessment = f"This {contract_type_display} presents a HIGH level of risk"
372
+ action = "requires careful legal review and substantial negotiation to mitigate key concerns"
373
+
374
+ elif (risk_analysis.overall_score >= 40):
375
  risk_assessment = f"This {contract_type_display} presents a MODERATE level of risk"
376
+ action = "requires professional review and selective negotiation on specific provisions"
377
+
378
  else:
379
  risk_assessment = f"This {contract_type_display} presents a LOW level of risk"
380
  action = "appears generally reasonable but should undergo standard legal review"
381
 
382
+ summary = f"{risk_assessment} with an overall risk score of {risk_analysis.overall_score}/100. "
383
  summary += f"The agreement {action}. "
384
 
385
  # Add critical items context
386
+ if (critical_terms > 0):
387
  summary += f"Analysis identified {critical_terms} critical unfavorable terms "
388
+
389
  if critical_protections > 0:
390
  summary += f"and {critical_protections} critical missing protections. "
391
+
392
  else:
393
  summary += f"and {len(missing_protections)} missing standard protections. "
394
+
395
  else:
396
  summary += f"Review identified {len(unfavorable_terms)} areas for improvement. "
397
 
398
  # Add high-risk categories context
399
+ high_risk_categories = [cat for cat, score in risk_analysis.category_scores.items() if (score >= 60)]
400
+
401
  if high_risk_categories:
402
  category_names = [cat.replace('_', ' ').title() for cat in high_risk_categories[:2]]
403
+ summary += f"Particular attention should be given to {', '.join(category_names)} provisions. "
404
 
405
  summary += "Proceed with the detailed negotiation strategy and risk mitigation recommendations provided in the full analysis."
406
 
407
  return summary
408
+
409
 
410
+ def _generate_fallback_summary_from_context(self, context: SummaryContext) -> str:
411
+ """
412
+ Generate fallback summary from context object
413
+ """
414
+ # Access attributes safely, providing defaults if needed by the fallback logic
415
+ text_preview = context.contract_text_preview if context.contract_text_preview is not None else ""
416
+ missing_prots = context.missing_protections if context.missing_protections is not None else []
417
+ unfav_terms = context.unfavorable_terms if context.unfavorable_terms is not None else []
418
+
419
+ return self._generate_fallback_summary(contract_text = text_preview,
420
+ classification = type('MockClassification', (), {'category': context.contract_type})(),
421
+ risk_analysis = type('MockRiskAnalysis', (), {'overall_score': context.risk_score, 'risk_level': context.risk_level, 'category_scores': context.category_scores or {}})(),
422
+ unfavorable_terms = unfav_terms,
423
+ missing_protections = missing_prots,
424
+ )
425
 
426
+
427
  def _get_severity(self, term) -> str:
428
+ """
429
+ Safely get severity from term object or dict
430
+ """
431
  try:
432
+ if (hasattr(term, 'severity')):
433
  return term.severity
434
+
435
  else:
436
  return term.get('severity', 'unknown')
437
+
438
  except (AttributeError, KeyError):
439
  return 'unknown'
440
 
441
+
442
  def _get_importance(self, protection) -> str:
443
+ """
444
+ Safely get importance from protection object or dict
445
+ """
446
  try:
447
  if hasattr(protection, 'importance'):
448
  return protection.importance
449
+
450
  else:
451
  return protection.get('importance', 'unknown')
452
+
453
  except (AttributeError, KeyError):
454
  return 'unknown'
455
 
456
+
457
  def _get_term_name(self, term) -> str:
458
+ """
459
+ Safely get term name
460
+ """
461
  try:
462
  if hasattr(term, 'term'):
463
  return term.term
464
+
465
  else:
466
  return term.get('term', 'Unknown Term')
467
+
468
  except (AttributeError, KeyError):
469
  return 'Unknown Term'
470
 
471
+
472
  def _get_protection_name(self, protection) -> str:
473
+ """
474
+ Safely get protection name
475
+ """
476
  try:
477
  if hasattr(protection, 'protection'):
478
  return protection.protection
479
+
480
  else:
481
  return protection.get('protection', 'Unknown Protection')
482
+
483
  except (AttributeError, KeyError):
484
  return 'Unknown Protection'
485
 
486
+
487
  def _get_explanation(self, item) -> str:
488
+ """
489
+ Safely get explanation
490
+ """
491
  try:
492
  if hasattr(item, 'explanation'):
493
  return item.explanation
494
+
495
  else:
496
  return item.get('explanation', 'No explanation available')
497
+
498
  except (AttributeError, KeyError):
499
  return 'No explanation available'
500
 
501
+
502
+ def _get_clause_reference(self, term) -> str:
503
+ """
504
+ Safely get clause reference from term
505
+ """
506
+ try:
507
+ if hasattr(term, 'clause_reference'):
508
+ ref = term.clause_reference
509
+ return ref if ref and ref != 'None' else 'Multiple clauses'
510
+
511
+ else:
512
+ ref = term.get('clause_reference', '')
513
+ return ref if ref and ref != 'None' else 'Multiple clauses'
514
+
515
+ except (AttributeError, KeyError):
516
+ return 'Unknown clause'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
services/term_analyzer.py CHANGED
@@ -7,7 +7,6 @@ from typing import Tuple
7
  from pathlib import Path
8
  from typing import Optional
9
  from collections import Counter
10
- from dataclasses import dataclass
11
 
12
  # Add parent directory to path for imports
13
  sys.path.append(str(Path(__file__).parent.parent))
@@ -17,43 +16,8 @@ from utils.logger import log_error
17
  from config.risk_rules import RiskRules
18
  from config.risk_rules import ContractType
19
  from utils.logger import ContractAnalyzerLogger
20
- from services.clause_extractor import ExtractedClause
21
-
22
-
23
- @dataclass
24
- class UnfavorableTerm:
25
- """
26
- Detected unfavorable term with comprehensive risk analysis
27
- """
28
- term : str
29
- category : str
30
- severity : str # "critical", "high", "medium", "low"
31
- explanation : str
32
- risk_score : float # 0-100 risk score
33
- clause_reference : Optional[str] = None
34
- suggested_fix : Optional[str] = None
35
- contract_type : Optional[str] = None
36
- specific_text : Optional[str] = None
37
- benchmark_info : Optional[str] = None # Industry benchmark comparison
38
- legal_basis : Optional[str] = None # Legal principle violated
39
-
40
- def to_dict(self) -> Dict:
41
- """
42
- Convert to dictionary
43
- """
44
- return {"term" : self.term,
45
- "category" : self.category,
46
- "severity" : self.severity,
47
- "explanation" : self.explanation,
48
- "risk_score" : round(self.risk_score, 2),
49
- "clause_reference" : self.clause_reference,
50
- "suggested_fix" : self.suggested_fix,
51
- "contract_type" : self.contract_type,
52
- "specific_text" : self.specific_text,
53
- "benchmark_info" : self.benchmark_info,
54
- "legal_basis" : self.legal_basis,
55
- }
56
-
57
 
58
 
59
  class TermAnalyzer:
@@ -79,6 +43,35 @@ class TermAnalyzer:
79
  contract_type = contract_type.value,
80
  category_weights = self.category_weights,
81
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
 
84
  @ContractAnalyzerLogger.log_execution_time("analyze_unfavorable_terms")
@@ -148,6 +141,9 @@ class TermAnalyzer:
148
  terms = list()
149
  text_lower = clause.text.lower()
150
 
 
 
 
151
  # Risky Patterns Analysis from RiskRules
152
  for pattern, risk_score, description in self.risk_rules.RISKY_PATTERNS:
153
  matches = re.finditer(pattern, text_lower, re.IGNORECASE)
@@ -156,7 +152,7 @@ class TermAnalyzer:
156
  severity = self._score_to_severity(risk_score)
157
 
158
  terms.append(UnfavorableTerm(term = description,
159
- category = clause.category,
160
  severity = severity,
161
  explanation = self._generate_pattern_explanation(description, match.group()),
162
  risk_score = risk_score,
@@ -174,7 +170,7 @@ class TermAnalyzer:
174
  severity = self._score_to_severity(risk_score)
175
 
176
  terms.append(UnfavorableTerm(term = f"Critical Risk: {keyword.title()}",
177
- category = clause.category,
178
  severity = severity,
179
  explanation = self._generate_keyword_explanation(keyword, clause.category),
180
  risk_score = risk_score,
@@ -192,7 +188,7 @@ class TermAnalyzer:
192
  severity = self._score_to_severity(risk_score)
193
 
194
  terms.append(UnfavorableTerm(term = f"High Risk: {keyword.title()}",
195
- category = clause.category,
196
  severity = severity,
197
  explanation = self._generate_keyword_explanation(keyword, clause.category),
198
  risk_score = risk_score,
@@ -232,13 +228,16 @@ class TermAnalyzer:
232
  'force_majeure' : 'force_majeure',
233
  }
234
 
235
- risk_category = category_mapping.get(clause.category)
236
- if not risk_category or risk_category not in self.risk_rules.CLAUSE_RISK_FACTORS:
237
  return terms
238
 
239
- risk_factors = self.risk_rules.CLAUSE_RISK_FACTORS[risk_category]
240
  text_lower = clause.text.lower()
241
 
 
 
 
242
  # Check for red flags in this clause
243
  for red_flag, risk_adjustment in risk_factors["red_flags"].items():
244
  if (red_flag in text_lower):
@@ -247,12 +246,12 @@ class TermAnalyzer:
247
  severity = self._score_to_severity(total_risk)
248
 
249
  terms.append(UnfavorableTerm(term = f"Risk Factor: {red_flag.replace('_', ' ').title()}",
250
- category = clause.category,
251
  severity = severity,
252
- explanation = f"Base risk {base_risk} + {risk_adjustment} for '{red_flag}'. {self._get_risk_factor_explanation(risk_category, red_flag)}",
253
  risk_score = total_risk,
254
  clause_reference = clause.reference,
255
- suggested_fix = self._get_risk_factor_fix(risk_category, red_flag),
256
  contract_type = self.contract_type.value,
257
  specific_text = red_flag,
258
  legal_basis = self._get_legal_basis(red_flag)
@@ -271,20 +270,31 @@ class TermAnalyzer:
271
  # Notice period imbalance between the parties
272
  notice_imbalance = self._check_notice_imbalance(clauses = clauses)
273
  if notice_imbalance:
 
 
274
  terms.append(notice_imbalance)
275
 
276
  # Missing reciprocal provisions
277
  missing_reciprocal = self._check_missing_reciprocal(text = text,
278
  clauses = clauses,
279
  )
 
 
 
280
  terms.extend(missing_reciprocal)
281
 
282
  # Conflicting clauses
283
  conflicts = self._check_conflicting_clauses(clauses = clauses)
 
 
 
284
  terms.extend(conflicts)
285
 
286
  # One-sided discretionary powers
287
  one_sided_powers = self._check_one_sided_discretion(clauses = clauses)
 
 
 
288
  terms.extend(one_sided_powers)
289
 
290
  return terms
@@ -298,8 +308,12 @@ class TermAnalyzer:
298
 
299
  for protection, config in self.risk_rules.PROTECTION_CHECKLIST.items():
300
  if not self._has_protection(clauses, protection, config['categories']):
 
 
 
 
301
  terms.append(UnfavorableTerm(term = f"Missing Protection: {protection.replace('_', ' ').title()}",
302
- category = config['categories'][0] if config['categories'] else "general",
303
  severity = self._score_to_severity(config['risk_if_missing']),
304
  explanation = f"Missing critical protection: {protection}. {self._get_missing_protection_explanation(protection)}",
305
  risk_score = config['risk_if_missing'],
@@ -320,6 +334,10 @@ class TermAnalyzer:
320
 
321
  for clause in clauses:
322
  benchmark_issues = self._check_benchmark_compliance(clause = clause)
 
 
 
 
323
  terms.extend(benchmark_issues)
324
 
325
  return terms
@@ -355,11 +373,14 @@ class TermAnalyzer:
355
  ratio = max_period / min_period
356
 
357
  if (ratio >= 2):
358
- severity = "critical" if (ratio >= 3) else "high"
359
- risk_score = 80 if (ratio >= 3) else 60
 
 
 
360
 
361
  return UnfavorableTerm(term = "Imbalanced Notice Periods",
362
- category = "termination",
363
  severity = severity,
364
  explanation = f"Significant notice period imbalance: {max_period} days vs {min_period} days (ratio: {ratio:.1f}x). Creates unfair burden.",
365
  risk_score = risk_score,
@@ -386,8 +407,11 @@ class TermAnalyzer:
386
  has_mutual = any("mutual" in c.text.lower() or "both parties" in c.text.lower() or "each party" in c.text.lower() for c in indem_clauses)
387
 
388
  if has_one_sided and not has_mutual:
 
 
 
389
  terms.append(UnfavorableTerm(term = "Non-Reciprocal Indemnification",
390
- category = "indemnification",
391
  severity = "critical",
392
  explanation = "One-sided indemnification creates unlimited liability exposure without reciprocal protection.",
393
  risk_score = 85,
@@ -411,19 +435,21 @@ class TermAnalyzer:
411
  by_category = dict()
412
 
413
  for clause in clauses:
414
- if clause.category not in by_category:
415
- by_category[clause.category] = []
 
 
416
 
417
- by_category[clause.category].append(clause)
418
 
419
  # Check for conflicts within each category
420
- for category, category_clauses in by_category.items():
421
  if (len(category_clauses) >= 2):
422
  for i, clause1 in enumerate(category_clauses):
423
  for clause2 in category_clauses[i+1:]:
424
  if (self._are_clauses_conflicting(clause1, clause2)):
425
- terms.append(UnfavorableTerm(term = f"Conflicting {category.title()} Clauses",
426
- category = category,
427
  severity = "high",
428
  explanation = f"Clauses {clause1.reference} and {clause2.reference} contain conflicting terms creating legal ambiguity.",
429
  risk_score = 70,
@@ -448,8 +474,11 @@ class TermAnalyzer:
448
  # Look for one-sided discretionary language
449
  if re.search(r'(sole|absolute|unfettered|unilateral)\s+(discretion|right|authority)', text_lower):
450
  if not re.search(r'(mutual|both parties|reasonable)\s+(discretion|agreement)', text_lower):
 
 
 
451
  terms.append(UnfavorableTerm(term = "One-Sided Discretionary Power",
452
- category = clause.category,
453
  severity = "high",
454
  explanation = "Gives one party unilateral decision-making power without accountability standards.",
455
  risk_score = 75,
@@ -478,7 +507,7 @@ class TermAnalyzer:
478
  unit = duration_match.group(2)
479
 
480
  # Convert to months for comparison
481
- total_months = duration * (12 if unit == "year" else 1)
482
 
483
  benchmarks = self.risk_rules.INDUSTRY_BENCHMARKS.get('non_compete_duration', {})
484
  industry_benchmark = benchmarks.get(self.contract_type.value, benchmarks.get('general', {}))
@@ -488,8 +517,11 @@ class TermAnalyzer:
488
  excessive = industry_benchmark.get('excessive', 24)
489
 
490
  if (total_months > excessive):
 
 
 
491
  terms.append(UnfavorableTerm(term = "Excessive Non-Compete Duration",
492
- category = clause.category,
493
  severity = "critical",
494
  explanation = f"{duration} {unit} non-compete exceeds industry excessive threshold of {excessive} months.",
495
  risk_score = 90,
@@ -736,4 +768,4 @@ class TermAnalyzer:
736
 
737
  log_info("Unfavorable terms category distribution", **distribution)
738
 
739
- return distribution
 
7
  from pathlib import Path
8
  from typing import Optional
9
  from collections import Counter
 
10
 
11
  # Add parent directory to path for imports
12
  sys.path.append(str(Path(__file__).parent.parent))
 
16
  from config.risk_rules import RiskRules
17
  from config.risk_rules import ContractType
18
  from utils.logger import ContractAnalyzerLogger
19
+ from services.data_models import ExtractedClause
20
+ from services.data_models import UnfavorableTerm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
 
23
  class TermAnalyzer:
 
43
  contract_type = contract_type.value,
44
  category_weights = self.category_weights,
45
  )
46
+
47
+
48
+ def _map_to_risk_category(self, clause_category: str) -> str:
49
+ """
50
+ Map clause category to risk category for proper risk scoring, ensuring unfavorable terms are correctly attributed to risk categories
51
+ for score calculation
52
+ """
53
+ # Clause categories → Risk categories
54
+ mapping = {"non_compete" : "restrictive_covenants",
55
+ "confidentiality" : "restrictive_covenants",
56
+ "termination" : "termination_rights",
57
+ "indemnification" : "liability_indemnity",
58
+ "liability" : "penalties_liability",
59
+ "compensation" : "compensation_benefits",
60
+ "intellectual_property" : "intellectual_property",
61
+ "warranty" : "warranties",
62
+ "dispute_resolution" : "dispute_resolution",
63
+ "assignment" : "assignment_change",
64
+ "amendment" : "assignment_change",
65
+ "insurance" : "insurance",
66
+ "force_majeure" : "force_majeure",
67
+ "general" : "general",
68
+ "payment" : "payment_terms",
69
+ "governing_law" : "governing_law",
70
+ }
71
+
72
+ risk_category_by_clause_category = mapping.get(clause_category, clause_category)
73
+
74
+ return risk_category_by_clause_category
75
 
76
 
77
  @ContractAnalyzerLogger.log_execution_time("analyze_unfavorable_terms")
 
141
  terms = list()
142
  text_lower = clause.text.lower()
143
 
144
+ # Map clause category to risk category for consistency
145
+ risk_category = self._map_to_risk_category(clause_category = clause.category)
146
+
147
  # Risky Patterns Analysis from RiskRules
148
  for pattern, risk_score, description in self.risk_rules.RISKY_PATTERNS:
149
  matches = re.finditer(pattern, text_lower, re.IGNORECASE)
 
152
  severity = self._score_to_severity(risk_score)
153
 
154
  terms.append(UnfavorableTerm(term = description,
155
+ category = risk_category,
156
  severity = severity,
157
  explanation = self._generate_pattern_explanation(description, match.group()),
158
  risk_score = risk_score,
 
170
  severity = self._score_to_severity(risk_score)
171
 
172
  terms.append(UnfavorableTerm(term = f"Critical Risk: {keyword.title()}",
173
+ category = risk_category,
174
  severity = severity,
175
  explanation = self._generate_keyword_explanation(keyword, clause.category),
176
  risk_score = risk_score,
 
188
  severity = self._score_to_severity(risk_score)
189
 
190
  terms.append(UnfavorableTerm(term = f"High Risk: {keyword.title()}",
191
+ category = risk_category,
192
  severity = severity,
193
  explanation = self._generate_keyword_explanation(keyword, clause.category),
194
  risk_score = risk_score,
 
228
  'force_majeure' : 'force_majeure',
229
  }
230
 
231
+ risk_factors_key = category_mapping.get(clause.category)
232
+ if not risk_factors_key or risk_factors_key not in self.risk_rules.CLAUSE_RISK_FACTORS:
233
  return terms
234
 
235
+ risk_factors = self.risk_rules.CLAUSE_RISK_FACTORS[risk_factors_key]
236
  text_lower = clause.text.lower()
237
 
238
+ # Map clause category to risk category for consistency
239
+ risk_category = self._map_to_risk_category(clause_category = clause.category)
240
+
241
  # Check for red flags in this clause
242
  for red_flag, risk_adjustment in risk_factors["red_flags"].items():
243
  if (red_flag in text_lower):
 
246
  severity = self._score_to_severity(total_risk)
247
 
248
  terms.append(UnfavorableTerm(term = f"Risk Factor: {red_flag.replace('_', ' ').title()}",
249
+ category = risk_category,
250
  severity = severity,
251
+ explanation = f"Base risk {base_risk} + {risk_adjustment} for '{red_flag}'. {self._get_risk_factor_explanation(risk_factors_key, red_flag)}",
252
  risk_score = total_risk,
253
  clause_reference = clause.reference,
254
+ suggested_fix = self._get_risk_factor_fix(risk_factors_key, red_flag),
255
  contract_type = self.contract_type.value,
256
  specific_text = red_flag,
257
  legal_basis = self._get_legal_basis(red_flag)
 
270
  # Notice period imbalance
271
  notice_imbalance = self._check_notice_imbalance(clauses = clauses)
272
  if notice_imbalance:
273
+ # Ensure the category used is a risk category
274
+ notice_imbalance.category = self._map_to_risk_category(clause_category = "termination")
275
  terms.append(notice_imbalance)
276
 
277
  # Missing reciprocal provisions
278
  missing_reciprocal = self._check_missing_reciprocal(text = text,
279
  clauses = clauses,
280
  )
281
+ for item in missing_reciprocal:
282
+ # Ensure the category used is a risk category
283
+ item.category = self._map_to_risk_category(clause_category = "indemnification")
284
  terms.extend(missing_reciprocal)
285
 
286
  # Conflicting clauses
287
  conflicts = self._check_conflicting_clauses(clauses = clauses)
288
+ for item in conflicts:
289
+ # Ensure the category used is a risk category
290
+ item.category = self._map_to_risk_category(clause_category = item.category)
291
  terms.extend(conflicts)
292
 
293
  # One-sided discretionary powers
294
  one_sided_powers = self._check_one_sided_discretion(clauses = clauses)
295
+ for item in one_sided_powers:
296
+ # Ensure the category used is a risk category
297
+ item.category = self._map_to_risk_category(clause_category = item.category)
298
  terms.extend(one_sided_powers)
299
 
300
  return terms
 
308
 
309
  for protection, config in self.risk_rules.PROTECTION_CHECKLIST.items():
310
  if not self._has_protection(clauses, protection, config['categories']):
311
+ # For missing protections, map the first associated category to a risk category
312
+ # This assumes config['categories'][0] is a clause category like "termination"
313
+ risk_category = self._map_to_risk_category(clause_category = config['categories'][0]) if config['categories'] else "general"
314
+
315
  terms.append(UnfavorableTerm(term = f"Missing Protection: {protection.replace('_', ' ').title()}",
316
+ category = risk_category,
317
  severity = self._score_to_severity(config['risk_if_missing']),
318
  explanation = f"Missing critical protection: {protection}. {self._get_missing_protection_explanation(protection)}",
319
  risk_score = config['risk_if_missing'],
 
334
 
335
  for clause in clauses:
336
  benchmark_issues = self._check_benchmark_compliance(clause = clause)
337
+ for item in benchmark_issues:
338
+ # Ensure the category used is a risk category
339
+ item.category = self._map_to_risk_category(clause_category = clause.category)
340
+
341
  terms.extend(benchmark_issues)
342
 
343
  return terms
 
373
  ratio = max_period / min_period
374
 
375
  if (ratio >= 2):
376
+ severity = "critical" if (ratio >= 3) else "high"
377
+ risk_score = 80 if (ratio >= 3) else 60
378
+
379
+ # Use the risk category mapping for termination
380
+ risk_category = self._map_to_risk_category(clause_category = "termination")
381
 
382
  return UnfavorableTerm(term = "Imbalanced Notice Periods",
383
+ category = risk_category,
384
  severity = severity,
385
  explanation = f"Significant notice period imbalance: {max_period} days vs {min_period} days (ratio: {ratio:.1f}x). Creates unfair burden.",
386
  risk_score = risk_score,
 
407
  has_mutual = any("mutual" in c.text.lower() or "both parties" in c.text.lower() or "each party" in c.text.lower() for c in indem_clauses)
408
 
409
  if has_one_sided and not has_mutual:
410
+ # Use the risk category mapping for indemnification
411
+ risk_category = self._map_to_risk_category(clause_category = "indemnification")
412
+
413
  terms.append(UnfavorableTerm(term = "Non-Reciprocal Indemnification",
414
+ category = risk_category,
415
  severity = "critical",
416
  explanation = "One-sided indemnification creates unlimited liability exposure without reciprocal protection.",
417
  risk_score = 85,
 
435
  by_category = dict()
436
 
437
  for clause in clauses:
438
+ # Map the clause category to the risk category for grouping purposes
439
+ risk_cat = self._map_to_risk_category(clause_category = clause.category)
440
+ if risk_cat not in by_category:
441
+ by_category[risk_cat] = []
442
 
443
+ by_category[risk_cat].append(clause)
444
 
445
  # Check for conflicts within each category
446
+ for risk_category, category_clauses in by_category.items():
447
  if (len(category_clauses) >= 2):
448
  for i, clause1 in enumerate(category_clauses):
449
  for clause2 in category_clauses[i+1:]:
450
  if (self._are_clauses_conflicting(clause1, clause2)):
451
+ terms.append(UnfavorableTerm(term = f"Conflicting {risk_category.title()} Clauses",
452
+ category = risk_category,
453
  severity = "high",
454
  explanation = f"Clauses {clause1.reference} and {clause2.reference} contain conflicting terms creating legal ambiguity.",
455
  risk_score = 70,
 
474
  # Look for one-sided discretionary language
475
  if re.search(r'(sole|absolute|unfettered|unilateral)\s+(discretion|right|authority)', text_lower):
476
  if not re.search(r'(mutual|both parties|reasonable)\s+(discretion|agreement)', text_lower):
477
+ # Use the risk category mapping for the clause's category
478
+ risk_category = self._map_to_risk_category(clause_category = clause.category)
479
+
480
  terms.append(UnfavorableTerm(term = "One-Sided Discretionary Power",
481
+ category = risk_category,
482
  severity = "high",
483
  explanation = "Gives one party unilateral decision-making power without accountability standards.",
484
  risk_score = 75,
 
507
  unit = duration_match.group(2)
508
 
509
  # Convert to months for comparison
510
+ total_months = duration * (12 if (unit == "year") else 1)
511
 
512
  benchmarks = self.risk_rules.INDUSTRY_BENCHMARKS.get('non_compete_duration', {})
513
  industry_benchmark = benchmarks.get(self.contract_type.value, benchmarks.get('general', {}))
 
517
  excessive = industry_benchmark.get('excessive', 24)
518
 
519
  if (total_months > excessive):
520
+ # Use the risk category mapping for non_compete
521
+ risk_category = self._map_to_risk_category(clause_category = clause.category)
522
+
523
  terms.append(UnfavorableTerm(term = "Excessive Non-Compete Duration",
524
+ category = risk_category,
525
  severity = "critical",
526
  explanation = f"{duration} {unit} non-compete exceeds industry excessive threshold of {excessive} months.",
527
  risk_score = 90,
 
768
 
769
  log_info("Unfavorable terms category distribution", **distribution)
770
 
771
+ return distribution
static/index.html CHANGED
The diff for this file is too large to render. See raw diff
 
utils/document_reader.py CHANGED
@@ -73,11 +73,6 @@ class DocumentReader:
73
  # Normalize file_type by removing any dots and converting to lowercase
74
  normalized_file_type = file_type.lower().replace('.', '')
75
 
76
- # Validate file type USING NORMALIZED TYPE
77
- # if (normalized_file_type not in DocumentReader.ALLOWED_TYPES):
78
- # allowed_str = ', '.join(f'.{ft}' for ft in DocumentReader.ALLOWED_TYPES)
79
- # raise ValueError(f"Unsupported file type: {file_type}. Allowed types: {allowed_str}")
80
-
81
  # Validate file size
82
  DocumentReader._validate_file_size(file_path_or_bytes = file_path_or_bytes)
83
 
 
73
  # Normalize file_type by removing any dots and converting to lowercase
74
  normalized_file_type = file_type.lower().replace('.', '')
75
 
 
 
 
 
 
76
  # Validate file size
77
  DocumentReader._validate_file_size(file_path_or_bytes = file_path_or_bytes)
78