satyakimitra committed on
Commit
9a9ad3b
·
1 Parent(s): bdedf43

Everything updated

Browse files
Files changed (1) hide show
  1. services/market_comparator.py +0 -468
services/market_comparator.py DELETED
@@ -1,468 +0,0 @@
1
- """
2
- Market Comparator
3
- Compares contract terms to market standards using semantic similarity
4
- """
5
-
6
- import torch
7
- from typing import List, Dict, Tuple, Optional, Any
8
- from dataclasses import dataclass
9
- import numpy as np
10
- import sys
11
- from pathlib import Path
12
-
13
- # Add parent directory to path for imports
14
- sys.path.append(str(Path(__file__).parent.parent))
15
-
16
- from services.clause_extractor import ExtractedClause
17
- from utils.logger import ContractAnalyzerLogger, log_info
18
-
19
-
20
@dataclass
class MarketComparison:
    """
    Outcome of comparing one contract clause against market-standard templates

    Holds the (possibly truncated) clause text, the closest market-standard
    template, the similarity between the two and the resulting assessment.
    """
    clause_category: str
    user_clause: str
    market_standard: str
    similarity_score: float
    # Assessment label, e.g. "favorable", "standard", "unfavorable",
    # "concerning" or "unclear"
    assessment: str
    explanation: str
    recommendation: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialise the comparison into a plain dictionary

        Returns:
            Dictionary keyed by field name, in field declaration order, with
            the similarity score rounded to three decimal places
        """
        ordered_fields = (
            "clause_category",
            "user_clause",
            "market_standard",
            "similarity_score",
            "assessment",
            "explanation",
            "recommendation",
        )
        payload: Dict[str, Any] = {name: getattr(self, name) for name in ordered_fields}

        # Re-assigning an existing key keeps its position in the dictionary
        payload["similarity_score"] = round(self.similarity_score, 3)

        return payload
42
-
43
-
44
class MarketComparator:
    """
    Compare contract terms to market standards
    Uses semantic similarity with embedding model

    Each supported clause category has three reference templates
    ('reasonable', 'standard', 'aggressive'). A clause is embedded, matched
    to the closest template by cosine similarity, and the assessment is
    derived from which template matched and how strongly.
    """

    # Similarity at or above this value counts as a strong template match
    HIGH_SIMILARITY_THRESHOLD = 0.65
    # Similarity at or above this value (but below the high threshold) counts
    # as a partial match; anything lower is reported as "unclear"
    MODERATE_SIMILARITY_THRESHOLD = 0.50

    def __init__(self, model_loader):
        """
        Initialize market comparator

        Args:
            model_loader: ModelLoader instance for embedding model
        """
        self.model_loader = model_loader
        self.embedding_model = None
        self.logger = ContractAnalyzerLogger.get_logger()

        # category -> (tuple of template texts, their embeddings); the
        # templates are fixed per category, so they are encoded once
        self._standard_embedding_cache: Dict[str, Tuple[Tuple[str, ...], Any]] = {}

        self._lazy_load()
        self._load_market_standards()

        log_info("MarketComparator initialized")

    def _lazy_load(self):
        """
        Load the embedding model if it has not been loaded yet

        Note: this is invoked from __init__, so the model is in practice
        loaded at construction time; the guard only prevents reloading.
        """
        if self.embedding_model is None:
            log_info("Loading embedding model for market comparison...")
            self.embedding_model = self.model_loader.load_embedding_model()
            log_info("Embedding model loaded")

    def _load_market_standards(self):
        """
        Load market standard clause templates

        Populates self.market_standards, a mapping of clause category to a
        dictionary with 'reasonable', 'standard' and 'aggressive' example texts.
        """
        self.market_standards = {
            'non_compete': {
                'reasonable': (
                    "Employee agrees not to compete with Company in direct competitive business "
                    "within 50 miles for 6 months after termination."
                ),
                'standard': (
                    "Employee shall not engage in competitive activities with direct competitors "
                    "for 12 months within the geographic area of Company operations."
                ),
                'aggressive': (
                    "Employee shall not work in any capacity in the industry for 24 months globally."
                ),
            },
            'termination': {
                'reasonable': (
                    "Either party may terminate with 30 days written notice. Company shall pay "
                    "severance equal to 2 months salary if terminated without cause."
                ),
                'standard': (
                    "Either party may terminate with 30 days notice. Employee terminated without "
                    "cause receives 1 month severance."
                ),
                'aggressive': (
                    "Company may terminate immediately without cause or notice. Employee must "
                    "provide 60 days notice."
                ),
            },
            'confidentiality': {
                'reasonable': (
                    "Confidential information remains confidential for 3 years after termination, "
                    "limited to information marked confidential."
                ),
                'standard': (
                    "Employee shall maintain confidentiality of proprietary information for 5 years "
                    "after termination."
                ),
                'aggressive': (
                    "All information learned during employment remains confidential perpetually."
                ),
            },
            'intellectual_property': {
                'reasonable': (
                    "Company owns work product created for Company during employment, excluding "
                    "personal projects unrelated to Company business."
                ),
                'standard': (
                    "All work product and inventions created during employment belong to Company."
                ),
                'aggressive': (
                    "Company owns all intellectual property created by Employee during employment "
                    "and for 12 months after, including personal projects."
                ),
            },
            'indemnification': {
                'reasonable': (
                    "Each party shall indemnify the other for losses arising from their respective "
                    "negligence or willful misconduct, capped at fees paid."
                ),
                'standard': (
                    "Employee shall indemnify Company for losses arising from Employee's breach "
                    "or negligence."
                ),
                'aggressive': (
                    "Employee shall indemnify Company for all claims, with unlimited liability "
                    "including consequential damages."
                ),
            },
            'liability': {
                'reasonable': (
                    "Liability capped at 12 months of fees paid. No liability for indirect or "
                    "consequential damages."
                ),
                'standard': (
                    "Liability limited to direct damages only, capped at amount paid in preceding "
                    "12 months."
                ),
                'aggressive': (
                    "No limitation on liability. Party liable for all damages including consequential, "
                    "indirect, and punitive."
                ),
            },
            'compensation': {
                'reasonable': (
                    "Base salary of $X per year, paid bi-weekly. Bonus of up to Y% based on clear "
                    "performance metrics. Annual review guaranteed."
                ),
                'standard': (
                    "Annual salary of $X payable per company payroll schedule. Discretionary bonus "
                    "may be awarded."
                ),
                'aggressive': (
                    "Compensation to be determined. Subject to review and modification at company's "
                    "sole discretion."
                ),
            },
            'warranty': {
                'reasonable': (
                    "Services performed in professional manner consistent with industry standards. "
                    "Limited warranty for 90 days."
                ),
                'standard': (
                    "Work performed in good faith. Warranty disclaimers for merchantability and "
                    "fitness for purpose."
                ),
                'aggressive': (
                    "All warranties disclaimed. No guarantee of results. AS-IS with no recourse."
                ),
            },
        }

        log_info(f"Loaded market standards for {len(self.market_standards)} categories")

    @ContractAnalyzerLogger.log_execution_time("compare_to_market")
    def compare_to_market(self, clauses: List[ExtractedClause]) -> List[MarketComparison]:
        """
        Compare extracted clauses to market standards

        Clauses whose category has no market-standard templates are skipped.

        Args:
            clauses: List of extracted clauses

        Returns:
            List of MarketComparison objects
        """

        log_info(f"Starting market comparison for {len(clauses)} clauses")

        comparisons = []

        for clause in clauses:
            if clause.category not in self.market_standards:
                continue

            comparison = self._compare_single_clause(clause)
            if comparison:
                comparisons.append(comparison)

        log_info("Market comparison complete",
                 total_comparisons=len(comparisons),
                 unfavorable=sum(1 for c in comparisons if c.assessment == "unfavorable"))

        return comparisons

    def _get_standard_embeddings(self, category: str, standard_texts: List[str]) -> Any:
        """
        Return embeddings of a category's template texts, encoding them at most once

        The cache entry is keyed on the texts themselves, so it is rebuilt if
        the templates for a category are ever changed.

        Args:
            category: Clause category the templates belong to
            standard_texts: Template texts, in the order they should be embedded

        Returns:
            Whatever the embedding model returns for the list of texts
        """
        # Fall back to a fresh cache for instances created without __init__
        cache = getattr(self, "_standard_embedding_cache", None)
        if cache is None:
            cache = {}
            self._standard_embedding_cache = cache

        texts_key = tuple(standard_texts)
        cached = cache.get(category)
        if cached is not None and cached[0] == texts_key:
            return cached[1]

        embeddings = self.embedding_model.encode(standard_texts, convert_to_tensor=True)
        cache[category] = (texts_key, embeddings)

        return embeddings

    def _compare_single_clause(self, clause: ExtractedClause) -> Optional[MarketComparison]:
        """
        Compare single clause to market standards

        Args:
            clause: Clause to compare; its category selects the templates

        Returns:
            MarketComparison against the closest template, or None when the
            clause category has no templates
        """

        # Get market standards for this category
        standards = self.market_standards.get(clause.category)
        if not standards:
            return None

        standard_types = list(standards.keys())
        standard_texts = list(standards.values())

        # Encode user clause
        user_embedding = self.embedding_model.encode(clause.text, convert_to_tensor=True)

        # Template embeddings are invariant per category, so reuse them
        # NOTE(review): assumes encode() returns a 1-D tensor for a single
        # string and a 2-D (templates x dim) tensor for a list - confirm
        # against the embedding model in use
        standard_embeddings = self._get_standard_embeddings(
            clause.category, standard_texts
        ).to(user_embedding.device)

        # One broadcast call scores the clause against every template
        similarities = torch.nn.functional.cosine_similarity(
            user_embedding.unsqueeze(0),
            standard_embeddings,
            dim=1
        ).tolist()

        # Find best match (first template wins on ties)
        best_idx = max(range(len(similarities)), key=similarities.__getitem__)
        best_similarity = similarities[best_idx]
        best_standard_type = standard_types[best_idx]
        best_standard_text = standard_texts[best_idx]

        # Assess based on similarity and standard type
        assessment, explanation, recommendation = self._assess_comparison(
            best_standard_type,
            best_similarity,
            clause.category
        )

        # Long clauses are shortened to 150 characters for display
        if len(clause.text) > 150:
            displayed_clause = clause.text[:150] + "..."
        else:
            displayed_clause = clause.text

        return MarketComparison(
            clause_category=clause.category,
            user_clause=displayed_clause,
            market_standard=best_standard_text,
            similarity_score=best_similarity,
            assessment=assessment,
            explanation=explanation,
            recommendation=recommendation
        )

    def _assess_comparison(self, standard_type: str,
                           similarity: float,
                           category: str) -> Tuple[str, str, str]:
        """
        Assess if clause is favorable, standard, or unfavorable

        Args:
            standard_type: Key of the best-matching template
                ('reasonable', 'standard' or 'aggressive')
            similarity: Cosine similarity to that template
            category: Clause category, used in the explanation text

        Returns:
            (assessment, explanation, recommendation) tuple, where assessment
            is one of "favorable", "standard", "unfavorable", "concerning"
            or "unclear"
        """
        high = self.HIGH_SIMILARITY_THRESHOLD
        moderate = self.MODERATE_SIMILARITY_THRESHOLD

        # High similarity to reasonable standard = favorable
        if standard_type == 'reasonable' and similarity >= high:
            return (
                "favorable",
                f"This {category} clause aligns with reasonable market standards (similarity: {similarity:.2%})",
                "This is a fair term. Consider accepting as-is or requesting minor improvements."
            )

        # High similarity to standard = acceptable
        elif standard_type == 'standard' and similarity >= high:
            return (
                "standard",
                f"This {category} clause matches typical market standards (similarity: {similarity:.2%})",
                "This is standard market practice. Acceptable but could negotiate for better terms."
            )

        # High similarity to aggressive = unfavorable
        elif standard_type == 'aggressive' and similarity >= high:
            return (
                "unfavorable",
                f"This {category} clause is more aggressive than market standards (similarity: {similarity:.2%})",
                "This is unfavorable. Strongly recommend negotiating to align with market norms."
            )

        # Moderate similarity
        elif moderate <= similarity < high:
            if standard_type == 'reasonable':
                return (
                    "standard",
                    f"This {category} clause is somewhat aligned with reasonable standards",
                    "Consider requesting adjustments to better align with favorable market terms."
                )
            elif standard_type == 'aggressive':
                return (
                    "concerning",
                    f"This {category} clause shows some aggressive elements compared to market norms",
                    "Recommend negotiation to remove unfavorable provisions."
                )
            else:
                return (
                    "standard",
                    f"This {category} clause is within normal market range",
                    "Review carefully but likely acceptable if other terms are favorable."
                )

        # Low similarity (or an unrecognised template key) - unclear
        else:
            return (
                "unclear",
                f"This {category} clause is unique and difficult to compare to standard market terms",
                "Seek legal counsel for specialized assessment of these non-standard terms."
            )

    def get_recommendations(self, comparisons: List[MarketComparison]) -> List[str]:
        """
        Get actionable recommendations based on market comparison

        Emits up to five entries for "unfavorable" comparisons followed by up
        to three entries for "concerning" ones, in input order.

        Args:
            comparisons: List of market comparisons

        Returns:
            List of recommendation strings
        """

        def describe(icon: str, comp: MarketComparison, action: str) -> str:
            # recommendation is Optional; leave it out rather than print "None"
            title = comp.clause_category.replace('_', ' ').title()
            advice = " ".join(part for part in (action, comp.recommendation) if part)
            return f"{icon} {title}: {advice}"

        # Group by assessment
        unfavorable = [c for c in comparisons if c.assessment == "unfavorable"]
        concerning = [c for c in comparisons if c.assessment == "concerning"]

        # Priority recommendations for unfavorable terms (top 5)
        recommendations = [
            describe("⚠️", comp, "Negotiate to align with market standard.")
            for comp in unfavorable[:5]
        ]

        # Secondary recommendations for concerning terms (top 3)
        recommendations.extend(
            describe("📋", comp, "Consider requesting modifications.")
            for comp in concerning[:3]
        )

        log_info(f"Generated {len(recommendations)} recommendations")

        return recommendations

    def get_unfavorable_comparisons(self, comparisons: List[MarketComparison]) -> List[MarketComparison]:
        """
        Filter to only unfavorable comparisons

        Keeps assessments "unfavorable" and "aggressive". Note that
        _assess_comparison never emits "aggressive"; the label is accepted
        here for comparisons built elsewhere.
        """
        unfavorable = [c for c in comparisons if c.assessment in ("unfavorable", "aggressive")]

        log_info(f"Found {len(unfavorable)} unfavorable market comparisons")

        return unfavorable

    def get_favorable_comparisons(self, comparisons: List[MarketComparison]) -> List[MarketComparison]:
        """Filter to only favorable comparisons"""
        favorable = [c for c in comparisons if c.assessment == "favorable"]

        log_info(f"Found {len(favorable)} favorable market comparisons")

        return favorable

    def get_comparison_summary(self, comparisons: List[MarketComparison]) -> Dict[str, Any]:
        """
        Get summary statistics of market comparisons

        Args:
            comparisons: List of market comparisons

        Returns:
            Dictionary with the total, a count per assessment label and the
            average similarity rounded to three decimals (all zero when the
            input is empty)
        """
        labels = ("favorable", "standard", "unfavorable", "concerning", "unclear")

        if not comparisons:
            # Nothing to average; the empty case is not logged
            empty_summary: Dict[str, Any] = {"total": 0}
            empty_summary.update((label, 0) for label in labels)
            empty_summary["avg_similarity"] = 0.0
            return empty_summary

        assessments = [c.assessment for c in comparisons]
        similarities = [c.similarity_score for c in comparisons]

        summary: Dict[str, Any] = {"total": len(comparisons)}
        summary.update((label, assessments.count(label)) for label in labels)
        summary["avg_similarity"] = round(sum(similarities) / len(similarities), 3)

        log_info("Comparison summary", **summary)

        return summary

    def compare_specific_text(self, text: str,
                              category: str) -> Optional[MarketComparison]:
        """
        Compare specific text to market standards

        Args:
            text: Clause text to compare
            category: Category of the clause

        Returns:
            MarketComparison object or None
        """

        # Create temporary ExtractedClause
        temp_clause = ExtractedClause(
            text=text,
            reference="Manual",
            category=category,
            confidence=1.0,
            start_pos=0,
            end_pos=len(text),
            extraction_method="manual",
            risk_indicators=[],
            legal_bert_score=0.0
        )

        return self._compare_single_clause(temp_clause)

    def get_best_practice_example(self, category: str) -> Optional[str]:
        """
        Get best practice example for a category

        Args:
            category: Clause category

        Returns:
            The category's 'reasonable' template text, or None
        """

        standards = self.market_standards.get(category)
        if standards and 'reasonable' in standards:
            return standards['reasonable']

        return None

    def get_market_range(self, category: str) -> Optional[Dict[str, str]]:
        """
        Get the full market range for a category

        Args:
            category: Clause category

        Returns:
            Dictionary with reasonable/standard/aggressive examples, or None
            for an unknown category. This is the stored dictionary itself,
            not a copy.
        """

        return self.market_standards.get(category)