Update app.py
app.py CHANGED

@@ -110,8 +110,10 @@ def remote_generate(prompt: str, max_new_tokens: int = 512, temperature: float =
     if not HF_INFERENCE_API_KEY:
         raise Exception("HF_INFERENCE_API_KEY not set for remote generation")
 
-
-
+    # New router endpoint is required by HF (replaces api-inference.huggingface.co)
+    router_url = f"https://router.huggingface.co/models/{REMOTE_LLM_MODEL}"
+    old_url = f"https://api-inference.huggingface.co/models/{REMOTE_LLM_MODEL}"
+    headers = {"Authorization": f"Bearer {HF_INFERENCE_API_KEY}", "Accept": "application/json"}
     payload = {
         "inputs": prompt,
         "parameters": {
@@ -122,8 +124,19 @@ def remote_generate(prompt: str, max_new_tokens: int = 512, temperature: float =
         }
     }
 
-    logger.info(f" → Remote inference request to {REMOTE_LLM_MODEL} (tokens={max_new_tokens}, temp={temperature})")
-
+    logger.info(f" → Remote inference request to router {REMOTE_LLM_MODEL} (tokens={max_new_tokens}, temp={temperature})")
+    try:
+        r = requests.post(router_url, headers=headers, json=payload, timeout=120)
+    except Exception as e:
+        logger.error(f" → Remote router request failed: {e}")
+        # Try older endpoint as a fallback
+        try:
+            logger.info(" → Attempting legacy api-inference endpoint as fallback")
+            r = requests.post(old_url, headers=headers, json=payload, timeout=120)
+        except Exception as e2:
+            logger.error(f" → Legacy endpoint request failed: {e2}")
+            return ""
+
     if r.status_code != 200:
         logger.error(f" → Remote inference error {r.status_code}: {r.text[:200]}")
         return ""
@@ -368,6 +381,92 @@ def generate_extractive_answer(query: str, retrieved_docs: List[Document]) -> Op
     return answer
 
 
+def scaffold_and_polish(query: str, retrieved_docs: List[Document], llm_client) -> Optional[str]:
+    """Create a concise scaffold (approx 150-220 words) from retrieved docs,
+    then ask the remote (or local) LLM to expand and polish it into a
+    320-420 word expert answer. Returns None if polishing fails.
+    """
+    logger.info(f"🎨 Building scaffold for polish: '{query}'")
+    import re
+
+    # Reuse sentence extraction logic but stop early for a compact scaffold
+    all_text = "\n\n".join([d.page_content for d in retrieved_docs[:12]])
+    sentences = re.split(r'(?<=[.!?])\s+', all_text)
+    sentences = [s.strip() for s in sentences if len(s.strip()) > 30]
+    if not sentences:
+        logger.warning(" → No sentences to build scaffold")
+        return None
+
+    # Score sentences by overlap with query + fashion keywords
+    query_tokens = set(re.findall(r"\w+", query.lower()))
+    fashion_keywords = set(["outfit","wear","wardrobe","style","colors","layer","blazer",
+                            "trousers","dress","shoes","sweater","jacket","care","wool","fit",
+                            "tailor","neutral","accessory","season","fall"])
+    keywords = query_tokens.union(fashion_keywords)
+
+    scored = []
+    for s in sentences:
+        s_tokens = set(re.findall(r"\w+", s.lower()))
+        score = len(s_tokens & keywords)
+        score += min(2, len(s.split()) // 30)
+        scored.append((score, s))
+
+    scored.sort(key=lambda x: x[0], reverse=True)
+    scaffold_parts = []
+    word_count = 0
+    for _, s in scored:
+        scaffold_parts.append(s)
+        word_count = len(" ".join(scaffold_parts).split())
+        if word_count >= 180:
+            break
+
+    scaffold = "\n\n".join(scaffold_parts).strip()
+    if not scaffold:
+        logger.warning(" → Scaffold empty after selection")
+        return None
+
+    # Craft polish prompt
+    polish_prompt = f"""Please rewrite and expand the draft below into a clear, expert, natural-flowing answer of about 320-420 words to the question: {query}
+
+Requirements:
+- Keep paragraphs natural and connected.
+- Preserve factual content from the draft and avoid inventing unsupported facts.
+- Use a friendly, expert tone and provide practical, actionable advice.
+
+Draft:
+{scaffold}
+
+Answer:
+"""
+
+    logger.info(" → Polishing scaffold with LLM")
+    try:
+        if USE_REMOTE_LLM:
+            polished = remote_generate(polish_prompt, max_new_tokens=600, temperature=0.72, top_p=0.92)
+        else:
+            out = llm_client(polish_prompt, max_new_tokens=600, temperature=0.72, top_p=0.92, do_sample=True, num_beams=1)
+            polished = out[0].get('generated_text', '') if isinstance(out, list) and out else str(out)
+        polished = polished.strip()
+    except Exception as e:
+        logger.error(f" → Polishing error: {e}")
+        return None
+
+    if not polished:
+        logger.warning(" → Polished output empty")
+        return None
+
+    final_words = polished.split()
+    fw = len(final_words)
+    if fw < 300:
+        logger.warning(f" → Polished output too short ({fw} words)")
+        return None
+    if fw > 460:
+        polished = ' '.join(final_words[:420]) + '...'
+
+    logger.info(f" ✅ Polished answer ready ({len(polished.split())} words)")
+    return polished
+
+
 def retrieve_knowledge_langchain(
     query: str,
     vectorstore,
@@ -639,9 +738,20 @@ def generate_answer_langchain(
 
     if not llm_answer:
         logger.error(f" → All 2 LLM attempts failed")
-        #
-        #
-        #
+        # Next attempt: if remote LLM is available, build a short scaffold from
+        # retrieved documents and ask the remote model to polish/expand it. This
+        # is more reliable than single-shot long generation on some models.
+        if USE_REMOTE_LLM:
+            try:
+                logger.info(" → Attempting scaffold-and-polish using remote LLM")
+                polished = scaffold_and_polish(query, retrieved_docs, llm_client)
+                if polished:
+                    logger.info(" ✅ Scaffold-and-polish produced an answer")
+                    return polished
+            except Exception as e:
+                logger.error(f" → Scaffold-and-polish error: {e}")
+
+        # Final fallback: extractive templated answer (guaranteed deterministic)
         try:
             logger.info(" → Using extractive fallback generator")
             fallback = generate_extractive_answer(query, retrieved_docs)
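Note: the first two hunks split the new request flow across the URL setup and the POST/fallback logic, so it is easiest to read consolidated. Below is a minimal sketch of the same pattern, assuming only the requests library; the helper name post_with_fallback and the fixed max_new_tokens value are illustrative, not part of the commit.

import requests

def post_with_fallback(prompt: str, model: str, api_key: str, timeout: int = 120):
    # Try the new HF router endpoint first; fall back to the legacy host only
    # when the connection itself fails (non-200 responses are returned as-is,
    # matching the behavior in the diff).
    headers = {"Authorization": f"Bearer {api_key}", "Accept": "application/json"}
    payload = {"inputs": prompt, "parameters": {"max_new_tokens": 512}}
    urls = [
        f"https://router.huggingface.co/models/{model}",         # new router endpoint
        f"https://api-inference.huggingface.co/models/{model}",  # legacy fallback
    ]
    for url in urls:
        try:
            return requests.post(url, headers=headers, json=payload, timeout=timeout)
        except requests.RequestException:
            continue  # connection-level failure: try the next endpoint
    return None  # both endpoints unreachable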