Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,6 +5,7 @@ import logging
|
|
| 5 |
import zipfile
|
| 6 |
import requests
|
| 7 |
import bibtexparser
|
|
|
|
| 8 |
from urllib.parse import quote, urlencode
|
| 9 |
import gradio as gr
|
| 10 |
from bs4 import BeautifulSoup
|
|
@@ -419,8 +420,8 @@ class PaperDownloader:
|
|
| 419 |
|
| 420 |
return None
|
| 421 |
|
| 422 |
-
def download_single_doi(self, doi
|
| 423 |
-
"""Downloads a single paper using a DOI
|
| 424 |
if not doi:
|
| 425 |
return None, "Error: DOI not provided", "Error: DOI not provided"
|
| 426 |
|
|
@@ -435,20 +436,17 @@ class PaperDownloader:
|
|
| 435 |
with open(filepath, 'wb') as f:
|
| 436 |
f.write(pdf_content)
|
| 437 |
logger.info(f"Successfully downloaded: {filename}")
|
| 438 |
-
progress(1)
|
| 439 |
return filepath, f'<div style="display: flex; align-items: center;">✓ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>', ""
|
| 440 |
else:
|
| 441 |
logger.warning(f"Could not download: {doi}")
|
| 442 |
-
progress(1)
|
| 443 |
return None, f"Could not download {doi}", f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>'
|
| 444 |
|
| 445 |
except Exception as e:
|
| 446 |
logger.error(f"Error processing {doi}: {e}")
|
| 447 |
-
progress(1)
|
| 448 |
return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
|
| 449 |
|
| 450 |
-
def download_multiple_dois(self, dois_text
|
| 451 |
-
"""Downloads multiple papers from a list of DOIs
|
| 452 |
if not dois_text:
|
| 453 |
return None, "Error: No DOIs provided", "Error: No DOIs provided"
|
| 454 |
|
|
@@ -456,13 +454,11 @@ class PaperDownloader:
|
|
| 456 |
if not dois:
|
| 457 |
return None, "Error: No valid DOIs provided", "Error: No valid DOIs provided"
|
| 458 |
|
| 459 |
-
total_dois = len(dois)
|
| 460 |
downloaded_files = []
|
| 461 |
failed_dois = []
|
| 462 |
downloaded_links = []
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
filepath, success_message, fail_message = self.download_single_doi(doi, progress=progress)
|
| 466 |
if filepath:
|
| 467 |
# Unique filename for zip
|
| 468 |
filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
|
|
@@ -473,8 +469,6 @@ class PaperDownloader:
|
|
| 473 |
|
| 474 |
else:
|
| 475 |
failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
|
| 476 |
-
progress((i + 1) / total_dois)
|
| 477 |
-
|
| 478 |
|
| 479 |
if downloaded_files:
|
| 480 |
zip_filename = 'papers.zip'
|
|
@@ -485,8 +479,8 @@ class PaperDownloader:
|
|
| 485 |
|
| 486 |
return zip_filename if downloaded_files else None, "\n".join(downloaded_links), "\n".join(failed_dois)
|
| 487 |
|
| 488 |
-
def process_bibtex(self, bib_file
|
| 489 |
-
"""Process BibTeX file and download papers with multiple strategies
|
| 490 |
# Read BibTeX file content from the uploaded object
|
| 491 |
try:
|
| 492 |
with open(bib_file.name, 'r', encoding='utf-8') as f:
|
|
@@ -510,11 +504,9 @@ class PaperDownloader:
|
|
| 510 |
downloaded_files = []
|
| 511 |
failed_dois = []
|
| 512 |
downloaded_links = []
|
| 513 |
-
|
| 514 |
-
total_dois = len(dois)
|
| 515 |
|
| 516 |
# Download PDFs
|
| 517 |
-
for
|
| 518 |
try:
|
| 519 |
# Try to download with multiple methods with retries
|
| 520 |
pdf_content = self.download_with_retry(doi)
|
|
@@ -538,7 +530,6 @@ class PaperDownloader:
|
|
| 538 |
except Exception as e:
|
| 539 |
failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
|
| 540 |
logger.error(f"Error processing {doi}: {e}")
|
| 541 |
-
progress((i + 1) / total_dois)
|
| 542 |
|
| 543 |
# Create ZIP of downloaded papers
|
| 544 |
if downloaded_files:
|
|
@@ -550,7 +541,7 @@ class PaperDownloader:
|
|
| 550 |
|
| 551 |
return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), None
|
| 552 |
|
| 553 |
-
async def process_bibtex_async(self, bib_file
|
| 554 |
"""Process BibTeX file and download papers with multiple strategies"""
|
| 555 |
# Read BibTeX file content from the uploaded object
|
| 556 |
try:
|
|
@@ -575,10 +566,9 @@ class PaperDownloader:
|
|
| 575 |
downloaded_files = []
|
| 576 |
failed_dois = []
|
| 577 |
downloaded_links = []
|
| 578 |
-
total_dois = len(dois)
|
| 579 |
|
| 580 |
# Download PDFs
|
| 581 |
-
for
|
| 582 |
try:
|
| 583 |
# Try to download with multiple methods with retries
|
| 584 |
pdf_content = await self.download_with_retry_async(doi)
|
|
@@ -602,8 +592,6 @@ class PaperDownloader:
|
|
| 602 |
except Exception as e:
|
| 603 |
failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
|
| 604 |
logger.error(f"Error processing {doi}: {e}")
|
| 605 |
-
progress((i + 1) / total_dois)
|
| 606 |
-
|
| 607 |
|
| 608 |
# Create ZIP of downloaded papers
|
| 609 |
if downloaded_files:
|
|
@@ -619,19 +607,19 @@ def create_gradio_interface():
|
|
| 619 |
"""Create Gradio interface for Paper Downloader"""
|
| 620 |
downloader = PaperDownloader()
|
| 621 |
|
| 622 |
-
async def download_papers(bib_file, doi_input, dois_input
|
| 623 |
if bib_file:
|
| 624 |
# Check file type
|
| 625 |
if not bib_file.name.lower().endswith('.bib'):
|
| 626 |
return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", None
|
| 627 |
|
| 628 |
-
zip_path, downloaded_dois, failed_dois, _ = await downloader.process_bibtex_async(bib_file
|
| 629 |
return zip_path, downloaded_dois, failed_dois, None
|
| 630 |
elif doi_input:
|
| 631 |
-
filepath, message, failed_doi = downloader.download_single_doi(doi_input
|
| 632 |
return None, message, failed_doi, filepath
|
| 633 |
elif dois_input:
|
| 634 |
-
zip_path, downloaded_dois, failed_dois = downloader.download_multiple_dois(dois_input
|
| 635 |
return zip_path, downloaded_dois, failed_dois, None
|
| 636 |
else:
|
| 637 |
return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", None
|
|
|
|
| 5 |
import zipfile
|
| 6 |
import requests
|
| 7 |
import bibtexparser
|
| 8 |
+
from tqdm import tqdm
|
| 9 |
from urllib.parse import quote, urlencode
|
| 10 |
import gradio as gr
|
| 11 |
from bs4 import BeautifulSoup
|
|
|
|
| 420 |
|
| 421 |
return None
|
| 422 |
|
| 423 |
+
def download_single_doi(self, doi):
|
| 424 |
+
"""Downloads a single paper using a DOI"""
|
| 425 |
if not doi:
|
| 426 |
return None, "Error: DOI not provided", "Error: DOI not provided"
|
| 427 |
|
|
|
|
| 436 |
with open(filepath, 'wb') as f:
|
| 437 |
f.write(pdf_content)
|
| 438 |
logger.info(f"Successfully downloaded: {filename}")
|
|
|
|
| 439 |
return filepath, f'<div style="display: flex; align-items: center;">✓ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>', ""
|
| 440 |
else:
|
| 441 |
logger.warning(f"Could not download: {doi}")
|
|
|
|
| 442 |
return None, f"Could not download {doi}", f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>'
|
| 443 |
|
| 444 |
except Exception as e:
|
| 445 |
logger.error(f"Error processing {doi}: {e}")
|
|
|
|
| 446 |
return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
|
| 447 |
|
| 448 |
+
def download_multiple_dois(self, dois_text):
|
| 449 |
+
"""Downloads multiple papers from a list of DOIs"""
|
| 450 |
if not dois_text:
|
| 451 |
return None, "Error: No DOIs provided", "Error: No DOIs provided"
|
| 452 |
|
|
|
|
| 454 |
if not dois:
|
| 455 |
return None, "Error: No valid DOIs provided", "Error: No valid DOIs provided"
|
| 456 |
|
|
|
|
| 457 |
downloaded_files = []
|
| 458 |
failed_dois = []
|
| 459 |
downloaded_links = []
|
| 460 |
+
for i, doi in enumerate(tqdm(dois, desc="Downloading papers")):
|
| 461 |
+
filepath, success_message, fail_message = self.download_single_doi(doi)
|
|
|
|
| 462 |
if filepath:
|
| 463 |
# Unique filename for zip
|
| 464 |
filename = f"{str(doi).replace('/', '_').replace('.', '_')}_{i}.pdf"
|
|
|
|
| 469 |
|
| 470 |
else:
|
| 471 |
failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
|
|
|
|
|
|
|
| 472 |
|
| 473 |
if downloaded_files:
|
| 474 |
zip_filename = 'papers.zip'
|
|
|
|
| 479 |
|
| 480 |
return zip_filename if downloaded_files else None, "\n".join(downloaded_links), "\n".join(failed_dois)
|
| 481 |
|
| 482 |
+
def process_bibtex(self, bib_file):
|
| 483 |
+
"""Process BibTeX file and download papers with multiple strategies"""
|
| 484 |
# Read BibTeX file content from the uploaded object
|
| 485 |
try:
|
| 486 |
with open(bib_file.name, 'r', encoding='utf-8') as f:
|
|
|
|
| 504 |
downloaded_files = []
|
| 505 |
failed_dois = []
|
| 506 |
downloaded_links = []
|
|
|
|
|
|
|
| 507 |
|
| 508 |
# Download PDFs
|
| 509 |
+
for doi in tqdm(dois, desc="Downloading papers"):
|
| 510 |
try:
|
| 511 |
# Try to download with multiple methods with retries
|
| 512 |
pdf_content = self.download_with_retry(doi)
|
|
|
|
| 530 |
except Exception as e:
|
| 531 |
failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
|
| 532 |
logger.error(f"Error processing {doi}: {e}")
|
|
|
|
| 533 |
|
| 534 |
# Create ZIP of downloaded papers
|
| 535 |
if downloaded_files:
|
|
|
|
| 541 |
|
| 542 |
return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), None
|
| 543 |
|
| 544 |
+
async def process_bibtex_async(self, bib_file):
|
| 545 |
"""Process BibTeX file and download papers with multiple strategies"""
|
| 546 |
# Read BibTeX file content from the uploaded object
|
| 547 |
try:
|
|
|
|
| 566 |
downloaded_files = []
|
| 567 |
failed_dois = []
|
| 568 |
downloaded_links = []
|
|
|
|
| 569 |
|
| 570 |
# Download PDFs
|
| 571 |
+
for doi in tqdm(dois, desc="Downloading papers"):
|
| 572 |
try:
|
| 573 |
# Try to download with multiple methods with retries
|
| 574 |
pdf_content = await self.download_with_retry_async(doi)
|
|
|
|
| 592 |
except Exception as e:
|
| 593 |
failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
|
| 594 |
logger.error(f"Error processing {doi}: {e}")
|
|
|
|
|
|
|
| 595 |
|
| 596 |
# Create ZIP of downloaded papers
|
| 597 |
if downloaded_files:
|
|
|
|
| 607 |
"""Create Gradio interface for Paper Downloader"""
|
| 608 |
downloader = PaperDownloader()
|
| 609 |
|
| 610 |
+
async def download_papers(bib_file, doi_input, dois_input):
|
| 611 |
if bib_file:
|
| 612 |
# Check file type
|
| 613 |
if not bib_file.name.lower().endswith('.bib'):
|
| 614 |
return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", None
|
| 615 |
|
| 616 |
+
zip_path, downloaded_dois, failed_dois, _ = await downloader.process_bibtex_async(bib_file)
|
| 617 |
return zip_path, downloaded_dois, failed_dois, None
|
| 618 |
elif doi_input:
|
| 619 |
+
filepath, message, failed_doi = downloader.download_single_doi(doi_input)
|
| 620 |
return None, message, failed_doi, filepath
|
| 621 |
elif dois_input:
|
| 622 |
+
zip_path, downloaded_dois, failed_dois = downloader.download_multiple_dois(dois_input)
|
| 623 |
return zip_path, downloaded_dois, failed_dois, None
|
| 624 |
else:
|
| 625 |
return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", None
|