import json
import os
import time

import requests
from bs4 import BeautifulSoup
|
|
|
# SerpApi credential sent as the ``api_key`` request parameter.
# Prefer supplying it through the SERPAPI_KEY environment variable; the
# literal is kept only as a backward-compatible fallback.
# SECURITY: a key committed to source should be treated as leaked -- rotate
# it and drop the fallback once the environment variable is in place.
SERPAPI_KEY = (
    os.environ.get("SERPAPI_KEY")
    or "c3e3e8fd8d12ca55d8a8954a14bf827f2d4261ef55373b381661f23b1440a2af"
)
|
|
|
def google_search(query, num_results=1000, *, timeout=30):
    """Return the organic-result links SerpApi reports for a Google query.

    Args:
        query: Search terms, sent as the ``q`` parameter.
        num_results: Value sent as the ``num`` parameter.
            NOTE(review): the API may cap this well below 1000 -- confirm
            against the SerpApi documentation.
        timeout: Seconds to wait for the SerpApi response before giving up.

    Returns:
        The non-empty ``link`` values found under ``organic_results`` in the
        JSON response, in response order. Empty when the response carries no
        ``organic_results`` key.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    params = {
        "engine": "google",
        "q": query,
        "api_key": SERPAPI_KEY,
        "num": num_results,
    }
    # An explicit timeout keeps a stalled connection from hanging the script.
    resp = requests.get(
        "https://serpapi.com/search", params=params, timeout=timeout
    )
    # Surface API failures (bad key, quota, ...) instead of silently
    # reporting them as "no results".
    resp.raise_for_status()
    data = resp.json()
    return [
        result["link"]
        for result in data.get("organic_results", [])
        if result.get("link")
    ]
|
|
|
def scrape_page(url, *, timeout=10, max_chars=20000):
    """Download a page and return its text content, truncated.

    This is best-effort: any failure is reported in the returned string
    rather than raised, so one bad URL does not abort a batch.

    Args:
        url: Address of the page to download.
        timeout: Seconds passed to ``requests.get`` as its timeout.
        max_chars: Maximum number of characters of text to return.

    Returns:
        The text extracted by BeautifulSoup (pieces stripped and joined with
        newlines), cut to ``max_chars`` characters; or the string
        ``"[SCRAPE ERROR] <exception>"`` if fetching or parsing failed.
    """
    try:
        resp = requests.get(
            url, timeout=timeout, headers={"User-Agent": "Mozilla/5.0"}
        )
        # Treat 4xx/5xx responses as failures so that error pages are not
        # stored as if they were the page's real content.
        resp.raise_for_status()
        soup = BeautifulSoup(resp.content, "html.parser")
        text = soup.get_text(separator="\n", strip=True)
        return text[:max_chars]
    except Exception as e:  # deliberately broad: report, never raise
        return f"[SCRAPE ERROR] {e}"
|
|
|
def scrape_topic_and_save(topic, filename="results.json", delay=0):
    """Search for a topic, scrape every result link, and save the texts as JSON.

    Args:
        topic: Query passed to ``google_search``.
        filename: Path of the JSON file to write (UTF-8, overwritten).
        delay: Seconds to pause between consecutive page downloads. The
            default of 0 means no pause.

    The output file holds a list of ``{"url": ..., "content": ...}`` objects,
    one per link, where ``content`` is whatever ``scrape_page`` returned.
    Progress is printed to stdout.
    """
    links = google_search(topic)
    results = []
    for index, url in enumerate(links):
        # Pause only between requests, never before the first one.
        if index and delay > 0:
            time.sleep(delay)
        print(f"Scraping: {url}")
        results.append({"url": url, "content": scrape_page(url)})
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"Saved {len(results)} results to {filename}")
|
|
|
# Script entry point: prompt for a topic and run the search-and-scrape job.
if __name__ == "__main__":
    topic = input("Enter topic to search: ").strip()
    if not topic:
        # Nothing to search for; stop before calling scrape_topic_and_save.
        raise SystemExit("No topic entered.")
    scrape_topic_and_save(topic)