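# Reference: https://blog.csdn.net/qq_35462334/article/details/133806824
# Chrome for Testing downloads: https://googlechromelabs.github.io/chrome-for-testing/
# 用于 Python Selenium 的 Chrome 模拟抓取网页
# (Fetch web pages by driving Chrome through Python Selenium.)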
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time
def get_url_content_with_selenium(
    url,
    *,
    driver_path='c:/chromedriver/chromedriver.exe',
    wait_timeout=10,
    settle_seconds=5,
):
    """Load ``url`` in headless Chrome and return the rendered page source.

    Args:
        url: Address of the page to open.
        driver_path: Filesystem path of the chromedriver executable handed
            to Selenium's ``Service``. Defaults to the Windows location the
            script was originally written for.
        wait_timeout: Maximum number of seconds to wait for the ``<body>``
            element to be present in the DOM.
        settle_seconds: Extra fixed delay, in seconds, applied after
            ``<body>`` is present so that page JavaScript has time to run.
            A value of zero or less skips the delay.

    Returns:
        The value of ``driver.page_source`` after the waits complete.

    Raises:
        selenium.common.exceptions.TimeoutException: If ``<body>`` is not
            present within ``wait_timeout`` seconds (raised by
            ``WebDriverWait.until``).
    """
    # Configure Chrome to run headless, i.e. without opening a window.
    chrome_options = Options()
    chrome_options.add_argument("--headless")

    # Start the WebDriver using the chromedriver binary at ``driver_path``.
    service = Service(driver_path)
    driver = webdriver.Chrome(service=service, options=chrome_options)
    try:
        driver.get(url)

        # Presence of <body> is only a minimal "page started rendering"
        # signal; adjust the condition if a more specific element is needed.
        WebDriverWait(driver, wait_timeout).until(
            EC.presence_of_element_located((By.TAG_NAME, "body"))
        )

        # Fixed grace period for JavaScript-driven content. time.sleep()
        # rejects negative values, so non-positive delays are skipped.
        if settle_seconds > 0:
            time.sleep(settle_seconds)

        return driver.page_source
    finally:
        # Always shut the browser down, even when loading or waiting fails.
        driver.quit()
def extract_and_save_section(
    html_content,
    output_file,
    *,
    section_class='test-list',
    section_sys_id='adf84e91f19b4-b6cecf4a2df72',
):
    """Find one ``<section>`` in ``html_content`` and write it to a file.

    The section is matched by its CSS class and its ``data-sys-id``
    attribute. When it is found, its HTML is written to ``output_file`` as
    UTF-8 and a confirmation is printed; otherwise "Section not found" is
    printed and no file is written.

    Args:
        html_content: HTML markup to search.
        output_file: Path of the file that receives the section's HTML.
        section_class: CSS class the ``<section>`` must carry.
        section_sys_id: Required value of the ``data-sys-id`` attribute.
    """
    # Parse the markup with Python's built-in HTML parser backend.
    soup = BeautifulSoup(html_content, 'html.parser')

    # Locate the first <section> matching both the class and the sys-id.
    section = soup.find(
        'section',
        class_=section_class,
        attrs={'data-sys-id': section_sys_id},
    )

    if section:
        # Serialise the matched element (tag included) to the output file.
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(str(section))
        print(f"Section content saved to {output_file}")
    else:
        print("Section not found")
# Usage example
url = "https://zujuan.xkw.com/gzsx/zsd28806/o2p10"
output_file = "section_content.html"

# Only drive the browser when executed as a script, so that importing this
# module for its functions does not launch Chrome or write any files.
if __name__ == "__main__":
    content = get_url_content_with_selenium(url)
    # Dump the full rendered page source to stdout for inspection.
    print(content)
    if content:
        extract_and_save_section(content, output_file)
    else:
        print("Failed to retrieve content")