/**
 * Empties `el` and returns the concatenated text of its direct text-node
 * children. Text nested inside child elements is intentionally discarded,
 * which is how headings get rid of decorative markup (anchors, icons, ...).
 *
 * @param el Element to empty; all of its children are removed.
 * @returns The text found in the direct text-node children, in document order.
 */
const pickTextNode = (el: HTMLElement): string => {
  let text = "";
  // Snapshot first: `childNodes` is live and shrinks while children are removed.
  for (const child of Array.from(el.childNodes)) {
    if (child.nodeType === Node.TEXT_NODE) {
      text += child.textContent ?? "";
    }
    el.removeChild(child);
  }
  return text;
};

/**
 * Removes every attribute from `el`.
 *
 * @param el Element whose attributes are stripped in place.
 */
const removeAttributes = (el: HTMLElement): void => {
  // Snapshot first: `attributes` is live and shrinks while attributes are removed.
  for (const { name } of Array.from(el.attributes)) {
    el.removeAttribute(name);
  }
};

/** Options accepted by {@link formatHTML}. */
interface FormatHTMLOptions {
  /** Optional hook invoked with the cleaned document just before it is serialized. */
  formatter?: (doc: Document) => void;
  /** Base URL used to turn relative image paths into absolute URLs. */
  baseURL?: string;
}

/**
 * Removes non-breaking spaces, both as the `&nbsp;` entity and as the raw
 * U+00A0 character.
 *
 * NOTE(review): the original called `replaceAll` with a one-character string
 * that looks like a space. Stripping every ordinary space would fuse tag names
 * with their attributes (`<img src>` -> `<imgsrc>`), so this is assumed to have
 * been a non-breaking space / `&nbsp;` — confirm against the upstream file.
 */
const stripNonBreakingSpaces = (html: string): string =>
  html.replace(/&nbsp;|\u00A0/g, "");

/**
 * Resolves `rawSrc` against `baseURL`. Absolute URLs are returned normalized
 * and unchanged in meaning because the `URL` constructor ignores the base for them.
 *
 * @returns The absolute URL, or `undefined` when the pair cannot be parsed.
 */
const resolveAgainstBase = (rawSrc: string, baseURL: string): string | undefined => {
  try {
    return new URL(rawSrc, baseURL).href;
  } catch {
    return undefined;
  }
};

/** Resolves `src` through a temporary anchor element of the current page. */
const resolveAgainstPage = (src: string): string => {
  const link = document.createElement("a");
  link.href = src;
  return link.href;
};

/**
 * Cleans up an HTML string: strips decorative markup and attributes from
 * headings and paragraphs, reduces images to src/alt/width/height, reduces
 * figures to their image, drops style and script elements, and flattens
 * highlighted code blocks to plain text.
 *
 * @param html Raw HTML to clean.
 * @param options Optional `baseURL` for relative image paths and a `formatter`
 *   hook that receives the cleaned document before serialization.
 * @returns The serialized inner HTML of the cleaned document's root element.
 */
export const formatHTML = (html: string, options: FormatHTMLOptions = {}): string => {
  const { formatter, baseURL } = options;

  // Work in a detached scratch document so the live page is never touched.
  const nextDocument = document.implementation.createHTMLDocument();
  nextDocument.documentElement.innerHTML = stripNonBreakingSpaces(html);

  // Headings: drop attributes and keep only their direct text.
  nextDocument.querySelectorAll<HTMLElement>("h2, h3, h4, h5, h6").forEach((el) => {
    removeAttributes(el);
    // Assign as text, not markup: heading text such as "<div>" must not be re-parsed as HTML.
    el.textContent = pickTextNode(el);
  });

  // Paragraphs: drop empty ones, strip attributes, unwrap spans.
  nextDocument.querySelectorAll<HTMLElement>("p").forEach((el) => {
    // A paragraph with neither text nor an image carries no content.
    if (!(el.textContent ?? "").trim() && !el.querySelector("img")) {
      el.remove();
      return; // nothing left to clean on a removed paragraph
    }

    removeAttributes(el);

    // Replace each span with its plain text.
    el.querySelectorAll<HTMLElement>("span").forEach((span) => {
      span.replaceWith(nextDocument.createTextNode(span.textContent ?? ""));
    });
  });

  // Images: rebuild each one keeping only src, alt, width and height.
  nextDocument.querySelectorAll("img").forEach((el) => {
    // Created in the scratch document rather than the live one.
    const cleanImg = nextDocument.createElement("img");

    // Resolve the raw attribute against baseURL. Testing an anchor-resolved
    // href for an "http" prefix cannot detect relative paths, because the
    // anchor has already made them absolute against the current page.
    const rawSrc = el.getAttribute("src") ?? "";
    const resolved = baseURL ? resolveAgainstBase(rawSrc, baseURL) : undefined;
    cleanImg.src = resolved ?? resolveAgainstPage(el.src);
    cleanImg.alt = el.alt;

    // Copy dimensions only when the source image reports a non-zero value.
    if (el.width) {
      cleanImg.width = el.width;
    }
    if (el.height) {
      cleanImg.height = el.height;
    }

    el.replaceWith(cleanImg);
  });

  // Figures: keep only the (first) image, dropping captions and wrappers.
  nextDocument.querySelectorAll<HTMLElement>("figure").forEach((el) => {
    const img = el.querySelector("img");
    if (img) {
      el.innerHTML = "";
      el.appendChild(img);
    }
  });

  // Remove style elements.
  nextDocument.querySelectorAll("style").forEach((el) => {
    el.remove();
  });

  // Remove script elements.
  nextDocument.querySelectorAll("script").forEach((el) => {
    el.remove();
  });

  // Code blocks: flatten highlighter markup (hljs / prism) to plain text.
  nextDocument.querySelectorAll<HTMLElement>("pre").forEach((el) => {
    removeAttributes(el);

    const codeEl = el.querySelector("code");
    if (codeEl) {
      const nextCodeEl = nextDocument.createElement("code");
      // textContent keeps newlines as text; the innerText setter would
      // turn every line break into a <br> element.
      nextCodeEl.textContent = codeEl.textContent;
      el.textContent = "";
      el.appendChild(nextCodeEl);
    }
  });

  if (formatter) {
    formatter(nextDocument);
  }

  return nextDocument.documentElement.innerHTML;
};