Home-Toolbox-Plugin/utils/html.ts

114 lines
3.0 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
 * Empties `el` and returns the text of its direct text-node children.
 *
 * Every child is detached from `el`: text nodes are collected (in document
 * order) into a temporary fragment, all other node types are discarded.
 * Text nested inside child elements is therefore NOT part of the result.
 *
 * @param el - Element to drain; it has no children when this returns.
 * @returns The `textContent` of the fragment holding the collected text nodes.
 */
const pickTextNode = (el: HTMLElement) => {
  const textOnly = document.createDocumentFragment();
  // Re-read `firstChild` each round: both branches detach the current child.
  for (let child = el.firstChild; child; child = el.firstChild) {
    if (child.nodeType !== Node.TEXT_NODE) {
      // Not a text node: drop it.
      el.removeChild(child);
      continue;
    }
    // Text node: move it out of `el` and into the fragment.
    textOnly.appendChild(child);
  }
  return textOnly.textContent;
};
/**
 * Removes every attribute from `el`, in place.
 *
 * @param el - Element whose attributes are stripped.
 */
const removeAttributes = (el: HTMLElement) => {
  // Snapshot the attribute names before mutating the element.
  const names = Array.from(el.attributes, (attr) => attr.name);
  for (const name of names) {
    el.removeAttribute(name);
  }
};
/**
 * Normalizes an HTML string into a minimal, mostly attribute-free form.
 *
 * The markup is parsed into a scratch document and then cleaned in passes:
 * - headings `h2`-`h6` lose their attributes and keep only their direct text;
 * - `p` elements with no text and no `img` are removed; the remaining ones
 *   lose their attributes and have every descendant `span` replaced by a
 *   plain text node;
 * - every `img` is rebuilt with only `src` and `alt`;
 * - a `figure` that contains an `img` is reduced to just that image;
 * - `style` elements are removed;
 * - a `pre` loses its attributes and, when it contains a `code`, is reduced
 *   to a single fresh `code` holding that element's text.
 *
 * @param html - HTML markup to clean up.
 * @param extraFormatter - Optional hook invoked with the scratch document
 *   after the built-in passes, for additional in-place changes.
 * @returns The `innerHTML` of the scratch document's root element.
 */
export const formatHTML = (html: string, extraFormatter?: (doc: Document) => void): string => {
  // Strip non-breaking spaces before parsing. The character is written as an
  // escape because a raw space-like character in a string literal cannot be
  // told apart from an ordinary space, and stripping ordinary spaces would
  // destroy the separators between tag names and attributes.
  // NOTE(review): U+00A0 is assumed to be the intended character - confirm
  // against the original literal.
  html = html.replaceAll("\u00A0", "");

  const nextDocument = document.implementation.createHTMLDocument();
  nextDocument.documentElement.innerHTML = html;

  // Headings: drop attributes and any nested elements, keep the direct text.
  nextDocument.querySelectorAll<HTMLElement>("h2, h3, h4, h5, h6").forEach((heading) => {
    removeAttributes(heading);
    // Assign as text, not as markup: the picked text is already decoded, so
    // writing it to `innerHTML` would turn escaped content such as
    // `&lt;b&gt;` back into real elements.
    heading.textContent = pickTextNode(heading) ?? "";
  });

  // Paragraphs.
  nextDocument.querySelectorAll<HTMLElement>("p").forEach((paragraph) => {
    // Remove paragraphs that have neither text nor an image.
    if (!paragraph.innerText.trim() && !paragraph.querySelector("img")) {
      paragraph.remove();
      return; // Nothing left to clean on a removed paragraph.
    }
    removeAttributes(paragraph);
    // Replace each span with a plain text node holding its text.
    paragraph.querySelectorAll("span").forEach((span) => {
      span.replaceWith(nextDocument.createTextNode(span.innerText));
    });
  });

  // Images: rebuild each one so that only `src` and `alt` survive.
  nextDocument.querySelectorAll("img").forEach((image) => {
    // Created in the scratch document it is inserted into, not the page's.
    const cleanImage = nextDocument.createElement("img");
    cleanImage.src = image.src;
    cleanImage.alt = image.alt;
    image.replaceWith(cleanImage);
  });

  // Figures: when one contains an image, keep only that image.
  nextDocument.querySelectorAll("figure").forEach((figure) => {
    const image = figure.querySelector("img");
    if (image) {
      figure.innerHTML = "";
      figure.appendChild(image);
    }
  });

  // Drop style elements.
  nextDocument.querySelectorAll("style").forEach((style) => {
    style.remove();
  });

  // Code blocks: drop attributes and collapse the content of a nested `code`
  // to plain text.
  nextDocument.querySelectorAll("pre").forEach((pre) => {
    removeAttributes(pre);
    const code = pre.querySelector("code");
    if (code) {
      const cleanCode = nextDocument.createElement("code");
      // `innerText` is kept on both sides so the serialized output is
      // unchanged. NOTE(review): the `innerText` setter may turn line breaks
      // into <br> elements - confirm that is wanted inside <pre>.
      cleanCode.innerText = code.innerText;
      pre.innerHTML = "";
      pre.appendChild(cleanCode);
    }
  });

  if (extraFormatter) {
    extraFormatter(nextDocument);
  }
  return nextDocument.documentElement.innerHTML;
};