前言
在一些文章类程序中,如果直接对文章全文进行检索,数据量大、速度较慢。我们可以在保存文章时提取摘要,方便后续检索。
根据字数获取
这种方式可以作为文章概要。
不过滤
1 2 3 4 5 6 7 8 9 10 11
/**
 * Build a plain-text summary from the first element that carries a CSS class.
 *
 * @param {string} className - Class name of the container, without the leading dot.
 * @param {number} [maxLength] - Maximum number of characters (UTF-16 code units) to keep;
 *   any falsy value (undefined, 0, null, NaN) falls back to 500.
 * @returns {string} The leading text of the container, followed by "..." when it was
 *   truncated; "" when no element matches the class.
 */
function getSummaryByContent(className, maxLength) {
  const limit = maxLength || 500;
  const container = document.querySelector(`.${className}`);
  // A missing container yields an empty summary rather than a TypeError on `.innerText`.
  if (!container) {
    return "";
  }
  const text = container.innerText;
  const ellipsis = text.length > limit ? "..." : "";
  return text.substring(0, limit) + ellipsis;
}
// Summarize the ".post-body" element in at most 200 characters and log the result.
const description = getSummaryByContent("post-body", 200);
console.info(description);
|
排除某些标签
排除代码标签
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
/**
 * Build a plain-text summary of a container while leaving out code-like tags.
 *
 * Only element children of the container contribute text; bare text nodes that sit
 * directly under the container are skipped. `pre` and `figure` elements are excluded
 * both as direct children and when nested anywhere inside a kept child.
 *
 * @param {string} className - Class name of the container, without the leading dot.
 * @param {number} [maxLength] - Maximum number of characters (UTF-16 code units) to keep;
 *   any falsy value falls back to 500.
 * @returns {string} The leading text, followed by "..." when it was truncated;
 *   "" when no element matches the class.
 */
function getSummaryExcludeTag(className, maxLength) {
  const limit = maxLength || 500;
  const container = document.querySelector(`.${className}`);
  if (!container) {
    return "";
  }

  const removedTagNames = ['pre', 'figure'];
  const nestedSelector = removedTagNames.join(',');
  // Work on clones collected in a scratch element so the live page is never modified.
  const scratch = document.createElement('div');

  Array.from(container.childNodes).forEach((node) => {
    const isElement = node.nodeType === 1;
    if (!isElement || removedTagNames.indexOf(node.tagName.toLowerCase()) !== -1) {
      return;
    }
    const copy = node.cloneNode(true);
    // Excluded tags wrapped in another element (e.g. <div><pre>…</pre></div>) must not
    // leak into the summary, so strip them from the clone as well.
    copy.querySelectorAll(nestedSelector).forEach((nested) => nested.remove());
    scratch.appendChild(copy);
  });

  const text = scratch.innerText;
  const ellipsis = text.length > limit ? "..." : "";
  return text.substring(0, limit) + ellipsis;
}
// Summarize ".post-body" without code-like tags, capped at 400 characters, and log it.
const description = getSummaryExcludeTag("post-body", 400);
console.info(description);
|
排除某些样式类(class)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
/**
 * Build a plain-text summary of a container while leaving out elements that carry
 * certain CSS classes.
 *
 * Only element children of the container contribute text; bare text nodes that sit
 * directly under the container are skipped. Elements with the `highlight` class are
 * excluded both as direct children and when nested anywhere inside a kept child.
 *
 * @param {string} className - Class name of the container, without the leading dot.
 * @param {number} [maxLength] - Maximum number of characters (UTF-16 code units) to keep;
 *   any falsy value falls back to 500.
 * @returns {string} The leading text, followed by "..." when it was truncated;
 *   "" when no element matches the class.
 */
function getSummaryExcludeClass(className, maxLength) {
  const limit = maxLength || 500;
  const container = document.querySelector(`.${className}`);
  if (!container) {
    return "";
  }

  const removedClassNames = ['highlight'];
  const hasRemovedClass = (element) =>
    removedClassNames.some((name) => element.classList.contains(name));
  const nestedSelector = removedClassNames.map((name) => `.${name}`).join(',');
  // Work on clones collected in a scratch element so the live page is never modified.
  const scratch = document.createElement('div');

  Array.from(container.childNodes).forEach((node) => {
    if (node.nodeType !== 1 || hasRemovedClass(node)) {
      return;
    }
    const copy = node.cloneNode(true);
    // Excluded elements wrapped in another element must not leak into the summary,
    // so strip them from the clone as well.
    copy.querySelectorAll(nestedSelector).forEach((nested) => nested.remove());
    scratch.appendChild(copy);
  });

  const text = scratch.innerText;
  const ellipsis = text.length > limit ? "..." : "";
  return text.substring(0, limit) + ellipsis;
}
// Summarize ".post-body" without excluded-class elements, capped at 400 characters, and log it.
const description = getSummaryExcludeClass("post-body", 400);
console.info(description);
|
根据标题获取
这种方式可以作为检索关键字使用。
1 2 3 4 5 6 7 8 9 10 11 12
/**
 * Build a keyword-style summary by joining the text of every heading (h1–h6)
 * found inside the first element that carries a CSS class.
 *
 * @param {string} className - Class name of the container, without the leading dot.
 * @param {number} [maxLength] - Maximum number of characters (UTF-16 code units) to keep;
 *   any falsy value falls back to 500.
 * @returns {string} Heading texts joined by single spaces, followed by "..." when
 *   truncated; "" when no element matches the class.
 */
function getSummaryByTitle(className, maxLength) {
  const limit = maxLength || 500;
  const container = document.querySelector(`.${className}`);
  // A missing container yields an empty summary rather than a TypeError on `.querySelectorAll`.
  if (!container) {
    return "";
  }
  const headings = container.querySelectorAll('h1, h2, h3, h4, h5, h6');
  const joined = Array.from(headings)
    // Fall back to textContent when innerText is empty or unavailable.
    .map((heading) => heading.innerText || heading.textContent)
    .join(" ");
  const ellipsis = joined.length > limit ? "..." : "";
  return joined.substring(0, limit) + ellipsis;
}
// Join the headings of ".post-body" into a summary of at most 200 characters and log it.
const description = getSummaryByTitle("post-body", 200);
console.info(description);
|
打印
1 2 3 4 5 6 7 8
// Log every heading (h1–h6) inside the ".post-body" element, numbered from 1.
const showDom = document.querySelector(".post-body");
const headings = showDom.querySelectorAll('h1, h2, h3, h4, h5, h6');
Array.from(headings).forEach((heading, index) => {
  // Fall back to textContent when innerText is empty or unavailable.
  const text = heading.innerText || heading.textContent;
  console.log(`标题 ${index + 1}: ${text}`);
});
|
获取高频词
只对英文有效(正则中的 `\w` 只匹配 ASCII 字母、数字和下划线,无法切分中文词语)。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
/**
 * List the words that occur at least `threshold` times in a text.
 *
 * A word is a run of `\w` characters (ASCII letters, digits, underscore); matching is
 * case-insensitive because the text is lower-cased first.
 *
 * @param {string} text - Text to analyze; a falsy value is treated as empty text.
 * @param {number} threshold - Minimum number of occurrences for a word to be reported.
 * @returns {string[]} Lower-cased words meeting the threshold, in order of first
 *   appearance; an empty array when the text contains no words.
 */
function getHighFrequencyWords(text, threshold) {
  // String.prototype.match returns null (not []) when nothing matches, e.g. for an
  // empty string or text without any ASCII word characters.
  const words = (text || "").toLowerCase().match(/\w+/g) || [];

  // A Map keeps words such as "constructor" or "__proto__" from colliding with
  // properties inherited from Object.prototype.
  const counts = new Map();
  words.forEach((word) => {
    counts.set(word, (counts.get(word) || 0) + 1);
  });

  return Array.from(counts.keys()).filter((word) => counts.get(word) >= threshold);
}
// Read the text of ".post-body" and log every word that occurs at least 3 times.
const showDom = document.querySelector(".post-body");
const textContent = showDom.innerText;
const highFrequencyWords = getHighFrequencyWords(textContent, 3);
console.log("高频词:", highFrequencyWords);
|
中文高频词需要引用中文分词的库。