有些飞书文档比较有价值,但通常比较敏感,权限被设置得很死,不能导出,还容易失踪。因此写了个脚本,专门用于在禁止复制/导出的情况下获取文本内容。
旧版飞书文档:
// Polling flag: getText() sets this to false to ask the scanner to stop.
let running = true;
// Captured line text, keyed by the number parsed from each line node's `magicdomid-<n>` id.
const text = {};
/**
 * Starts polling the page every 100 ms for `.ace-line` nodes and stores the text of
 * each one in the shared `text` map, keyed by the number in its `magicdomid-<n>` id.
 *
 * A line's entry is overwritten on every tick, so the map always holds the most
 * recently seen text. Once the shared `running` flag is false, the next tick performs
 * one last scan, cancels the timer and logs "stop".
 */
const scanText = () => {
  const lineIdPattern = /magicdomid-(\d+)/;
  let timerId = null;

  const handler = () => {
    Array.from(document.querySelectorAll(".ace-line")).forEach((node) => {
      const match = lineIdPattern.exec(node.id);
      // Skip nodes whose id carries no line number; throwing here would abort
      // the rest of this scan and every later one.
      if (match === null) return;
      const id = Number.parseInt(match[1], 10);
      if (!(id in text)) console.log(`part ${id} added`);
      text[id] = Array.from(node.querySelectorAll("span[data-string]"))
        .map((span) => span.textContent)
        .join("");
    });
    if (!running && timerId !== null) {
      // clearInterval expects the id returned by setInterval, not the callback.
      clearInterval(timerId);
      timerId = null;
      console.log("stop");
    }
  };

  timerId = setInterval(handler, 100);
};
/**
 * Signals the scanner to stop, joins every captured line in ascending numeric key
 * order (one line per row), passes the result to `copy` and returns it.
 *
 * @returns {string} The captured lines joined with "\n".
 */
const getText = () => {
  running = false;

  const byNumericKey = ([keyA], [keyB]) => parseInt(keyA) - parseInt(keyB);
  const orderedLines = [];
  for (const [, lineText] of Object.entries(text).sort(byNumericKey)) {
    orderedLines.push(lineText);
  }
  const resultText = orderedLines.join("\n");

  copy(resultText);
  console.log("text copied to clipboard");
  return resultText;
};
// Start polling right away so lines are recorded as they appear in the page.
scanText();
新版飞书文档:
// Polling flag: getText() sets this to false to ask the scanner to stop.
let running = true;
// Captured block text, keyed by the integer parsed from each node's `data-block-id` attribute.
const block = {};
/**
 * Concatenates the text content of every `span[data-string]` found under `node`,
 * in the order `querySelectorAll` returns them.
 *
 * @param {Element} node - Node whose descendant spans are read.
 * @returns {string} The joined text, or "" when no span matches.
 */
const extractBlockText = (node) => {
  const spans = node.querySelectorAll("span[data-string]");
  return Array.from(spans, (span) => span.textContent).join("");
};
/**
 * Starts polling the page every 100 ms for `.block` nodes and stores a Markdown-like
 * rendering of each supported block in the shared `block` map, keyed by the integer
 * parsed from its `data-block-id` attribute.
 *
 * Supported block types: plain text, headings (`#` repeated per level) and bullets
 * (`* ` prefix). A block is read only once; after it is stored its background is
 * painted green. Once the shared `running` flag is false, the next tick performs one
 * last scan, cancels the timer and logs "stop".
 */
const scanText = () => {
  const headingClassPattern = /docx-heading(\d)-block/;
  let timerId = null;

  // Renders one block node as text, or returns null for an unsupported block type.
  const renderBlock = (node) => {
    // NOTE(review): the block type is assumed to be the second class name — confirm against the live DOM.
    const blockClass = node.classList[1];
    if (blockClass === 'docx-text-block') {
      return extractBlockText(node);
    }
    const heading = headingClassPattern.exec(blockClass);
    if (heading !== null) {
      return `${'#'.repeat(Number(heading[1]))} ${extractBlockText(node)}`;
    }
    if (blockClass === 'docx-bullet-block') {
      return `* ${extractBlockText(node)}`;
    }
    return null;
  };

  const handler = () => {
    Array.from(document.querySelectorAll(".block")).forEach((node) => {
      const id = Number.parseInt(node.getAttribute('data-block-id'), 10);
      // A missing or non-numeric id would be stored under the key "NaN" and break
      // the numeric sort in getText, so such nodes are skipped.
      if (Number.isNaN(id) || id in block) return;
      const rendered = renderBlock(node);
      if (rendered === null) return;
      block[id] = rendered;
      // Visual feedback: mark the block as captured.
      node.style.backgroundColor = '#89D961';
    });
    if (!running && timerId !== null) {
      // clearInterval expects the id returned by setInterval, not the callback.
      clearInterval(timerId);
      timerId = null;
      console.log("stop");
    }
  };

  timerId = setInterval(handler, 100);
};
/**
 * Signals the scanner to stop, joins every captured block in ascending numeric key
 * order with a blank line between blocks, passes the result to `copy` and returns it.
 *
 * @returns {string} The captured blocks joined with "\n\n".
 */
const getText = () => {
  running = false;

  const entries = Object.entries(block);
  entries.sort((left, right) => parseInt(left[0]) - parseInt(right[0]));
  const resultText = entries.map(([, content]) => content).join("\n\n");

  copy(resultText);
  console.log("text copied to clipboard");
  return resultText;
};
// Start polling right away so blocks are recorded as they appear in the page.
scanText();
由于飞书文档是动态加载的(而且 window.scroll 不能用),需要手动滑动过整篇文档,保证所有内容都被读取到。
然后在控制台执行 getText(),将内容保存到剪贴板。