// File listing metadata: 160 lines, 4.5 KiB, TypeScript.
// Accumulator for the scraped records; main() pushes one object per handled
// article and logs the whole array when it finishes.
// @ts-ignore
const articles_objects = [];

// NOTE(review): not referenced anywhere in the visible script — presumably the
// profile page this script is meant to be run against; confirm before removing.
const substack_href = "https://substack.com/@benjamintoby";
/**
 * Pauses the caller for a fixed amount of time.
 *
 * @param wait Delay in milliseconds, handed straight to `setTimeout`.
 * @returns A promise that resolves with `true` once the delay has elapsed.
 */
// @ts-ignore
function sleep(wait) {
  return new Promise((resolve) => {
    // Extra setTimeout arguments are forwarded to the callback, so the
    // promise settles with `true` without needing a wrapper closure.
    setTimeout(resolve, wait, true);
  });
}
/**
 * Reads the rendered height of the feed's content container.
 *
 * @param selector CSS selector of the container to measure. Defaults to the
 *   inline-styled `max-width: 568px` div the script has always targeted.
 * @returns The container's `offsetHeight`, or `undefined` when no element
 *   matches the selector.
 */
function grabContentHeight(selector = "div[style='max-width: 568px;']") {
  const container = document.querySelector(selector);

  // @ts-ignore -- querySelector is typed as Element, which lacks offsetHeight
  return container?.offsetHeight;
}
/**
 * Keeps scrolling to the bottom of the page until the content container
 * stops growing.
 *
 * Each round scrolls to the current bottom, waits, and re-measures the
 * container via grabContentHeight(); the loop ends on the first round whose
 * measurement is not larger than the previous one.
 *
 * @param wait_ms Milliseconds to wait after each scroll before re-measuring.
 * @returns A promise that resolves once a round produced no growth.
 */
async function scrollToEnd(wait_ms = 5000) {
  // A missing container measures as `undefined`; treat that as height 0 so a
  // container that only appears after the first scroll still counts as
  // growth (`n > undefined` is always false and would end the loop at once).
  let last_content_height = grabContentHeight() ?? 0;

  while (true) {
    window.scrollTo({
      top: document.body.scrollHeight,
      behavior: "smooth",
    });

    await sleep(wait_ms);

    const current_content_height = grabContentHeight() ?? 0;

    if (current_content_height <= last_content_height) {
      break;
    }

    last_content_height = current_content_height;
  }
}
/**
 * Walks every feed entry on the page and collects one record per entry into
 * `articles_objects`, then logs the collected array.
 *
 * Steps:
 *   1. scrollToEnd() loads the feed, and the number of entries found at that
 *      point fixes how many indexes are visited.
 *   2. For each index the entry list is re-queried (the DOM can be rebuilt
 *      after navigating back); if the index is not present yet the page is
 *      scrolled until it appears or no progress is made.
 *   3. The entry's text, HTML, date title and image URLs are captured, its
 *      "See more" link (if any) is clicked, and the expanded content is read
 *      either in place or from the page that was navigated to.
 *
 * Entries without a `.FeedProseMirror` node, or without a `<p>` inside it,
 * are skipped.
 *
 * @returns {Promise<void>} Resolves after the final array has been logged.
 */
async function main() {
  const ARTICLE_SELECTOR = "div[role='article']";
  const CONTENT_SELECTOR = ".FeedProseMirror";
  // Consecutive scroll rounds that may surface no new entry before the
  // search for a missing index is abandoned (keeps the loop from spinning
  // forever when the feed never regains its earlier length).
  const MAX_STALLED_ROUNDS = 6;

  const query_articles = () =>
    Array.from(document.querySelectorAll(ARTICLE_SELECTOR));

  await scrollToEnd();

  const total_articles = query_articles().length;

  console.log(`Handling ${total_articles} Articles ...`);

  for (let i = 0; i < total_articles; i++) {
    let present_articles = query_articles();

    console.log(`Found ${present_articles.length} Present Articles!`);

    let stalled_rounds = 0;

    while (i >= present_articles.length && stalled_rounds < MAX_STALLED_ROUNDS) {
      window.scrollTo({
        top: document.body.scrollHeight,
        behavior: "smooth",
      });

      console.log(`Searching for Article #${i} ...`);

      await sleep(5000);

      const refreshed_articles = query_articles();

      // Only rounds that reveal nothing new count towards giving up.
      stalled_rounds =
        refreshed_articles.length > present_articles.length
          ? 0
          : stalled_rounds + 1;
      present_articles = refreshed_articles;
    }

    const article = present_articles[i];

    if (!article) {
      // Later indexes cannot be present either, so stop instead of retrying.
      console.warn(`Article #${i} never appeared; stopping early.`);
      break;
    }

    console.log(`Handling Article #${i} ...`);

    const content_div = article.querySelector(CONTENT_SELECTOR);

    if (!content_div) continue;

    const content_div_first_paragraph = content_div.querySelector("p");

    if (!content_div_first_paragraph) continue;

    // The first link carrying a `title` attribute supplies the date value.
    const date_link = Array.from(article.querySelectorAll("a")).find((a) =>
      Boolean(a.getAttribute("title")),
    );

    const window_url = window.location.href;
    const initial_text_content = content_div.textContent;
    // Captured before clicking: used to recognise this entry's content on
    // the page reached when "See more" navigates away.
    const text_sample = content_div_first_paragraph.textContent;

    const article_object = {
      // Title and content both start out as the collapsed text; only
      // `content`/`html` are refreshed after expanding.
      title: initial_text_content,
      content: initial_text_content,
      html: content_div.innerHTML,
      images: [],
      date: date_link?.getAttribute("title"),
    };

    const article_images = Array.from(article.querySelectorAll("picture img"));

    // The first <picture> image is deliberately skipped (presumably the
    // author avatar — TODO confirm).
    for (const article_image of article_images.slice(1)) {
      // @ts-ignore
      const srcset_tokens = article_image.srcset.trim().split(/\s+/);
      // Tokens alternate "url descriptor"; the second-to-last token is the
      // URL of the last candidate, which is assumed to be the largest.
      const largest_image = srcset_tokens.at(-2);

      // An empty or descriptor-less srcset yields no URL; do not record it.
      if (largest_image) {
        // @ts-ignore
        article_object.images.push(largest_image);
      }
    }

    Array.from(article.querySelectorAll("a"))
      .find((el) => el.textContent.includes("See more"))
      ?.click();

    await sleep(2000);

    // "See more" either expands the entry in place or opens another page.
    const navigated = window.location.href !== window_url;

    const full_content = navigated
      ? Array.from(
          document.querySelectorAll(".ProseMirror.FeedProseMirror"),
        ).find((el) => el.textContent.includes(text_sample))
      : article.querySelector(CONTENT_SELECTOR);

    if (full_content) {
      article_object.content = full_content.textContent;
      article_object.html = full_content.innerHTML;
    }

    articles_objects.push(article_object);

    if (navigated) {
      window.history.back();
      await sleep(2000);
    }
  }

  // @ts-ignore
  console.log(articles_objects);
}
// Entry point: run the scrape and report a failure instead of leaving the
// returned promise floating as an unhandled rejection.
main().catch((error) => {
  console.error("Feed scrape failed:", error);
});