265 lines
8.8 KiB
Python
265 lines
8.8 KiB
Python
import asyncio
|
||
import argparse
|
||
import requests
|
||
import feedparser
|
||
import io
|
||
import html
|
||
import datetime
|
||
import logging
|
||
import os
|
||
import re
|
||
from logging.handlers import RotatingFileHandler
|
||
import random
|
||
|
||
import pyvirtualdisplay
|
||
|
||
|
||
from substack import Api
|
||
from substack.post import Post
|
||
|
||
# Module-wide logger used by every component of the bot.
LOG = logging.getLogger('bot')
# Shared record layout: timestamp, level, originating file name, message.
LOG_PATTERN = logging.Formatter('%(asctime)s:%(levelname)s: [%(filename)s] %(message)s')


def setuplogger():
    """Configure the ``'bot'`` logger with a rotating file and a console handler.

    Records of level DEBUG and above are written to ``bot.log`` in the current
    working directory (rotated at 1,000,000 bytes, one backup kept) and echoed
    to the console stream.

    Calling this function more than once is safe: a logger that already has
    handlers is left untouched, so records are never emitted twice.
    """
    conf_filename = None

    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(LOG_PATTERN)
    stream_handler.setLevel(logging.DEBUG)

    def setup_logger(logger_name, file_name=None, add_stream=False):
        """Attach the handlers to ``logger_name`` unless it is already set up."""
        file_name = file_name or logger_name
        log_filename = f"{file_name}.log"

        logger = logging.getLogger(logger_name)
        logger.setLevel(logging.DEBUG)
        if logger.handlers:
            # Already configured by an earlier call; adding the handlers
            # again would duplicate every log line.
            return

        # UTF-8 so accented post titles are written correctly regardless of
        # the platform's default encoding.
        file_handler = RotatingFileHandler(
            log_filename, "a", 1000000, 1, encoding="utf-8"
        )
        file_handler.setFormatter(LOG_PATTERN)
        logger.addHandler(file_handler)
        if add_stream:
            logger.addHandler(stream_handler)

    setup_logger("bot", conf_filename, True)
|
||
|
||
class RSSfeed:
    """A single feed source to scan.

    Attributes:
        url: Address the feed is downloaded from.
        youtube: Value of the ``yt`` argument; callers set it for feeds whose
            URL contains "youtube".
    """

    def __init__(self, url, yt=False):
        self.url = url
        self.youtube = yt

    def __repr__(self):
        return f"{type(self).__name__}(url={self.url!r}, yt={self.youtube!r})"
|
||
|
||
class SubStackTask:
    """Daily job that gathers recent feed entries into one Substack post.

    The task logs into Substack through ``Api`` on construction, then
    ``run_daily_at_6_am`` loops forever, calling ``daily_task`` once per day.
    """

    # Local wall-clock time at which the daily post is built.
    RUN_HOUR = 6
    RUN_MINUTE = 5
    # Seconds to wait on a feed server before giving up on that feed.
    FEED_TIMEOUT = 30
    # Feed-list locations, tried in order; the first existing file wins.
    FEEDS_FILES = ('/data/feeds.txt', r'x:\substack\feeds.txt', 'feeds.txt')
    # Date layout of non-YouTube feeds, after 'GMT' is rewritten to '+0000'.
    RSS_DATE_FORMAT = '%a, %d %b %Y %H:%M:%S %z'
    FRENCH_MONTHS = (
        'Janvier', 'Février', 'Mars', 'Avril', 'Mai', 'Juin',
        'Juillet', 'Août', 'Septembre', 'Octobre', 'Novembre', 'Décembre',
    )
    _TAG_RE = re.compile('<[^<]+?>')
    _FOOTER_RE = re.compile(r"L’article .* est apparu en premier sur .*")

    def __init__(self, login, password, cookies_path, account, feeds):
        """Create the Substack client and remember the feeds to scan.

        Args:
            login: Passed to ``Api`` as ``email``.
            password: Passed to ``Api`` as ``password``.
            cookies_path: Passed to ``Api`` as ``cookies_path``.
            account: Passed to ``Api`` as ``publication_url``.
            feeds: Iterable of ``RSSfeed`` objects.
        """
        self.api = Api(
            email=login,
            password=password,
            cookies_path=cookies_path,
            publication_url=account,
        )

        self.user_id = self.api.get_user_id()
        self.feeds = feeds
        for feed in self.feeds:
            LOG.info("Adding feed " + feed.url)

    def format_duration(self, seconds):
        """Render a duration in seconds as e.g. ``"1 hours, 5 minutes"``.

        The total is rounded to whole seconds *before* being split, so a
        component can never overflow (no "60 seconds"). Zero or negative
        durations yield ``"0 seconds"``.
        """
        total = max(0, round(seconds))
        days, rest = divmod(total, 86400)
        hours, rest = divmod(rest, 3600)
        minutes, secs = divmod(rest, 60)

        labelled = (
            (days, "days"),
            (hours, "hours"),
            (minutes, "minutes"),
            (secs, "seconds"),
        )
        parts = [f"{value} {unit}" for value, unit in labelled if value > 0]
        return ', '.join(parts) if parts else '0 seconds'

    def get_fr_date(self, today=None):
        """Return a date as ``"DD Mois YYYY"`` with a French month name.

        Args:
            today: ``date``/``datetime`` to format; defaults to the current
                local time.

        The month is looked up by number, so the result does not depend on
        the process locale.
        """
        if today is None:
            today = datetime.datetime.now()
        month = self.FRENCH_MONTHS[today.month - 1]
        return f"{today.day:02d} {month} {today.year}"

    @classmethod
    def _next_run_after(cls, now):
        """Return the first RUN_HOUR:RUN_MINUTE strictly after ``now``."""
        candidate = now.replace(
            hour=cls.RUN_HOUR, minute=cls.RUN_MINUTE, second=0, microsecond=0
        )
        if candidate <= now:
            candidate += datetime.timedelta(days=1)
        return candidate

    async def run_daily_at_6_am(self):
        """Run ``daily_task`` at every scheduled time, forever.

        While waiting, the remaining time is logged at most every five
        minutes. A failing ``daily_task`` is logged and the loop carries on
        to the next scheduled run instead of terminating the bot.
        """
        while True:
            next_run = self._next_run_after(datetime.datetime.now())

            while True:
                sleep_seconds = (next_run - datetime.datetime.now()).total_seconds()
                if sleep_seconds <= 0:
                    break
                formatted_duration = self.format_duration(sleep_seconds)
                LOG.info(f"Waiting for {formatted_duration} for next scan")
                # Sleep in short slices so the log shows the bot is alive.
                await asyncio.sleep(min(sleep_seconds, 5 * 60))

            LOG.info("Going to run the daily task")
            try:
                await self.daily_task()
            except Exception:
                # Top-level boundary of a long-running service: record the
                # failure and try again at the next scheduled time.
                LOG.exception("Daily task failed")

    @classmethod
    def _load_feeds(cls):
        """Read the feed list file and return one ``RSSfeed`` per non-blank line."""
        path = next(
            (candidate for candidate in cls.FEEDS_FILES if os.path.isfile(candidate)),
            cls.FEEDS_FILES[-1],
        )
        with open(path, encoding="utf-8") as handle:
            urls = [line.strip() for line in handle]
        return [RSSfeed(url, "youtube" in url) for url in urls if url]

    @classmethod
    def _published_at(cls, entry, youtube):
        """Return the entry's publication time as an aware datetime, or None.

        ``None`` is returned when the entry has no ``published`` value or the
        value cannot be parsed. YouTube feeds are parsed as ISO 8601, other
        feeds with ``RSS_DATE_FORMAT``.
        """
        raw = entry.get("published")
        if not raw:
            return None
        try:
            if youtube:
                stamp = datetime.datetime.fromisoformat(raw)
            else:
                stamp = datetime.datetime.strptime(
                    raw.replace('GMT', '+0000'), cls.RSS_DATE_FORMAT
                )
        except ValueError:
            return None
        if stamp.tzinfo is None:
            # Naive and aware datetimes cannot be compared; treat an
            # offset-less timestamp as UTC.
            stamp = stamp.replace(tzinfo=datetime.timezone.utc)
        return stamp

    def _fetch_recent_entries(self, since):
        """Download every feed and return the entries published after ``since``.

        A feed that cannot be downloaded, or an entry without a usable date,
        is logged and skipped so one bad source does not cancel the post.
        """
        recent = []
        for feed in self.feeds:
            LOG.info("Scanning feed " + feed.url)
            try:
                response = requests.get(feed.url, timeout=self.FEED_TIMEOUT)
            except requests.RequestException:
                LOG.exception("Could not download feed " + feed.url)
                continue

            news_feed = feedparser.parse(response.text)
            for entry in news_feed.entries:
                published = self._published_at(entry, feed.youtube is True)
                if published is None:
                    LOG.warning("Skipping entry without a usable date in " + feed.url)
                    continue
                if published > since:
                    recent.append(entry)
        return recent

    @classmethod
    def _plain_summary(cls, post):
        """Return the entry summary as plain text, or ``""`` if there is none.

        HTML entities are unescaped, tags are removed and the
        "L’article … est apparu en premier sur …" footer is dropped.
        """
        if "summary" not in post:
            return ""
        text = html.unescape(post["summary"])
        text = cls._TAG_RE.sub('', text)
        return cls._FOOTER_RE.sub('', text)

    @staticmethod
    def _link_paragraph(url):
        """Build a paragraph node whose text is ``url`` and links to ``url``."""
        return {'type': 'paragraph', 'content': [
            {'content': url, 'marks': [{'type': "link", 'href': url}]}]}

    def _append_entry(self, sub_stack_post, post):
        """Add one feed entry to the post; return True if anything was added.

        NOTE(review): the nesting of this branch was reconstructed from a
        paste that had lost its indentation. Confirm that images belong only
        to text entries and that a rule follows every rendered entry.
        """
        link_url = post["link"]
        title = post["title"]
        LOG.info("Posting " + str(title))
        heading = {"type": "heading", "level": 3, "content": title}

        if "yt_videoid" in post:
            sub_stack_post.add(heading)
            sub_stack_post.add({"type": "youtube2", "src": post["yt_videoid"]})
            sub_stack_post.add(self._link_paragraph(link_url))
            return True

        text = self._plain_summary(post)
        if text == "":
            return False

        sub_stack_post.add(heading)
        sub_stack_post.add({"type": "paragraph", "content": text})
        sub_stack_post.add(self._link_paragraph(link_url))

        for link in post.get("links", []):
            # Links may lack a "type"; accept any image MIME type rather
            # than only the non-standard "image/jpg".
            mime = link.get("type") or ""
            if mime.startswith("image/") and link.get("href"):
                sub_stack_post.add({'type': 'captionedImage', 'src': link["href"]})
        return True

    async def daily_task(self):
        """Build today's post from the feeds and publish it.

        The feed list is re-read from disk, entries published after 06:00 UTC
        of the previous day are collected, shuffled and rendered, followed by
        a "Sources" section and a subscribe widget. Nothing is published when
        no recent entry was found.
        """
        title_post = "Les news du " + self.get_fr_date()
        LOG.info("Running daily task : " + str(title_post))

        self.feeds = self._load_feeds()

        # NOTE(review): the cut-off is expressed in UTC while the schedule
        # uses local time, so the two windows may not line up exactly;
        # confirm this is intended.
        now_utc = datetime.datetime.now(datetime.timezone.utc)
        yesterday_6am = now_utc.replace(
            hour=6, minute=0, second=0, microsecond=0
        ) - datetime.timedelta(days=1)

        all_news_posts = self._fetch_recent_entries(yesterday_6am)
        if not all_news_posts:
            LOG.info("No new entries since %s; nothing to publish", yesterday_6am)
            return

        random.shuffle(all_news_posts)

        sub_stack_post = Post(
            title=title_post,
            subtitle="",
            user_id=self.user_id,
        )

        for post in all_news_posts:
            if self._append_entry(sub_stack_post, post):
                sub_stack_post.add({"type": "horizontal_rule"})

        sub_stack_post.add({"type": "heading", "level": 3, "content": "Sources"})
        for feed in self.feeds:
            sub_stack_post.add(self._link_paragraph(feed.url))

        sub_stack_post.add({"type": "subscribeWidget", "message": "Abonnez-vous gratuitement pour recevoir chaque jour les news dans votre e-mail et soutenir mon travail."})

        draft = self.api.post_draft(sub_stack_post.get_draft())
        self.api.prepublish_draft(draft.get("id"))
        self.api.publish_draft(draft.get("id"))
|
||
|
||
async def main(login, password, account):
    """Set up logging, load the feed list and run the daily scheduler.

    Args:
        login: Substack account e-mail, forwarded to ``SubStackTask``.
        password: Substack password, forwarded to ``SubStackTask``.
        account: Publication URL, forwarded to ``SubStackTask``.

    The feed list and the cookies file are taken from ``/data`` when present
    there, otherwise from the current working directory. This coroutine does
    not return under normal operation because the scheduler loops forever.
    """
    setuplogger()

    ff = r'/data/feeds.txt'
    if not os.path.isfile(ff):
        ff = r'feeds.txt'

    cookies_path = r'/data/cookies.json'
    if not os.path.isfile(cookies_path):
        cookies_path = r'cookies.json'

    with open(ff, encoding="utf-8") as file:
        urls = [line.strip() for line in file]

    # Blank lines are ignored: an empty URL cannot be downloaded.
    feeds = [RSSfeed(url, "youtube" in url) for url in urls if url]

    task = SubStackTask(login, password, cookies_path, account, feeds)

    LOG.info("Starting bot")
    await task.run_daily_at_6_am()
|
||
|
||
|
||
if __name__ == "__main__":
    # SECURITY: credentials must never live in source control. They are read
    # from the environment / command line instead. Any password that was
    # previously committed in this file should be considered leaked and
    # rotated.
    parser = argparse.ArgumentParser(
        description="Publish a daily Substack post aggregated from RSS feeds."
    )
    parser.add_argument(
        "--login",
        default=os.environ.get("SUBSTACK_LOGIN"),
        help="Substack account e-mail (default: $SUBSTACK_LOGIN)",
    )
    parser.add_argument(
        "--account",
        default=os.environ.get(
            "SUBSTACK_ACCOUNT", "https://aggregateurjvfr.substack.com"
        ),
        help="Publication URL (default: $SUBSTACK_ACCOUNT)",
    )
    args = parser.parse_args()

    # The password is only accepted through the environment so it does not
    # show up in the process list or the shell history.
    secret = os.environ.get("SUBSTACK_PASSWORD")
    if not args.login or not secret:
        parser.error(
            "a login (--login or $SUBSTACK_LOGIN) and $SUBSTACK_PASSWORD are required"
        )

    asyncio.run(main(args.login, secret, args.account))