init commit
This commit is contained in:
207
Post_RSS_on_SubStack.py
Normal file
207
Post_RSS_on_SubStack.py
Normal file
@@ -0,0 +1,207 @@
|
|||||||
|
import asyncio
|
||||||
|
import argparse
|
||||||
|
import requests
|
||||||
|
import feedparser
|
||||||
|
import io
|
||||||
|
import html
|
||||||
|
import datetime
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from logging.handlers import RotatingFileHandler
|
||||||
|
import locale
|
||||||
|
import random
|
||||||
|
|
||||||
|
from substack import Api
|
||||||
|
from substack.post import Post
|
||||||
|
|
||||||
|
# Logger named 'bot'; setuplogger() is what attaches handlers to it.
LOG = logging.getLogger('bot')
# Record layout shared by the handlers: timestamp, level, source file name, message.
LOG_PATTERN = logging.Formatter('%(asctime)s:%(levelname)s: [%(filename)s] %(message)s')


# Import-time side effect: switch LC_TIME to French for the whole process.
# NOTE(review): this raises locale.Error when the 'fr_FR' locale is not
# installed on the host — confirm against the deployment targets.
locale.setlocale(locale.LC_TIME, 'fr_FR')
|
||||||
|
|
||||||
|
|
||||||
|
def setuplogger():
    """Configure the ``bot`` logger with a rotating file and a console handler.

    Records at DEBUG level and above are written to ``bot.log`` (rotated at
    1,000,000 bytes, one backup kept) and echoed to the default stream of
    ``logging.StreamHandler``. Both handlers use ``LOG_PATTERN``.

    The function is idempotent: if the logger already has handlers, nothing
    is added, so calling it more than once does not duplicate log lines.
    """
    logger = logging.getLogger("bot")
    logger.setLevel(logging.DEBUG)

    if logger.handlers:
        # Already configured; adding the handlers again would emit every
        # record once per call.
        return

    file_handler = RotatingFileHandler(
        "bot.log", mode="a", maxBytes=1000000, backupCount=1
    )
    file_handler.setFormatter(LOG_PATTERN)
    logger.addHandler(file_handler)

    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(LOG_PATTERN)
    stream_handler.setLevel(logging.DEBUG)
    logger.addHandler(stream_handler)
|
||||||
|
|
||||||
|
class RSSfeed:
    """A feed address paired with a YouTube flag.

    Attributes are plain and public:
      url     -- the feed URL, stored as given.
      youtube -- the ``yt`` argument, stored as given (defaults to False).
    """

    def __init__(self, url, yt=False):
        self.url, self.youtube = url, yt
|
||||||
|
|
||||||
|
class SubStackTask:
    """Build a news digest from RSS feeds and save it as a Substack draft.

    The constructor logs in through ``substack.Api`` and stores the id
    returned by ``get_user_id()``. ``daily_task`` assembles one post from
    every feed entry published after 06:00 UTC of the previous day, then
    creates the draft and calls ``prepublish_draft`` on it (the draft is not
    published).
    """

    # Date layout of the ``published`` field in the non-YouTube feeds.
    RSS_DATE_FORMAT = '%a, %d %b %Y %H:%M:%S %z'
    # Seconds to wait for a feed server before failing instead of hanging.
    REQUEST_TIMEOUT = 30
    # Anything that looks like an HTML tag inside an entry summary.
    HTML_TAG = re.compile('<[^<]+?>')
    # Boilerplate sentence removed from entry summaries.
    ARTICLE_TRAILER = re.compile(r"L’article .* est apparu en premier sur .*")

    def __init__(self, login, password, account, feeds):
        """Log in to Substack and remember the feeds to aggregate.

        Args:
            login: e-mail passed to ``Api`` as ``email``.
            password: password passed to ``Api``.
            account: publication URL passed to ``Api`` as ``publication_url``.
            feeds: iterable of objects exposing ``url`` and ``youtube``.
        """
        self.api = Api(
            email=login,
            password=password,
            publication_url=account,
        )
        self.user_id = self.api.get_user_id()
        self.feeds = feeds

    def get_fr_date(self):
        """Return today's local date formatted in French, e.g. ``01 janvier 2024``.

        LC_TIME is switched to ``fr_FR`` for the formatting and is always set
        to ``'C'`` afterwards, even if formatting fails. ``daily_task`` parses
        English day/month names with ``strptime`` after calling this method,
        which only works while LC_TIME is ``'C'``.

        Raises:
            locale.Error: if the ``fr_FR`` locale is not available.
        """
        locale.setlocale(locale.LC_TIME, 'fr_FR')
        try:
            return datetime.datetime.now().strftime("%d %B %Y")
        finally:
            locale.setlocale(locale.LC_TIME, 'C')

    @staticmethod
    def _seconds_until_next_run(now):
        """Return the seconds between ``now`` and 06:05 on the following day."""
        next_run = (now + datetime.timedelta(days=1)).replace(
            hour=6, minute=5, second=0, microsecond=0
        )
        return (next_run - now).total_seconds()

    async def run_daily_at_6_am(self):
        """Run ``daily_task`` forever, once per day.

        Each iteration sleeps until 06:05 (local time) of the day after the
        current one, then runs the task. This coroutine never returns.
        """
        while True:
            delay = self._seconds_until_next_run(datetime.datetime.now())
            await asyncio.sleep(delay)
            await self.daily_task()

    @classmethod
    def _parse_published(cls, published, is_youtube):
        """Turn a feed ``published`` string into a timezone-aware datetime.

        YouTube feeds are parsed as ISO 8601; the other feeds are parsed with
        ``RSS_DATE_FORMAT`` after rewriting a literal ``GMT`` as ``+0000``.
        """
        if is_youtube:
            return datetime.datetime.fromisoformat(published)
        return datetime.datetime.strptime(
            published.replace('GMT', '+0000'), cls.RSS_DATE_FORMAT
        )

    @classmethod
    def _clean_summary(cls, summary):
        """Unescape HTML entities, strip tags and drop the boilerplate trailer."""
        text = html.unescape(summary)
        text = cls.HTML_TAG.sub('', text)
        return cls.ARTICLE_TRAILER.sub('', text)

    @staticmethod
    def _link_paragraph(url):
        """Return a paragraph node whose text is ``url`` and links to ``url``."""
        return {'type': 'paragraph', 'content': [
            {'content': url, 'marks': [{'type': "link", 'href': url}]}]}

    def _fetch_recent_entries(self, feed, cutoff):
        """Download ``feed`` and return its entries published after ``cutoff``."""
        print(feed.url)
        html_text = requests.get(feed.url, timeout=self.REQUEST_TIMEOUT).text
        news_feed = feedparser.parse(html_text)
        is_youtube = feed.youtube is True
        return [
            entry for entry in news_feed.entries
            if self._parse_published(entry.published, is_youtube) > cutoff
        ]

    @classmethod
    def _append_entry(cls, sub_stack_post, entry):
        """Append one feed entry to the post.

        Entries carrying ``yt_videoid`` become heading + embedded video +
        link. Other entries become heading + cleaned summary + link, followed
        by every linked ``image/jpg``. Entries with neither a video id nor
        summary text are skipped entirely. A horizontal rule closes each
        entry that was added.
        """
        link_url = entry["link"]
        title = entry["title"]

        if "yt_videoid" in entry:
            sub_stack_post.add({"type": "heading", "level": 3, "content": title})
            video_id = entry["yt_videoid"]
            print(video_id)
            sub_stack_post.add({"type": "youtube2", "src": video_id})
            sub_stack_post.add(cls._link_paragraph(link_url))
        else:
            summary = cls._clean_summary(entry["summary"]) if "summary" in entry else ""
            if summary == "":
                return

            sub_stack_post.add({"type": "heading", "level": 3, "content": title})
            sub_stack_post.add({"type": "paragraph", "content": summary})
            sub_stack_post.add(cls._link_paragraph(link_url))

            for link in entry.get("links", []):
                # Not every link carries a "type"; .get avoids a KeyError.
                if link.get("type") == "image/jpg":
                    sub_stack_post.add({'type': 'captionedImage', 'src': link["href"]})

        sub_stack_post.add({"type": "horizontal_rule"})

    async def daily_task(self):
        """Create the digest draft for today.

        Collects the entries of every feed published after 06:00 UTC of the
        previous day, shuffles them, renders them into a ``Post`` titled
        ``"Les news du <date>"``, appends a "Sources" section listing every
        feed URL, then posts the draft and pre-publishes it.
        """
        title_post = "Les news du " + self.get_fr_date()

        sub_stack_post = Post(
            title=title_post,
            subtitle="",
            user_id=self.user_id,
        )

        yesterday_6am = datetime.datetime.now(datetime.timezone.utc).replace(
            hour=6, minute=0, second=0, microsecond=0
        ) - datetime.timedelta(days=1)

        all_news_posts = []
        for feed in self.feeds:
            all_news_posts.extend(self._fetch_recent_entries(feed, yesterday_6am))

        # Mix the sources so the digest is not grouped feed by feed.
        random.shuffle(all_news_posts)

        for entry in all_news_posts:
            self._append_entry(sub_stack_post, entry)

        sub_stack_post.add({"type": "heading", "level": 3, "content": "Sources"})
        for feed in self.feeds:
            sub_stack_post.add(self._link_paragraph(feed.url))

        draft = self.api.post_draft(sub_stack_post.get_draft())
        self.api.prepublish_draft(draft.get("id"))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
async def main(login, password, account):
    """Set up logging, log in to Substack and build today's digest once.

    Args:
        login: Substack account e-mail.
        password: Substack account password.
        account: URL of the Substack publication to post to.
    """
    setuplogger()

    feeds = [
        RSSfeed("https://www.factornews.com/rss.xml"),
        RSSfeed("https://nofrag.com/feed"),
        RSSfeed("https://dystopeek.fr/feed/"),
        RSSfeed("https://thepixelpost.com/rss/"),
        RSSfeed("https://yamukass.substack.com/feed"),
        RSSfeed("https://www.youtube.com/feeds/videos.xml?channel_id=UC-OvBDfZGn1OdsqMBwkOI_A", True),
    ]

    task = SubStackTask(login, password, account, feeds)

    # Single run; ``task.run_daily_at_6_am()`` is the long-running
    # scheduler alternative.
    await task.daily_task()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main("gael.honorez@gmail.com", "f3PaTGedjFc2gkr1ypi5", "https://aggregateurjvfr.substack.com"))
|
||||||
Reference in New Issue
Block a user