init commit
This commit is contained in:
207
Post_RSS_on_SubStack.py
Normal file
207
Post_RSS_on_SubStack.py
Normal file
@@ -0,0 +1,207 @@
|
||||
import asyncio
|
||||
import argparse
|
||||
import requests
|
||||
import feedparser
|
||||
import io
|
||||
import html
|
||||
import datetime
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from logging.handlers import RotatingFileHandler
|
||||
import locale
|
||||
import random
|
||||
|
||||
from substack import Api
|
||||
from substack.post import Post
|
||||
|
||||
# Shared application logger; handlers are attached later by setuplogger().
LOG = logging.getLogger('bot')
# Single formatter reused by every handler so file and console output match.
LOG_PATTERN = logging.Formatter('%(asctime)s:%(levelname)s: [%(filename)s] %(message)s')

# Select French date names for time formatting. A host without the fr_FR
# locale must still be able to import this module, so a failure here is
# reported instead of aborting the import.
try:
    locale.setlocale(locale.LC_TIME, 'fr_FR')
except locale.Error:
    LOG.warning("Locale 'fr_FR' is not available; LC_TIME left unchanged")
|
||||
|
||||
|
||||
def setuplogger():
    """Configure the 'bot' logger with a rotating file handler and a console handler.

    The log file is named after the logger ("bot.log") because no explicit
    file name is configured. Calling this function more than once is safe:
    a logger that already has handlers is left untouched, so log lines are
    never duplicated.
    """
    conf_filename = None

    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(LOG_PATTERN)
    stream_handler.setLevel(logging.DEBUG)

    def setup_logger(logger_name, file_name=None, add_steam=False):
        """Attach handlers to ``logger_name``.

        ``file_name`` is the log file stem (defaults to the logger name);
        ``add_steam`` additionally mirrors records to the console handler.
        """
        file_name = file_name or logger_name
        log_filename = f"{file_name}.log"

        logger = logging.getLogger(logger_name)
        logger.setLevel(logging.DEBUG)
        if logger.handlers:
            # Already configured by an earlier call; adding the handlers
            # again would emit every record twice.
            return

        # Append mode, rotate at 1,000,000 bytes, keep one backup file.
        file_handler = RotatingFileHandler(log_filename, "a", 1000000, 1)
        file_handler.setFormatter(LOG_PATTERN)
        logger.addHandler(file_handler)
        if add_steam:
            logger.addHandler(stream_handler)

    setup_logger("bot", conf_filename, True)
|
||||
|
||||
class RSSfeed:
    """Describe one feed to aggregate.

    Attributes:
        url: Address of the feed.
        youtube: Flag given as ``yt``; marks the feed as a YouTube feed.
    """

    def __init__(self, url, yt=False):
        self.url, self.youtube = url, yt
|
||||
|
||||
class SubStackTask:
    """Build a daily news digest from a list of feeds and upload it to Substack.

    The constructor logs in through ``substack.Api``. ``daily_task`` gathers
    the recent entries of every feed, assembles them into a single ``Post``,
    uploads it as a draft and calls ``prepublish_draft`` on it.
    """

    # Locale names tried, in order, to obtain French month names.
    _FR_LOCALES = ('fr_FR', 'fr_FR.UTF-8')
    # Date layout used by the non-YouTube feeds' ``published`` field.
    _RSS_DATE_FORMAT = '%a, %d %b %Y %H:%M:%S %z'
    # Seconds to wait for a feed before giving up on it.
    _REQUEST_TIMEOUT = 30
    # Strips any HTML tag from a summary.
    _TAG_RE = re.compile('<[^<]+?>')
    # Strips the "L’article ... est apparu en premier sur ..." footer line.
    _FOOTER_RE = re.compile(r"L’article .* est apparu en premier sur .*")

    def __init__(self, login, password, account, feeds):
        """Log in to Substack and remember the feeds to aggregate.

        Args:
            login: Account e-mail passed to ``Api``.
            password: Account password passed to ``Api``.
            account: Publication URL passed to ``Api``.
            feeds: Iterable of objects exposing ``url`` and ``youtube``.
        """
        self.api = Api(
            email=login,
            password=password,
            publication_url=account,
        )

        self.user_id = self.api.get_user_id()
        self.feeds = feeds

    def get_fr_date(self):
        """Return today's date formatted as "%d %B %Y" with French month names.

        LC_TIME is always reset to 'C' before returning (even on failure):
        the feed date parsing in this class relies on English day and month
        abbreviations.

        Raises:
            locale.Error: If no French locale is installed on the host.
        """
        try:
            for name in self._FR_LOCALES:
                try:
                    locale.setlocale(locale.LC_TIME, name)
                    break
                except locale.Error:
                    continue
            else:
                raise locale.Error(
                    "no French locale available (tried: %s)" % ", ".join(self._FR_LOCALES))
            return datetime.datetime.now().strftime("%d %B %Y")
        finally:
            locale.setlocale(locale.LC_TIME, 'C')

    async def run_daily_at_6_am(self):
        """Run ``daily_task`` forever, once a day at 06:05 local time.

        The next run is always scheduled on the following calendar day. A
        failing run is logged and does not stop the schedule.
        """
        while True:
            now = datetime.datetime.now()
            # Tomorrow at 06:05 (local, naive time).
            next_run = (now + datetime.timedelta(days=1)).replace(
                hour=6, minute=5, second=0, microsecond=0)

            # Wait until the next run time
            await asyncio.sleep((next_run - now).total_seconds())

            try:
                await self.daily_task()
            except Exception:
                # Keep the scheduler alive; the next day's run may succeed.
                LOG.exception("Daily task failed")

    async def daily_task(self):
        """Assemble the digest of recent entries and upload it as a draft.

        Entries published after 06:00 UTC of the previous day are kept,
        shuffled, and appended to the post, followed by a "Sources" section
        listing every feed URL.

        NOTE: the HTTP and Substack calls made here are synchronous, so this
        coroutine blocks the event loop while it runs.
        """
        # get_fr_date() also leaves LC_TIME set to 'C', which the date
        # parsing below needs, so it must run first.
        title_post = "Les news du " + self.get_fr_date()

        sub_stack_post = Post(
            title=title_post,
            subtitle="",
            user_id=self.user_id,
        )

        now_utc = datetime.datetime.now(datetime.timezone.utc)
        yesterday_6am = (now_utc.replace(hour=6, minute=0, second=0, microsecond=0)
                         - datetime.timedelta(days=1))

        all_news_posts = self._collect_recent_entries(yesterday_6am)
        random.shuffle(all_news_posts)

        for post in all_news_posts:
            self._append_entry(sub_stack_post, post)

        sub_stack_post.add({"type":"heading", "level":3, "content": "Sources"})
        for feed in self.feeds:
            sub_stack_post.add(self._link_paragraph(feed.url))

        draft = self.api.post_draft(sub_stack_post.get_draft())
        self.api.prepublish_draft(draft.get("id"))
        # Publishing is intentionally left disabled; the draft is only prepared.
        #self.api.publish_draft(draft.get("id"))

    def _collect_recent_entries(self, cutoff):
        """Return the entries of every feed published strictly after ``cutoff``."""
        recent = []
        for feed in self.feeds:
            LOG.info("Fetching %s", feed.url)
            try:
                response = requests.get(feed.url, timeout=self._REQUEST_TIMEOUT)
            except requests.RequestException:
                # One unreachable feed must not prevent the whole digest.
                LOG.exception("Could not fetch %s; feed skipped", feed.url)
                continue

            parsed = feedparser.parse(response.text)
            for entry in parsed.entries:
                published = self._parse_published(entry, feed.youtube)
                if published is not None and published > cutoff:
                    recent.append(entry)
        return recent

    def _parse_published(self, entry, is_youtube):
        """Return the entry's publication time as an aware datetime, or None.

        YouTube feeds carry ISO 8601 timestamps; the other feeds use the
        ``_RSS_DATE_FORMAT`` layout, with 'GMT' accepted as '+0000'.
        Entries with a missing or unparseable date yield None.
        """
        raw = entry.get("published")
        if not raw:
            return None
        try:
            if is_youtube:
                stamp = datetime.datetime.fromisoformat(raw)
            else:
                stamp = datetime.datetime.strptime(
                    raw.replace('GMT', '+0000'), self._RSS_DATE_FORMAT)
        except ValueError:
            LOG.warning("Unparseable publication date %r; entry skipped", raw)
            return None
        if stamp.tzinfo is None:
            # A naive value cannot be compared with the aware cutoff.
            # NOTE(review): assumes such timestamps are UTC - confirm.
            stamp = stamp.replace(tzinfo=datetime.timezone.utc)
        return stamp

    @classmethod
    def _clean_summary(cls, post):
        """Return the entry summary as plain text ('' when there is none)."""
        if "summary" not in post:
            return ""
        text = html.unescape(post["summary"])
        text = cls._TAG_RE.sub('', text)
        return cls._FOOTER_RE.sub('', text)

    @staticmethod
    def _link_paragraph(url):
        """Return a paragraph node showing ``url`` as a link to itself."""
        return {'type': 'paragraph', 'content': [
            {'content': url, 'marks': [{'type': "link", 'href': url}]}]}

    def _append_entry(self, sub_stack_post, post):
        """Append one feed entry to the post, followed by a horizontal rule.

        Video entries (those with a ``yt_videoid``) get a heading, the
        embedded video and a link. Other entries are added only when they
        have a non-empty summary: heading, summary, link, then any linked
        image.
        """
        link_url = post["link"]
        title = post["title"]
        summary = self._clean_summary(post)

        if "yt_videoid" in post:
            video_id = post["yt_videoid"]
            LOG.info("Embedding video %s", video_id)
            sub_stack_post.add({"type":"heading", "level":3, "content": title})
            sub_stack_post.add({"type":"youtube2", "src": video_id })
            sub_stack_post.add(self._link_paragraph(link_url))
        elif summary:
            sub_stack_post.add({"type":"heading", "level":3, "content": title})
            sub_stack_post.add({"type":"paragraph", "content": summary })
            sub_stack_post.add(self._link_paragraph(link_url))

            # NOTE(review): only the non-standard "image/jpg" type is matched;
            # feeds announcing "image/jpeg" are ignored - confirm intent.
            for link in post.get("links", []):
                if link.get("type") == "image/jpg":
                    sub_stack_post.add({'type': 'captionedImage', 'src': link["href"]})

        sub_stack_post.add({"type":"horizontal_rule"})
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
async def main(login, password, account):
    """Configure logging, log in to Substack and build today's digest once.

    Args:
        login: Substack account e-mail.
        password: Substack account password.
        account: URL of the publication that receives the draft.
    """
    setuplogger()

    feeds = [
        RSSfeed("https://www.factornews.com/rss.xml"),
        RSSfeed("https://nofrag.com/feed"),
        RSSfeed("https://dystopeek.fr/feed/"),
        RSSfeed("https://thepixelpost.com/rss/"),
        RSSfeed("https://yamukass.substack.com/feed"),
        RSSfeed("https://www.youtube.com/feeds/videos.xml?channel_id=UC-OvBDfZGn1OdsqMBwkOI_A", True),
    ]

    task = SubStackTask(login, password, account, feeds)

    # Single run. To keep the process alive and post every day, await
    # task.run_daily_at_6_am() here instead.
    await task.daily_task()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Credentials are taken from the command line or the environment so that
    # no secret lives in the source tree. Any password that was ever
    # committed to this file must be considered leaked and be rotated.
    parser = argparse.ArgumentParser(
        description="Aggregate RSS feeds into a daily Substack draft.")
    parser.add_argument(
        "--login", default=os.environ.get("SUBSTACK_LOGIN"),
        help="Substack account e-mail (default: $SUBSTACK_LOGIN)")
    parser.add_argument(
        "--password", default=os.environ.get("SUBSTACK_PASSWORD"),
        help="Substack account password (default: $SUBSTACK_PASSWORD)")
    parser.add_argument(
        "--account",
        default=os.environ.get("SUBSTACK_ACCOUNT", "https://aggregateurjvfr.substack.com"),
        help="Publication URL (default: $SUBSTACK_ACCOUNT)")
    args = parser.parse_args()

    if not args.login or not args.password:
        parser.error("a login and a password are required "
                     "(--login/--password or SUBSTACK_LOGIN/SUBSTACK_PASSWORD)")

    asyncio.run(main(args.login, args.password, args.account))
|
||||
Reference in New Issue
Block a user