Compare commits
51 Commits
39bd8ef012
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
85d79db3fd | ||
|
|
e15d53339f | ||
|
|
80d7c45cfb | ||
|
|
85b4ea9e24 | ||
|
|
91add44592 | ||
|
|
898ada327f | ||
|
|
c66935bcb6 | ||
|
|
1db321b7d4 | ||
|
|
d08a598fd7 | ||
|
|
595c11eeb5 | ||
|
|
8074e7df84 | ||
|
|
b6102d0b4f | ||
|
|
39a4ed88de | ||
|
|
542aea6602 | ||
|
|
a1e462eab9 | ||
|
|
a727d5f66a | ||
|
|
ae9c1517d9 | ||
|
|
6cf99fe2e1 | ||
|
|
7b1ee4a0ab | ||
|
|
fae2482067 | ||
|
|
5f57eab24a | ||
|
|
67f85515a9 | ||
|
|
a0f988ed32 | ||
|
|
74d61522a4 | ||
|
|
8fbdf0eff6 | ||
|
|
34d91a3677 | ||
| 83869b0663 | |||
|
|
e3d51d6c03 | ||
|
|
08442b88e6 | ||
|
|
967527835b | ||
|
|
8b1a331eb4 | ||
|
|
ef04c73c31 | ||
|
|
c78078ce35 | ||
|
|
668843d8e8 | ||
|
|
e0127a0362 | ||
|
|
f17cd92f90 | ||
|
|
d2b39db82e | ||
|
|
364660a7f5 | ||
|
|
cfe4c70a32 | ||
|
|
2db0a6543f | ||
|
|
ef414db31f | ||
|
|
a3885024db | ||
|
|
16d72ffd32 | ||
|
|
a3719f1a35 | ||
|
|
f8b9ba7eb6 | ||
|
|
320863ca50 | ||
|
|
5c68063ad3 | ||
|
|
84ab8eb974 | ||
|
|
c268dd00c8 | ||
|
|
5934363ec4 | ||
| ee1960d5d1 |
160
.gitignore
vendored
Normal file
160
.gitignore
vendored
Normal file
@@ -0,0 +1,160 @@
|
|||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
*.py,cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
cover/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
.pybuilder/
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# IPython
|
||||||
|
profile_default/
|
||||||
|
ipython_config.py
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
# For a library or package, you might want to ignore these files since the code is
|
||||||
|
# intended to run in multiple environments; otherwise, check them in:
|
||||||
|
# .python-version
|
||||||
|
|
||||||
|
# pipenv
|
||||||
|
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||||
|
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||||
|
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||||
|
# install all needed dependencies.
|
||||||
|
#Pipfile.lock
|
||||||
|
|
||||||
|
# poetry
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||||
|
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||||
|
# commonly ignored for libraries.
|
||||||
|
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||||
|
#poetry.lock
|
||||||
|
|
||||||
|
# pdm
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||||
|
#pdm.lock
|
||||||
|
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||||
|
# in version control.
|
||||||
|
# https://pdm.fming.dev/#use-with-ide
|
||||||
|
.pdm.toml
|
||||||
|
|
||||||
|
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||||
|
__pypackages__/
|
||||||
|
|
||||||
|
# Celery stuff
|
||||||
|
celerybeat-schedule
|
||||||
|
celerybeat.pid
|
||||||
|
|
||||||
|
# SageMath parsed files
|
||||||
|
*.sage.py
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
.spyproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# mkdocs documentation
|
||||||
|
/site
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# Pyre type checker
|
||||||
|
.pyre/
|
||||||
|
|
||||||
|
# pytype static type analyzer
|
||||||
|
.pytype/
|
||||||
|
|
||||||
|
# Cython debug symbols
|
||||||
|
cython_debug/
|
||||||
|
|
||||||
|
# PyCharm
|
||||||
|
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||||
|
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||||
|
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||||
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
|
#.idea/
|
||||||
19
Dockerfile
19
Dockerfile
@@ -1,8 +1,19 @@
|
|||||||
FROM python:3.8
|
FROM python:3.12
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y git
|
RUN apt-get update && apt-get install -y git
|
||||||
RUN git clone http://192.168.1.25:8124/zep/Substack_JV.git /app
|
RUN git clone https://gitea.zep.best/zep/Substack_JV.git /app
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
|
RUN pip install --upgrade pip
|
||||||
|
COPY requirements.txt .
|
||||||
RUN pip install -r requirements.txt
|
RUN pip install -r requirements.txt
|
||||||
|
|
||||||
|
ENV TZ=Europe/Brussels
|
||||||
|
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||||
|
RUN playwright install --with-deps chromium
|
||||||
COPY update_and_run.sh /app
|
COPY update_and_run.sh /app
|
||||||
RUN chmod +x /app/update_and_run.sh
|
# Normalize line endings (Windows CRLF -> LF) and ensure readable
|
||||||
CMD ["./update_and_run.sh"]
|
RUN sed -i 's/\r$//' /app/update_and_run.sh && chmod a+r /app/update_and_run.sh
|
||||||
|
|
||||||
|
# Single entrypoint: run via sh (no exec bit required, survives noexec mounts)
|
||||||
|
ENTRYPOINT ["sh", "/app/update_and_run.sh"]
|
||||||
201
LICENSE
Normal file
201
LICENSE
Normal file
@@ -0,0 +1,201 @@
|
|||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner]
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
@@ -1,209 +0,0 @@
|
|||||||
import asyncio
|
|
||||||
import argparse
|
|
||||||
import requests
|
|
||||||
import feedparser
|
|
||||||
import io
|
|
||||||
import html
|
|
||||||
import datetime
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
from logging.handlers import RotatingFileHandler
|
|
||||||
import random
|
|
||||||
|
|
||||||
from substack import Api
|
|
||||||
from substack.post import Post
|
|
||||||
|
|
||||||
LOG = logging.getLogger('bot')
|
|
||||||
LOG_PATTERN = logging.Formatter('%(asctime)s:%(levelname)s: [%(filename)s] %(message)s')
|
|
||||||
|
|
||||||
def setuplogger():
    """Configure the ``bot`` logger with a rotating log file and a stream handler.

    Records go to ``bot.log`` (rotated at 1,000,000 bytes, one backup kept) and
    to a ``logging.StreamHandler``, both formatted with ``LOG_PATTERN``.
    Calling this function more than once is safe: a logger that already owns
    a rotating file handler is left untouched, so records are not duplicated.
    """
    conf_filename = None

    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(LOG_PATTERN)
    stream_handler.setLevel(logging.DEBUG)

    def setup_logger(logger_name, file_name=None, add_steam=False):
        """Attach the handlers to *logger_name*; log file defaults to ``<logger_name>.log``."""
        log_filename = f"{file_name or logger_name}.log"

        logger = logging.getLogger(logger_name)
        logger.setLevel(logging.DEBUG)

        # logging.getLogger returns the same object on every call, so adding
        # handlers again would make every record appear twice.
        if any(isinstance(h, RotatingFileHandler) for h in logger.handlers):
            return

        file_handler = RotatingFileHandler(log_filename, "a", 1000000, 1)
        file_handler.setFormatter(LOG_PATTERN)
        logger.addHandler(file_handler)
        if add_steam:
            logger.addHandler(stream_handler)

    setup_logger("bot", conf_filename, True)
|
|
||||||
|
|
||||||
class RSSfeed():
    """A feed to aggregate: its URL plus a flag marking it as a YouTube feed."""

    def __init__(self, url, yt=False):
        # ``youtube`` is the attribute name read by the rest of the script.
        self.url, self.youtube = url, yt
|
|
||||||
|
|
||||||
class SubStackTask:
|
|
||||||
def __init__(self, login, password, account, feeds):
    """Create the Substack API client and remember the feeds to aggregate.

    Args:
        login: e-mail passed to ``Api`` as ``email``.
        password: password passed to ``Api``.
        account: publication URL passed to ``Api`` as ``publication_url``.
        feeds: iterable of feed objects exposing ``url`` and ``youtube``.
    """
    client = Api(email=login, password=password, publication_url=account)

    self.api = client
    self.feeds = feeds
    # The user id is fetched once here and reused for every post.
    self.user_id = client.get_user_id()
|
|
||||||
|
|
||||||
|
|
||||||
def get_fr_date(self):
    """Return today's date as ``DD <French month name> YYYY``.

    The month name is looked up from the month number instead of translating
    the output of ``strftime("%B")``, so the result does not depend on the
    process locale.
    """
    months_fr = (
        'Janvier', 'Février', 'Mars', 'Avril', 'Mai', 'Juin',
        'Juillet', 'Août', 'Septembre', 'Octobre', 'Novembre', 'Décembre',
    )
    today = datetime.datetime.now()
    # %d keeps the zero-padded day that the original format string produced.
    return f"{today:%d} {months_fr[today.month - 1]} {today:%Y}"
|
|
||||||
|
|
||||||
async def run_daily_at_6_am(self):
    """Run :meth:`daily_task` once per day, forever.

    Each iteration sleeps until 06:05 (naive local time from
    ``datetime.now()``) on the following calendar day and then awaits
    ``daily_task``. An exception raised by the task is logged and the loop
    continues, so one failed run does not stop the scheduler.
    """
    while True:
        now = datetime.datetime.now()
        # NOTE(review): the target is always *tomorrow* at 06:05, even when
        # the bot starts before 06:05 today — confirm that is intended.
        next_run = (now + datetime.timedelta(days=1)).replace(
            hour=6, minute=5, second=0, microsecond=0
        )
        sleep_seconds = (next_run - now).total_seconds()
        LOG.info("Waiting for %s seconds for next scan", sleep_seconds)

        await asyncio.sleep(sleep_seconds)

        try:
            await self.daily_task()
        except Exception:
            # Task cancellation is a BaseException and still propagates.
            LOG.exception("Daily task failed; retrying at the next scheduled run")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def daily_task(self):
# Build one Substack post titled "Les news du <date>" from recent feed entries and publish it.
# NOTE(review): leading whitespace was lost in this rendering of the file, so the nesting of
# the statements below cannot be read from the text; the comments describe single statements only.
|
|
||||||
|
|
||||||
title_post = "Les news du " + self.get_fr_date()
|
|
||||||
|
|
||||||
sub_stack_post = Post(
|
|
||||||
title=title_post,
|
|
||||||
subtitle="",
|
|
||||||
user_id=self.user_id
|
|
||||||
)
|
|
||||||
|
|
||||||
# NOTE(review): midnight_today and formatted_date are assigned but not read again in this method.
midnight_today = datetime.datetime.now(datetime.timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0)
|
|
||||||
# Cut-off for "new" entries: 06:00 UTC on the previous day.
yesterday_6am = datetime.datetime.now(datetime.timezone.utc).replace(hour=6, minute=0, second=0, microsecond=0) - datetime.timedelta(days=1)
|
|
||||||
|
|
||||||
formatted_date = midnight_today.strftime('%a, %d %b %Y %H:%M:%S %z')
|
|
||||||
|
|
||||||
all_news_posts = []
|
|
||||||
|
|
||||||
for feed in self.feeds:
|
|
||||||
|
|
||||||
# Download the raw feed text and parse it. NOTE(review): no timeout is passed to requests.get.
html_text = requests.get(feed.url).text
|
|
||||||
newsFeed = feedparser.parse(html_text)
|
|
||||||
|
|
||||||
|
|
||||||
# Feeds flagged as YouTube have their published date parsed with fromisoformat; the others
# are parsed with the '%a, %d %b %Y %H:%M:%S %z' format after rewriting 'GMT' to '+0000'.
if feed.youtube is True:
|
|
||||||
new_posts = [entry for entry in newsFeed.entries if datetime.datetime.fromisoformat(entry.published) > yesterday_6am]
|
|
||||||
else:
|
|
||||||
new_posts = [entry for entry in newsFeed.entries if datetime.datetime.strptime(entry.published.replace('GMT', '+0000'), '%a, %d %b %Y %H:%M:%S %z') > yesterday_6am]
|
|
||||||
|
|
||||||
all_news_posts.extend(new_posts)
|
|
||||||
|
|
||||||
|
|
||||||
# Randomise the order in which the collected entries appear in the post.
random.shuffle(all_news_posts)
|
|
||||||
|
|
||||||
|
|
||||||
for post in all_news_posts:
|
|
||||||
linkURL = post["link"]
|
|
||||||
title = post["title"]
|
|
||||||
ftext = ""
|
|
||||||
|
|
||||||
LOG.info("Posting " + str(title))
|
|
||||||
|
|
||||||
# Plain-text summary: unescape entities, strip tags, drop the "L’article ... est apparu en premier sur ..." footer.
if "summary" in post:
|
|
||||||
ftext = html.unescape(post["summary"])
|
|
||||||
# Using regular expressions to remove HTML tags
|
|
||||||
ftext = re.sub('<[^<]+?>', '', ftext)
|
|
||||||
pattern = r"L’article .* est apparu en premier sur .*"
|
|
||||||
ftext = re.sub(pattern, '', ftext)
|
|
||||||
|
|
||||||
# Entries carrying a video id get a heading, a youtube2 block and a link paragraph.
if "yt_videoid" in post:
|
|
||||||
sub_stack_post.add({"type":"heading", "level":3, "content": title})
|
|
||||||
videoId = post["yt_videoid"]
|
|
||||||
sub_stack_post.add({"type":"youtube2", "src": videoId })
|
|
||||||
sub_stack_post.add({'type': 'paragraph', 'content': [
|
|
||||||
{'content': linkURL, 'marks': [{'type': "link", 'href': linkURL}]}]})
|
|
||||||
else:
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Other entries are rendered as heading + summary text + link paragraph.
# NOTE(review): confirm which of the following statements are guarded by this test.
if ftext != "":
|
|
||||||
sub_stack_post.add({"type":"heading", "level":3, "content": title})
|
|
||||||
sub_stack_post.add({"type":"paragraph", "content": ftext })
|
|
||||||
sub_stack_post.add({'type': 'paragraph', 'content': [
|
|
||||||
{'content': linkURL, 'marks': [{'type': "link", 'href': linkURL}]}]})
|
|
||||||
|
|
||||||
# Add an image block for each link whose type is exactly "image/jpg".
if "links" in post:
|
|
||||||
for link in post["links"]:
|
|
||||||
|
|
||||||
if link["type"] == "image/jpg":
|
|
||||||
imgUrl = link["href"]
|
|
||||||
sub_stack_post.add({'type': 'captionedImage', 'src': imgUrl})
|
|
||||||
|
|
||||||
|
|
||||||
sub_stack_post.add({"type":"horizontal_rule"})
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Footer: a "Sources" heading, one link paragraph per configured feed URL, then a subscribe widget.
sub_stack_post.add({"type":"heading", "level":3, "content": "Sources"})
|
|
||||||
for feed in self.feeds:
|
|
||||||
sub_stack_post.add({'type': 'paragraph', 'content': [
|
|
||||||
{'content': feed.url, 'marks': [{'type': "link", 'href': feed.url}]}]})
|
|
||||||
|
|
||||||
|
|
||||||
sub_stack_post.add({"type":"subscribeWidget", "message":"Abonnez-vous gratuitement pour recevoir chaque jour les news dans votre e-mail et soutenir mon travail."})
|
|
||||||
|
|
||||||
# Create the draft, then prepublish and publish it through the API client.
draft = self.api.post_draft(sub_stack_post.get_draft())
|
|
||||||
self.api.prepublish_draft(draft.get("id"))
|
|
||||||
self.api.publish_draft(draft.get("id"))
|
|
||||||
|
|
||||||
async def main(login, password, account):
    """Configure logging, build the feed list and start the daily scheduler.

    Args:
        login: Substack account e-mail, forwarded to ``SubStackTask``.
        password: Substack account password, forwarded to ``SubStackTask``.
        account: publication URL, forwarded to ``SubStackTask``.

    The coroutine only returns if ``run_daily_at_6_am`` returns or raises.
    """
    setuplogger()

    # The former read of "last_scan_date.txt" was dropped: its value was never
    # used, and a malformed file aborted start-up with a ValueError.
    feeds = [
        RSSfeed("https://www.factornews.com/rss.xml"),
        RSSfeed("https://nofrag.com/feed"),
        RSSfeed("https://dystopeek.fr/feed/"),
        RSSfeed("https://thepixelpost.com/rss/"),
        RSSfeed("https://yamukass.substack.com/feed"),
        RSSfeed("https://tseret.com/categorie/tests/feed"),
        RSSfeed("https://www.gamesidestory.com/feed"),
        RSSfeed("https://www.nintendo-town.fr/feed"),
        # The second argument marks these two as YouTube feeds.
        RSSfeed("https://www.youtube.com/feeds/videos.xml?channel_id=UC-OvBDfZGn1OdsqMBwkOI_A", True),
        RSSfeed("https://www.youtube.com/feeds/videos.xml?playlist_id=PLZRiqJjIUlDTrwYs_UqEIts5fVaBpaIEz", True),
    ]

    task = SubStackTask(login, password, account, feeds)

    LOG.info("Starting bot")
    await task.run_daily_at_6_am()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Credentials must never be committed to the repository: read them from
    # the environment (SUBSTACK_LOGIN, SUBSTACK_PASSWORD). A missing variable
    # raises KeyError at start-up instead of failing later.
    # NOTE(review): the password that used to be hard-coded here is in the
    # repository history and should be treated as compromised and rotated.
    asyncio.run(
        main(
            os.environ["SUBSTACK_LOGIN"],
            os.environ["SUBSTACK_PASSWORD"],
            os.environ.get(
                "SUBSTACK_PUBLICATION_URL",
                "https://aggregateurjvfr.substack.com",
            ),
        )
    )
|
|
||||||
162
backfill.py
Normal file
162
backfill.py
Normal file
@@ -0,0 +1,162 @@
|
|||||||
|
# backfill.py
|
||||||
|
from __future__ import annotations
|
||||||
|
import os, re, sys, html
|
||||||
|
from typing import Dict, List, Optional
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
# Reuse your existing GhostAdmin client (same headers/base/proxy behavior)
|
||||||
|
# Adjust the import path if your Ghost client lives elsewhere.
|
||||||
|
from presquegratos import GhostAdmin
|
||||||
|
|
||||||
|
from storage import Storage
|
||||||
|
from keys import xgp_key, egs_key, psplus_key
|
||||||
|
|
||||||
|
# ---------------- Ghost helpers (reusing your admin client) ----------------
|
||||||
|
def ghost_list_posts(ghost: GhostAdmin, page: int = 1) -> Dict:
    """Fetch one page of posts from ``ghost.base + "posts/"`` and return the decoded JSON.

    Args:
        ghost: admin client providing ``base`` and ``_headers()``.
        page: 1-based page number sent as the ``page`` query parameter.

    Raises:
        requests.HTTPError: when the response status is an error
            (via ``raise_for_status``).
    """
    # Minimal params: avoid 'filter' and 'fields' to dodge 400 behind __bot proxy
    query = dict(
        limit="50",
        page=str(page),
        order="published_at DESC",
        formats="lexical,html",  # <-- IMPORTANT
    )
    response = requests.get(
        ghost.base + "posts/",
        headers=ghost._headers(),
        params=query,
        timeout=30,
    )
    response.raise_for_status()
    return response.json()
|
||||||
|
|
||||||
|
def list_recap_posts(ghost: GhostAdmin, hard_limit: int = 2000) -> List[Dict]:
    """Page through the Ghost posts and return those titled ``"Récap hebdo…"``.

    Args:
        ghost: admin client handed to ``ghost_list_posts``.
        hard_limit: maximum number of posts to return. The result is
            truncated to this size; previously a full page could push it
            past the limit.

    Returns:
        Matching posts in the order the pages returned them.
    """
    posts: List[Dict] = []
    page = 1
    while len(posts) < hard_limit:
        batch = ghost_list_posts(ghost, page=page).get("posts", [])
        if not batch:
            break
        # client-side filter to be robust to proxy quirks
        posts.extend(
            p for p in batch
            if (p.get("title") or "").strip().startswith("Récap hebdo")
        )
        # 50 mirrors the "limit" sent by ghost_list_posts: a short page is the last one.
        if len(batch) < 50:
            break
        page += 1
    return posts[:hard_limit]
|
||||||
|
|
||||||
|
# ---------------- Parsing helpers (unchanged) ----------------
|
||||||
|
# Microsoft / Xbox store URL; group 1 is the trailing 12-16 character product id
# (an earlier version of this pattern accepted 12 or more characters).
MS_STORE_RE = re.compile(
    r"(?:xbox|microsoft)\.com/.*/store/.*/([0-9A-Z]{12,16})",
    re.IGNORECASE,
)
# Epic Games Store product URL; group 1 is the slug after "/p/".
EPIC_RE = re.compile(r"epicgames\.com/store/.*/p/([\w\-]+)", re.IGNORECASE)
# Any URL on blog.playstation.com.
PSBLOG_RE = re.compile(r"blog\.playstation\.com/.*", re.IGNORECASE)
|
||||||
|
|
||||||
|
def clean_text(s: str) -> str:
    """Unescape HTML entities in *s* and collapse whitespace runs to one space.

    Leading and trailing whitespace is removed. A falsy *s* (``None`` or
    ``""``) yields ``""``.
    """
    unescaped = html.unescape(s or "")
    collapsed = re.sub(r"\s+", " ", unescaped)
    return collapsed.strip()
|
||||||
|
|
||||||
|
def extract_sections(soup: BeautifulSoup) -> Dict[str, BeautifulSoup]:
# Split a parsed post into per-service containers keyed "xgp", "egs" and "psplus".
# A heading (h2/h3/h4) whose lower-cased text mentions a known service creates a fresh
# <div> for that key; nodes visited while a container is active are appended to it.
|
||||||
|
sections: Dict[str, BeautifulSoup] = {}
|
||||||
|
current = None
|
||||||
|
current_key = None
|
||||||
|
# NOTE(review): find_all also yields elements nested inside matched div/section/ul/ol
# nodes, and append() re-parents a node — confirm nested content ends up where intended.
for node in soup.find_all(["h2","h3","h4","p","ul","ol","div","section"]):
|
||||||
|
if node.name in ("h2","h3","h4"):
|
||||||
|
title = clean_text(node.get_text())
|
||||||
|
key = None
|
||||||
|
tl = title.lower()
|
||||||
|
if "game pass" in tl:
|
||||||
|
key = "xgp"
|
||||||
|
elif "egs" in tl or "epic" in tl:
|
||||||
|
key = "egs"
|
||||||
|
elif "ps plus" in tl or "ps+" in tl:
|
||||||
|
key = "psplus"
|
||||||
|
if key:
|
||||||
|
current_key = key
|
||||||
|
# A repeated heading for the same key replaces the earlier container.
current = sections[key] = soup.new_tag("div")
|
||||||
|
# NOTE(review): indentation was lost in this view — confirm whether this continue
# applies to every heading or only to recognised ones.
continue
|
||||||
|
if current_key and current is not None:
|
||||||
|
current.append(node)
|
||||||
|
return sections
|
||||||
|
|
||||||
|
def parse_xgp(section: BeautifulSoup) -> List[Dict]:
    """Return de-duplicated ``{"title", "productId"}`` dicts for the links in *section*.

    A link is kept when its ``href`` matches ``MS_STORE_RE`` or when it has
    non-empty text. ``productId`` is the regex capture, or ``None`` when the
    ``href`` did not match. Duplicates are detected with ``xgp_key``; the
    first occurrence wins.
    """
    candidates = []
    for anchor in section.find_all("a", href=True):
        match = MS_STORE_RE.search(anchor["href"])
        label = clean_text(anchor.get_text())
        if not (match or label):
            continue
        candidates.append({
            "title": label,
            "productId": match.group(1) if match else None,
        })

    unique, seen_keys = [], set()
    for entry in candidates:
        entry_key = xgp_key(entry)
        if entry_key in seen_keys:
            continue
        seen_keys.add(entry_key)
        unique.append(entry)
    return unique
|
||||||
|
|
||||||
|
def parse_egs(section: BeautifulSoup) -> List[Dict]:
    """Collect Epic Games Store entries from the links found in *section*.

    Only links whose ``href`` matches ``EPIC_RE`` are kept. The title is the
    link text, falling back to the ``title`` attribute; ``start`` is always
    the empty string here. Entries are de-duplicated on ``egs_key``, keeping
    the first occurrence.
    """
    epic_links = [
        anchor for anchor in section.find_all("a", href=True)
        if EPIC_RE.search(anchor["href"])
    ]

    first_by_key = {}
    for anchor in epic_links:
        label = clean_text(anchor.get_text()) or clean_text(anchor.get("title"))
        entry = {"title": label, "start": ""}
        first_by_key.setdefault(egs_key(entry), entry)
    return list(first_by_key.values())
|
||||||
|
|
||||||
|
def parse_psplus(section: BeautifulSoup, post_title: str) -> Optional[Dict]:
    """Build the PS Plus record for a recap post.

    ``url`` is the ``href`` of the first link in *section* matching
    ``PSBLOG_RE`` (empty string when none). ``date`` is taken from the first
    ``DD-MM-YYYY`` pattern in *post_title* and rewritten as ``YYYY-MM-DD``
    (empty string when the title carries no such date).
    """
    blog_link = section.find("a", href=PSBLOG_RE)
    date_match = re.search(r"(\d{2})-(\d{2})-(\d{4})", post_title)

    if date_match:
        day, month, year = date_match.groups()
        iso_date = f"{year}-{month}-{day}"
    else:
        iso_date = ""

    return {"url": blog_link["href"] if blog_link else "", "date": iso_date}
|
||||||
|
|
||||||
|
# ---------------- Main backfill ----------------
|
||||||
|
def backfill():
    """Record the Game Pass items of every existing recap post in the store.

    Reads ``GHOST_ADMIN_URL`` and ``GHOST_ADMIN_KEY`` from the environment,
    lists the recap posts, parses each post's Game Pass section and calls
    ``store.remember("xgp", key, post_id)`` once per distinct key (the first
    post that mentions a key wins). Progress is printed to stdout.
    """
    ghost = GhostAdmin(
        admin_url=os.environ.get("GHOST_ADMIN_URL", "").rstrip("/") + "/",
        admin_key=os.environ.get("GHOST_ADMIN_KEY", ""),
    )
    store = Storage()

    posts = list_recap_posts(ghost)
    print(f"Found {len(posts)} recap posts.")

    total_xgp = total_egs = total_ps = 0

    # A set gives O(1) membership tests; the key order is never needed.
    seen_keys = set()

    for p in posts:
        pid = p["id"]
        title = p.get("title") or ""
        html_body = p.get("html") or ""

        soup = BeautifulSoup(html_body, "html.parser")
        sections = extract_sections(soup)

        xgp_section = sections.get("xgp")
        if xgp_section is None:
            # No Game Pass heading in this post: parse an empty document.
            xgp_section = BeautifulSoup("", "html.parser")

        for it in parse_xgp(xgp_section):
            key = xgp_key(it)
            if key not in seen_keys:
                store.remember("xgp", key, pid)
                total_xgp += 1
                seen_keys.add(key)

        # EGS and PS+ backfill is intentionally disabled; their totals stay 0.
        # for it in parse_egs(sections.get("egs", BeautifulSoup("", "html.parser"))):
        #     store.remember("egs", egs_key(it), pid); total_egs += 1
        # if "psplus" in sections:
        #     item = parse_psplus(sections["psplus"], title)
        #     store.remember("psplus", psplus_key(item), pid); total_ps += 1

        print(f"Backfilled from: {title}")

    print(f"Done. Inserted ~ XGP:{total_xgp} | EGS:{total_egs} | PS+:{total_ps}")
|
||||||
|
|
||||||
|
# Script entry point: run the backfill only when this file is executed directly.
if __name__ == "__main__":
    backfill()
|
||||||
1
cookies.json
Normal file
1
cookies.json
Normal file
@@ -0,0 +1 @@
|
|||||||
|
{"__cf_bm": "95up0icsYyESvD6suTUFG05xaWxwEr5_xuHUOv32G9I-1720025055-1.0.1.1-NlvsLW9j26FX8aPpLmVETEJ0zd.VyXefLr75kvT6iC.zHnPtkbIWgfesI0VaUGuvwV62qHpctJEoahLR9TIuHQ", "ab_experiment_sampled": "%22false%22", "ab_testing_id": "%22a6e7ba67-7dc0-452c-a935-d2f2bddd5edf%22", "ajs_anonymous_id": "%22e4535e95-1c5b-4173-82db-47807c57fb38%22", "cookie_storage_key": "f666a42c-49e8-47a2-bdbc-6eece0d6a06e", "substack.sid": "s%3ARLYSI2_XaTlGuYIpTYWjS8ib48PpuE0S.jNwCzcGzKUvUAuFdLNdfgxwewTUawIoDDZ05moubvzM", "visit_id": "%7B%22id%22%3A%22a0d46be8-56f4-406f-b1d7-14c41369b737%22%2C%22timestamp%22%3A%222024-07-03T16%3A44%3A13.349Z%22%7D", "AWSALBTG": "yw2xMbYVFbKWSzJiQsdCKp7mMH+wQ5T4/JIUc1TvywUi5iIJVXuO21AMhb+oPgegicdtpekLTDTl+zWKEekRsurS7+20skhmPxZXJf/Tl7jBd/PecbW7qa3DHkPvQtWz+SWD8+7P1rNjmY9lmyZgzH/ZeGgeiishRz9gsGO0OT/d", "AWSALBTGCORS": "yw2xMbYVFbKWSzJiQsdCKp7mMH+wQ5T4/JIUc1TvywUi5iIJVXuO21AMhb+oPgegicdtpekLTDTl+zWKEekRsurS7+20skhmPxZXJf/Tl7jBd/PecbW7qa3DHkPvQtWz+SWD8+7P1rNjmY9lmyZgzH/ZeGgeiishRz9gsGO0OT/d"}
|
||||||
9
docker-compose.yml
Normal file
9
docker-compose.yml
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
version: '3.3'
|
||||||
|
|
||||||
|
services:
|
||||||
|
substackjv:
|
||||||
|
build: .
|
||||||
|
volumes:
|
||||||
|
- /path/to/your/host/directory:/data
|
||||||
|
environment:
|
||||||
|
- TZ=Europe/Brussels
|
||||||
7
env.bat
Normal file
7
env.bat
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
set GHOST_ADMIN_KEY=68bad0e13546e700012dd65d:116a81b7e189d3b3d3b86082f97ef65daedb06498a3f1f902b8e0c08d095dc19
|
||||||
|
set GHOST_ADMIN_URL=https://ghostadmin.zep.best/ghost/api/admin/__bot/FF4440EBA737506D397C170A8422109C357AA7582F10938B7C5F11D6B652F5D4
|
||||||
|
set GHOST_EMAIL_SEGMENT=status:free
|
||||||
|
set GHOST_NEWSLETTER_SLUG=default-newsletter
|
||||||
|
set GHOST_CONTENT_URL=https://ghost.zep.best
|
||||||
|
set DB_FILE_FALLBACK=f:\workspace\Substack_JV\data\published.db
|
||||||
|
set MISTRAL_API_KEY=tQJHvYlmwz1ihKxOhXS3FmDNTRhBh6b3
|
||||||
15
feeds.txt
Normal file
15
feeds.txt
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
https://www.factornews.com/rss.xml
|
||||||
|
https://nofrag.com/feed
|
||||||
|
https://dystopeek.fr/feed/
|
||||||
|
https://thepixelpost.com/rss/
|
||||||
|
https://yamukass.substack.com/feed
|
||||||
|
https://tseret.com/categorie/tests/feed
|
||||||
|
https://www.gamesidestory.com/feed
|
||||||
|
https://www.nintendo-town.fr/feed
|
||||||
|
https://jesuisungameur.com/feed
|
||||||
|
https://www.switch-actu.fr/categorie/tests/tests-de-jeux/feed
|
||||||
|
https://www.playscope.com/category/articles/test-gaming/feed
|
||||||
|
https://jrpgfr.net/category/test/feed
|
||||||
|
https://jv.jeuxonline.info/rss/dossiers/rss.xml
|
||||||
|
https://www.youtube.com/feeds/videos.xml?channel_id=UC-OvBDfZGn1OdsqMBwkOI_A
|
||||||
|
https://www.youtube.com/feeds/videos.xml?playlist_id=PLZRiqJjIUlDTrwYs_UqEIts5fVaBpaIEz
|
||||||
20
keys.py
Normal file
20
keys.py
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
# keys.py (or inline in your main)
|
||||||
|
def xgp_key(item) -> str:
    """Return the de-duplication key for a Game Pass item.

    Uses the stripped ``productId`` when present; otherwise falls back to the
    stripped, lower-cased ``title`` under a ``title:`` prefix.
    """
    product_id = (item.get("productId") or "").strip()
    if not product_id:
        normalized_title = (item.get("title") or "").strip().lower()
        return f"item:xgp:title:{normalized_title}"
    return f"item:xgp:{product_id}"
|
||||||
|
|
||||||
|
def egs_key(item) -> str:
    """Return the de-duplication key for an Epic Games Store item.

    The key joins the stripped ``title`` and the stripped ``start`` value
    with ``|``; missing or ``None`` fields count as empty strings.
    """
    parts = [(item.get(field) or "").strip() for field in ("title", "start")]
    return "item:egs:" + "|".join(parts)
|
||||||
|
|
||||||
|
def psplus_key(item) -> str:
    """Return the de-duplication key for a PS Plus item.

    The key joins the stripped ``url`` and the stripped ``date`` value with
    ``|``; missing or ``None`` fields count as empty strings.
    """
    def field(name):
        return (item.get(name) or "").strip()

    return "item:psplus:{}|{}".format(field("url"), field("date"))
|
||||||
928
post_rss_to_ghost.py
Normal file
928
post_rss_to_ghost.py
Normal file
@@ -0,0 +1,928 @@
|
|||||||
|
import asyncio
|
||||||
|
import argparse
|
||||||
|
import datetime as dt
|
||||||
|
import html
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
from logging.handlers import RotatingFileHandler
|
||||||
|
from typing import Optional, List, Dict
|
||||||
|
import feedparser
|
||||||
|
import requests
|
||||||
|
import jwt
|
||||||
|
import zoneinfo # Python 3.9+
|
||||||
|
from urllib.parse import urlparse, parse_qs, urljoin
|
||||||
|
|
||||||
|
# ------------- Web Crawler for Images -------------
|
||||||
|
|
||||||
|
def _meta_content(html_content: str, attr: str, value: str) -> Optional[str]:
    """Return the raw ``content`` of the first ``<meta attr="value">`` tag, or None."""
    key = rf'{attr}=["\']{re.escape(value)}["\']'
    content = r'content=["\']([^"\']+)["\']'
    # Attribute order varies between sites, so try both orders.
    for pattern in (rf'<meta[^>]+{key}[^>]+{content}', rf'<meta[^>]+{content}[^>]+{key}'):
        match = re.search(pattern, html_content, re.IGNORECASE)
        if match:
            return match.group(1)
    return None


def extract_image_from_url(url: str, timeout: int = 10) -> Optional[str]:
    """Fetch a webpage and return the best image URL found in it, or None.

    Candidates are tried in this order: the ``og:image`` meta tag, the
    ``twitter:image`` meta tag, then the first ``<img>`` after the opening
    ``<article>`` tag whose URL does not look like an avatar/icon/tracking
    pixel. The result is resolved against *url*, so relative paths become
    absolute.

    Args:
        url: Page to fetch.
        timeout: Request timeout in seconds.

    Returns:
        The absolute image URL, or None when nothing suitable is found or the
        request fails (failures are logged at debug level, never raised).
    """
    skip_hints = ('avatar', 'icon', 'logo', 'emoji', '1x1', 'pixel')
    try:
        resp = requests.get(
            url,
            timeout=timeout,
            headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
                "Accept": "text/html,application/xhtml+xml",
            },
            allow_redirects=True,
        )
        resp.raise_for_status()
        html_content = resp.text

        # OpenGraph first (most reliable), then the Twitter card.
        for attr, value in (("property", "og:image"), ("name", "twitter:image")):
            candidate = _meta_content(html_content, attr, value)
            if candidate:
                # Attribute values are HTML-escaped ("&amp;" in query strings).
                return urljoin(url, html.unescape(candidate))

        # Fallback: scan images after the opening <article> tag and take the
        # first one that is not an avatar, icon or tracking pixel.
        article_start = re.search(r'<article[^>]*>', html_content, re.IGNORECASE)
        if article_start:
            tail = html_content[article_start.end():]
            for img in re.finditer(r'<img[^>]+src=["\']([^"\']+)["\']', tail, re.IGNORECASE):
                img_url = html.unescape(img.group(1))
                if not any(skip in img_url.lower() for skip in skip_hints):
                    return urljoin(url, img_url)

        return None
    except Exception as e:
        # Best-effort crawler: a missing image must never break the caller.
        LOG.debug("Failed to extract image from %s: %s", url, e)
        return None
|
||||||
|
|
||||||
|
# ------------- YouTube helpers -------------
|
||||||
|
|
||||||
|
def fetch_youtube_oembed_html(youtube_url: str, timeout: int = 10) -> Optional[str]:
    """Return YouTube's oEmbed HTML for *youtube_url*, wrapped as a Ghost embed card.

    Args:
        youtube_url: URL of the video to embed.
        timeout: Request timeout in seconds.

    Returns:
        ``<figure class="kg-card kg-embed-card">…</figure>`` containing the
        oEmbed ``html`` field unmodified, or None when the request fails or
        the response carries no ``html``.
    """
    try:
        resp = requests.get(
            "https://www.youtube.com/oembed",
            params={"url": youtube_url, "format": "json"},
            headers={"User-Agent": "ghost-bot/1.0"},
            timeout=timeout,
        )
        resp.raise_for_status()
        html_content = resp.json().get("html")
    except Exception as e:
        # Best-effort: report the failure instead of swallowing it silently.
        LOG.debug("Failed to fetch YouTube oEmbed for %s: %s", youtube_url, e)
        return None

    if not html_content:
        return None
    # Wrap in Ghost embed card container; do NOT alter the iframe attributes.
    return f'<figure class="kg-card kg-embed-card">{html_content}</figure>'
|
||||||
|
|
||||||
|
def youtube_thumbnail_url(video_id: str) -> str:
    """Return the ``hqdefault.jpg`` thumbnail URL on i.ytimg.com for *video_id*."""
    return "https://i.ytimg.com/vi/{}/hqdefault.jpg".format(video_id)
|
||||||
|
|
||||||
|
def extract_youtube_id(url: str) -> Optional[str]:
    """Extract the video ID from a YouTube URL, or return None.

    Recognized forms:
      * ``youtube.com/watch?v=<id>`` (any subdomain of youtube.com)
      * ``youtube.com/shorts/<id>``, ``/live/<id>``, ``/embed/<id>``, ``/v/<id>``
      * ``youtu.be/<id>``

    Other youtube.com paths (playlists, channels, search results, …) and
    look-alike hosts such as ``notyoutube.com`` yield None.
    """
    if not url:
        return None
    try:
        parsed = urlparse(url)
        # hostname is lower-cased and excludes any port or credentials.
        host = parsed.hostname or ""
    except (ValueError, TypeError, AttributeError):
        return None

    if host == "youtube.com" or host.endswith(".youtube.com"):
        if parsed.path == "/watch":
            return parse_qs(parsed.query).get("v", [None])[0]
        # Require a known video prefix so that path words such as
        # "playlist" or "channel" are never mistaken for a video ID.
        match = re.match(r"^/(?:shorts|live|embed|v)/([A-Za-z0-9_-]{6,})", parsed.path)
        return match.group(1) if match else None

    if host in ("youtu.be", "www.youtu.be"):
        slug = parsed.path.strip("/").split("/")[0]
        return slug or None

    return None
|
||||||
|
|
||||||
|
# ------------- Logging -------------
|
||||||
|
|
||||||
|
LOG = logging.getLogger("bot")
|
||||||
|
LOG_PATTERN = logging.Formatter("%(asctime)s:%(levelname)s: [%(filename)s] %(message)s")
|
||||||
|
|
||||||
|
def setuplogger():
    """Attach a console handler and a rotating file handler to ``LOG``.

    Both handlers use ``LOG_PATTERN``. The console handler and the logger
    itself are set to DEBUG; the file handler writes to ``bot.log`` and
    rotates at 1,000,000 bytes with one backup file.
    """
    console = logging.StreamHandler()
    console.setFormatter(LOG_PATTERN)
    console.setLevel(logging.DEBUG)

    rotating = RotatingFileHandler("bot.log", mode="a", maxBytes=1_000_000, backupCount=1)
    rotating.setFormatter(LOG_PATTERN)

    LOG.setLevel(logging.DEBUG)
    for handler in (console, rotating):
        LOG.addHandler(handler)
|
||||||
|
|
||||||
|
# ------------- Model -------------
|
||||||
|
|
||||||
|
class RSSfeed:
    """Feed descriptor: the feed URL plus a flag stored as ``youtube``."""

    def __init__(self, url: str, yt: bool = False):
        self.url, self.youtube = url, yt
|
||||||
|
|
||||||
|
# ------------- Mistral AI Client -------------
|
||||||
|
|
||||||
|
class MistralClient:
    """Client for Mistral AI API to filter and group news items."""

    def __init__(self, api_key: str, model: str = "mistral-small-latest"):
        self.api_key = api_key
        self.model = model
        self.base_url = "https://api.mistral.ai/v1/chat/completions"

    def _call_api(self, messages: List[Dict], temperature: float = 0.3) -> Optional[str]:
        """POST *messages* to the chat-completions endpoint in JSON mode.

        Returns the content of the first choice, or None on any failure
        (the error is logged, never raised).
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": self.model,
            "messages": messages,
            "temperature": temperature,
            "response_format": {"type": "json_object"},
        }
        try:
            resp = requests.post(self.base_url, headers=headers, json=payload, timeout=120)
            resp.raise_for_status()
            return resp.json()["choices"][0]["message"]["content"]
        except Exception as e:
            LOG.error("Mistral API error: %s", e)
            return None

    @staticmethod
    def _parse_json_object(response: Optional[str], purpose: str) -> Optional[dict]:
        """Decode *response* as a JSON object; return None if absent or invalid.

        A warning is logged when a non-empty response cannot be decoded or is
        not a JSON object. *purpose* only appears in that log message.
        """
        if not response:
            return None
        try:
            result = json.loads(response)
        except (json.JSONDecodeError, TypeError):
            result = None
        if not isinstance(result, dict):
            LOG.warning("Failed to parse Mistral response for %s", purpose)
            return None
        return result

    @staticmethod
    def _valid_ids(raw, limit: int) -> List[int]:
        """Return the distinct item indices in *raw* that satisfy 0 <= id < limit.

        The model's output is untrusted: ids may arrive as strings, be
        negative, repeated, out of range, or not be ids at all. Decimal
        strings are converted; everything else that is not a plain int is
        dropped. Order of first appearance is preserved.
        """
        if not isinstance(raw, (list, tuple)):
            return []
        ids: List[int] = []
        seen = set()
        for value in raw:
            if isinstance(value, bool):
                # bool is a subclass of int; True/False are not item ids.
                continue
            if isinstance(value, str) and value.strip().isdecimal():
                value = int(value.strip())
            if isinstance(value, int) and 0 <= value < limit and value not in seen:
                seen.add(value)
                ids.append(value)
        return ids

    def filter_news_items(self, items: List[dict], dry_run: bool = False) -> List[dict]:
        """
        Filter out non-news items (tips, walkthroughs, guides, tutorials).

        Items are sent to the model in batches of 50; the model answers with
        the ids to exclude. When a batch's response is missing or unusable,
        every item of that batch is kept. With *dry_run*, the excluded and
        kept titles are logged at debug level.

        Returns the kept items in their original order.
        """
        if not items:
            return []

        # Prepare items for analysis
        items_for_analysis = [
            {
                "id": i,
                "title": item.get("title", ""),
                "link": item.get("link", ""),
                "summary": (item.get("summary", "") or "")[:300],  # Truncate for API
            }
            for i, item in enumerate(items)
        ]

        # Split into batches to avoid token limits
        batch_size = 50
        filtered_ids = set()

        for batch_start in range(0, len(items_for_analysis), batch_size):
            batch = items_for_analysis[batch_start:batch_start + batch_size]

            prompt = f"""Tu analyses des articles de sites de jeux vidéo. Tu dois identifier UNIQUEMENT les articles à EXCLURE.

EXCLURE UNIQUEMENT si le titre contient EXPLICITEMENT UN de ces mots-clés:
- "guide" (le mot exact)
- "soluce" (le mot exact)
- "astuce" (le mot exact)
- "solution" (le mot exact, pas "résolution")
- "code promo"
- "bon plan"
- "-20%" ou "-30%" etc (réductions)
- "tuto" ou "tutoriel"
- "comment faire"
- "how to"

NE JAMAIS EXCLURE:
- "Early Access" = news de sortie anticipée, À GARDER
- "Test" ou "Review" = critique, À GARDER
- "Partie Rapide" = émission/podcast, À GARDER
- Tout article de news, annonce, sortie, preview
- Tout article d'opinion, éditorial, récap
- Tout le reste qui ne contient pas les mots-clés d'exclusion ci-dessus

Articles à analyser:
{json.dumps(batch, ensure_ascii=False, indent=2)}

Retourne un JSON avec "exclude_ids" contenant UNIQUEMENT les IDs des articles guides/soluces/promos.
Si aucun article ne correspond aux critères d'exclusion, retourne {{"exclude_ids": []}}

Sois TRÈS conservateur - en cas de doute, NE PAS exclure."""

            messages = [{"role": "user", "content": prompt}]
            response = self._call_api(messages)

            batch_ids = {entry["id"] for entry in batch}
            result = self._parse_json_object(response, "filtering")
            # Fallback: with no usable answer nothing is excluded from this batch.
            excluded_ids = set()
            if result is not None:
                excluded_ids = set(self._valid_ids(result.get("exclude_ids"), len(items)))
            filtered_ids.update(batch_ids - excluded_ids)

        # Log filtered out items in dry-run mode
        if dry_run:
            excluded_ids = set(range(len(items))) - filtered_ids
            if excluded_ids:
                LOG.debug("=== FILTERED OUT (non-news) ===")
                for i in sorted(excluded_ids):
                    LOG.debug("  [EXCLUDED] %s", items[i].get("title", "No title"))
            LOG.debug("=== KEPT (news) ===")
            for i in sorted(filtered_ids):
                LOG.debug("  [KEPT] %s", items[i].get("title", "No title"))

        return [items[i] for i in sorted(filtered_ids)]

    def group_similar_items(self, items: List[dict]) -> List[Dict]:
        """
        Group news items by category (News, Tests/Reviews, Previews, etc.)
        with sub-groups by game/topic within each category.

        Each item lands in at most one sub-group (the first that claims it);
        items the model did not place go to an "Autres" / "Divers" sub-group.
        When the response is missing or unusable, all items are returned in a
        single category.

        Returns a list of ``{"name", "subgroups": [{"title", "items"}]}``.
        """
        if not items:
            return []

        # Prepare items for analysis
        items_for_analysis = [
            {
                "id": i,
                "title": item.get("title", ""),
                "link": item.get("link", ""),
            }
            for i, item in enumerate(items)
        ]

        prompt = f"""Organise ces articles de jeux vidéo en CATÉGORIES et SOUS-GROUPES.

Articles à organiser:
{json.dumps(items_for_analysis, ensure_ascii=False, indent=2)}

CATÉGORIES (utilise ces noms exacts):
1. "Actualités" - News, annonces, sorties, mises à jour, industrie
2. "Tests & Critiques" - Reviews, tests, avis, notes
3. "Aperçus & Previews" - Previews, impressions, démos, hands-on
4. "Vidéos" - Trailers, gameplay vidéos, podcasts
5. "Autres" - Le reste

RÈGLES DE GROUPEMENT (TRÈS IMPORTANT):
- Groupe par FRANCHISE ou SÉRIE (ex: tous les "Final Fantasy" ensemble, même FF7, FF16, FF XIV)
- Groupe par UNIVERS (ex: "Warhammer 40K" = Space Marine + Dawn of War + Darktide)
- Groupe par ÉVÉNEMENT (ex: "Nintendo Direct", "State of Play", "Game Awards")
- N'utilise JAMAIS de noms de sites web comme groupes (pas "NoFrag", "JeuxOnline", etc.)

EXEMPLES DE GROUPEMENTS CORRECTS:
- "Final Fantasy VII Rebirth sur Switch 2" + "Final Fantasy VII Remake Intergrade en tête" → groupe "Final Fantasy"
- "Techmarine dans Space Marine 2" + "Dawn of War 4 gameplay Ork" → groupe "Warhammer 40K"
- "GTA 6 trailer" + "GTA 6 date de sortie" → groupe "GTA 6"
- "Nintendo Direct annoncé" + "Zelda dans le Nintendo Direct" → groupe "Nintendo Direct"

Retourne ce JSON:
{{
"categories": [
{{
"name": "Actualités",
"subgroups": [
{{"title": "Final Fantasy", "item_ids": [0, 3, 7]}},
{{"title": "Warhammer 40K", "item_ids": [1, 2]}},
{{"title": "Steam", "item_ids": [5]}}
]
}}
]
}}

IMPORTANT: Chaque article dans UN SEUL sous-groupe. Titre = nom de franchise/série/univers, PAS nom de site."""

        messages = [{"role": "user", "content": prompt}]
        response = self._call_api(messages, temperature=0.2)

        result = self._parse_json_object(response, "grouping")
        if result is None:
            # Fallback: return all items in a single category/subgroup
            return [{
                "name": "Actualités de la semaine",
                "subgroups": [{"title": "Toutes les news", "items": items}]
            }]

        categories = []
        used_ids = set()

        raw_categories = result.get("categories")
        for cat_data in raw_categories if isinstance(raw_categories, list) else []:
            if not isinstance(cat_data, dict):
                continue
            cat_name = str(cat_data.get("name") or "Autres")
            subgroups = []

            raw_subgroups = cat_data.get("subgroups")
            for sg_data in raw_subgroups if isinstance(raw_subgroups, list) else []:
                if not isinstance(sg_data, dict):
                    continue
                # Filter to valid, unused IDs
                valid_ids = [
                    i for i in self._valid_ids(sg_data.get("item_ids"), len(items))
                    if i not in used_ids
                ]
                if valid_ids:
                    used_ids.update(valid_ids)
                    subgroups.append({
                        "title": str(sg_data.get("title") or "Divers"),
                        "items": [items[i] for i in valid_ids]
                    })

            if subgroups:
                categories.append({
                    "name": cat_name,
                    "subgroups": subgroups
                })

        # Add any ungrouped items
        ungrouped = [item for i, item in enumerate(items) if i not in used_ids]
        if ungrouped:
            # Find or create "Autres" category
            autres_cat = next((c for c in categories if c["name"] == "Autres"), None)
            if autres_cat:
                autres_cat["subgroups"].append({"title": "Divers", "items": ungrouped})
            else:
                categories.append({
                    "name": "Autres",
                    "subgroups": [{"title": "Divers", "items": ungrouped}]
                })

        return categories
|
||||||
|
|
||||||
|
|
||||||
|
# ------------- Ghost Admin API client -------------
|
||||||
|
|
||||||
|
class GhostAdmin:
    """Minimal Ghost Admin API client (JWT auth, posts and newsletters)."""

    def __init__(self, admin_url: str, admin_key: str, accept_version: str = "v6.0"):
        """Store the API base URL and split the admin key into id and secret.

        Args:
            admin_url: Base URL of the Admin API; a single trailing slash is
                enforced.
            admin_key: Key in the form ``<id>:<hex secret>``.
            accept_version: Value sent in the ``Accept-Version`` header.

        Raises:
            ValueError: If *admin_key* is not exactly ``<id>:<secret>`` with
                both parts non-empty.
        """
        self.base = admin_url.rstrip("/") + "/"
        key_id, sep, secret = (admin_key or "").partition(":")
        if not sep or not key_id or not secret or ":" in secret:
            # Never echo the key itself: it is a credential.
            raise ValueError("Ghost admin key must have the form '<id>:<hex secret>'")
        self.key_id, self.key_secret_hex = key_id, secret
        self.accept_version = accept_version

    def _jwt(self) -> str:
        """Return a freshly signed HS256 token valid for five minutes."""
        iat = int(time.time())
        payload = {"iat": iat, "exp": iat + 5 * 60, "aud": "/admin/"}
        headers = {"alg": "HS256", "typ": "JWT", "kid": self.key_id}
        token = jwt.encode(payload, bytes.fromhex(self.key_secret_hex), algorithm="HS256", headers=headers)
        # Older PyJWT versions return bytes, newer ones return str.
        return token if isinstance(token, str) else token.decode("utf-8")

    def _headers(self):
        """Return request headers carrying a new token on every call."""
        return {
            "Authorization": f"Ghost {self._jwt()}",
            "Accept-Version": self.accept_version,
            "Content-Type": "application/json",
        }

    def latest_published_date(self, tz_name: str = "Europe/Brussels"):
        """
        Return the (timezone-aware) date of the most recently published post,
        converted to *tz_name*, or None when there is none.

        Raises ``requests.HTTPError`` on an HTTP error status.
        """
        url = self.base + "posts/?limit=1&order=published_at%20desc&fields=published_at"
        resp = requests.get(url, headers=self._headers(), timeout=20)
        resp.raise_for_status()
        posts = resp.json().get("posts", [])
        if not posts or not posts[0].get("published_at"):
            return None
        # ISO 8601 -> aware UTC -> converted to the local timezone
        dtu = dt.datetime.fromisoformat(posts[0]["published_at"].replace("Z", "+00:00"))
        return dtu.astimezone(zoneinfo.ZoneInfo(tz_name))

    def get_newsletters(self):
        """Return the list of newsletters; raise RuntimeError on HTTP >= 400."""
        url = self.base + "newsletters/"
        resp = requests.get(url, headers=self._headers(), timeout=20)
        if resp.status_code >= 400:
            raise RuntimeError(f"Ghost newsletters error {resp.status_code}: {resp.text}")
        return resp.json().get("newsletters", [])

    def pick_newsletter_slug(self, preferred_slug: Optional[str]) -> str:
        """Return *preferred_slug* if given, else choose one from the API.

        Preference order: the active newsletter flagged ``is_default``, then
        the first active newsletter, then the first newsletter of any status.

        Raises:
            RuntimeError: If no newsletter exists.
        """
        if preferred_slug:
            return preferred_slug
        nls = self.get_newsletters()
        if not nls:
            raise RuntimeError("No newsletters configured in Ghost (Settings → Newsletters).")
        actives = [n for n in nls if n.get("status") == "active"]
        for n in actives:
            if n.get("is_default"):
                return n.get("slug")
        return (actives or nls)[0].get("slug")

    def create_post_html(self, title: str, html_content: str, status: str = "draft", feature_image: Optional[str] = None):
        """Create a post from HTML and return the created post object.

        Raises:
            RuntimeError: On HTTP status >= 400.
        """
        url = self.base + "posts/?source=html"
        post = {"title": title, "html": html_content, "status": status}
        if feature_image:
            post["feature_image"] = feature_image
        resp = requests.post(url, headers=self._headers(), json={"posts": [post]}, timeout=30)
        if resp.status_code >= 400:
            raise RuntimeError(f"Ghost create error {resp.status_code}: {resp.text}")
        return resp.json()["posts"][0]

    def publish_post(self, post_id: str, updated_at: str, newsletter_slug: Optional[str], email_segment: Optional[str]):
        """Set a post to ``published``, passing newsletter and segment as query parameters.

        *updated_at* is sent back unchanged in the request body.

        Raises:
            RuntimeError: On HTTP status >= 400.
        """
        slug = self.pick_newsletter_slug(newsletter_slug)
        params = [f"newsletter={requests.utils.quote(slug)}"]
        if email_segment:
            params.append(f"email_segment={requests.utils.quote(email_segment)}")
        url = self.base + f"posts/{post_id}/?{'&'.join(params)}"
        body = {"posts": [{"updated_at": updated_at, "status": "published"}]}
        resp = requests.put(url, headers=self._headers(), json=body, timeout=30)
        if resp.status_code >= 400:
            raise RuntimeError(f"Ghost publish error {resp.status_code}: {resp.text}")
        return resp.json()["posts"][0]
|
||||||
|
|
||||||
|
# ------------- Task orchestration -------------
|
||||||
|
|
||||||
|
class GhostTask:
|
||||||
|
def __init__(self, feeds: List[RSSfeed], admin_url: str, admin_key: str,
|
||||||
|
mistral_api_key: Optional[str] = None,
|
||||||
|
newsletter_slug: Optional[str] = None, email_segment: Optional[str] = None,
|
||||||
|
dry_run: bool = False):
|
||||||
|
self.ghost = GhostAdmin(admin_url, admin_key)
|
||||||
|
self.feeds = feeds
|
||||||
|
self.newsletter_slug = newsletter_slug
|
||||||
|
self.email_segment = email_segment
|
||||||
|
self.mistral = MistralClient(mistral_api_key) if mistral_api_key else None
|
||||||
|
self.dry_run = dry_run
|
||||||
|
for feed in self.feeds:
|
||||||
|
LOG.info("Adding feed %s", feed.url)
|
||||||
|
|
||||||
|
# --- startup immediate run if not yet published this week
|
||||||
|
|
||||||
|
def _published_this_week(self) -> bool:
|
||||||
|
"""Check if we already published this week (since last Saturday 12:00)."""
|
||||||
|
tz = zoneinfo.ZoneInfo("Europe/Brussels")
|
||||||
|
last = self.ghost.latest_published_date("Europe/Brussels")
|
||||||
|
if not last:
|
||||||
|
return False
|
||||||
|
|
||||||
|
now = dt.datetime.now(tz)
|
||||||
|
# Find last Saturday at 12:00
|
||||||
|
days_since_saturday = (now.weekday() - 5) % 7 # Saturday = 5
|
||||||
|
last_saturday = (now - dt.timedelta(days=days_since_saturday)).replace(
|
||||||
|
hour=12, minute=0, second=0, microsecond=0
|
||||||
|
)
|
||||||
|
|
||||||
|
return last >= last_saturday
|
||||||
|
|
||||||
|
async def maybe_run_this_week(self):
|
||||||
|
if not self._published_this_week():
|
||||||
|
LOG.info("Aucune newsletter publiée cette semaine -> génération immédiate.")
|
||||||
|
await self.weekly_task()
|
||||||
|
else:
|
||||||
|
LOG.info("Déjà publié cette semaine, on attend la prochaine fenêtre.")
|
||||||
|
|
||||||
|
# --- utils
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _fr_week_range() -> str:
|
||||||
|
"""Returns a French formatted date range for the past week."""
|
||||||
|
months = {
|
||||||
|
'January': 'Janvier', 'February': 'Février', 'March': 'Mars', 'April': 'Avril',
|
||||||
|
'May': 'Mai', 'June': 'Juin', 'July': 'Juillet', 'August': 'Août',
|
||||||
|
'September': 'Septembre', 'October': 'Octobre', 'November': 'Novembre', 'December': 'Décembre'
|
||||||
|
}
|
||||||
|
today = dt.datetime.now()
|
||||||
|
week_ago = today - dt.timedelta(days=7)
|
||||||
|
|
||||||
|
# Format: "24 - 31 Janvier 2025" or "28 Janvier - 4 Février 2025"
|
||||||
|
if week_ago.month == today.month:
|
||||||
|
formatted = f"{week_ago.day} - {today.strftime('%d %B %Y')}"
|
||||||
|
else:
|
||||||
|
formatted = f"{week_ago.strftime('%d %B')} - {today.strftime('%d %B %Y')}"
|
||||||
|
|
||||||
|
for en, fr in months.items():
|
||||||
|
formatted = formatted.replace(en, fr)
|
||||||
|
return formatted
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _safe_get(url: str, timeout: int = 20) -> Optional[bytes]:
|
||||||
|
try:
|
||||||
|
r = requests.get(url, timeout=timeout, headers={"User-Agent": "ghost-bot/1.0"})
|
||||||
|
r.raise_for_status()
|
||||||
|
return r.content
|
||||||
|
except Exception as e:
|
||||||
|
LOG.warning("Flux indisponible: %s (%s)", url, e)
|
||||||
|
return None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _entry_datetime(entry) -> Optional[dt.datetime]:
|
||||||
|
"""
|
||||||
|
Tente de récupérer une datetime aware (UTC) pour un item feedparser.
|
||||||
|
"""
|
||||||
|
# Try common fields first
|
||||||
|
if getattr(entry, "published", None):
|
||||||
|
try:
|
||||||
|
# YouTube (ISO) e.g. 2025-09-05T10:20:33+00:00
|
||||||
|
return dt.datetime.fromisoformat(entry.published.replace("Z", "+00:00")).astimezone(dt.timezone.utc)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
# RFC822 e.g. Fri, 05 Sep 2025 10:20:33 +0000
|
||||||
|
return dt.datetime.strptime(entry.published.replace('GMT', '+0000'),
|
||||||
|
'%a, %d %b %Y %H:%M:%S %z').astimezone(dt.timezone.utc)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
if getattr(entry, "updated_parsed", None):
|
||||||
|
try:
|
||||||
|
return dt.datetime.fromtimestamp(time.mktime(entry.updated_parsed), tz=dt.timezone.utc)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
# --- HTML builder for grouped content
|
||||||
|
|
||||||
|
def _build_html_roundup_grouped(self, categories: List[Dict], feeds: List[RSSfeed]):
|
||||||
|
"""
|
||||||
|
Construit le HTML avec des catégories et sous-groupes thématiques.
|
||||||
|
Inclut un résumé et une table des matières en haut.
|
||||||
|
Retourne (html, feature_image_url_ou_None).
|
||||||
|
"""
|
||||||
|
parts: List[str] = []
|
||||||
|
first_image: Optional[str] = None
|
||||||
|
|
||||||
|
# --- Build Summary Section ---
|
||||||
|
parts.append('<h2>✨ En bref cette semaine</h2>')
|
||||||
|
parts.append('<ul>')
|
||||||
|
|
||||||
|
for cat in categories:
|
||||||
|
cat_name = cat.get("name", "Actualités")
|
||||||
|
subgroups = cat.get("subgroups", [])
|
||||||
|
|
||||||
|
# Get top subgroups with more than 1 item (by item count) for summary
|
||||||
|
multi_item_subgroups = [sg for sg in subgroups if len(sg.get("items", [])) > 1]
|
||||||
|
sorted_subgroups = sorted(multi_item_subgroups, key=lambda sg: len(sg.get("items", [])), reverse=True)
|
||||||
|
top_subgroups = sorted_subgroups[:5] # Max 5 highlights per category
|
||||||
|
|
||||||
|
total_items = sum(len(sg.get("items", [])) for sg in subgroups)
|
||||||
|
if top_subgroups:
|
||||||
|
highlights = ", ".join(sg.get("title", "Divers") for sg in top_subgroups)
|
||||||
|
total_items = sum(len(sg.get("items", [])) for sg in subgroups)
|
||||||
|
parts.append(f'<li><strong>{html.escape(cat_name)}</strong>: {html.escape(highlights)} ({total_items} articles)</li>')
|
||||||
|
elif total_items > 0:
|
||||||
|
parts.append(f'<li><strong>{html.escape(cat_name)}</strong>: {total_items} articles</li>')
|
||||||
|
|
||||||
|
parts.append('</ul>')
|
||||||
|
parts.append('<hr>')
|
||||||
|
|
||||||
|
# --- Build Table of Contents ---
|
||||||
|
# parts.append('<h2>📋 Sommaire</h2>')
|
||||||
|
# parts.append('<ul>')
|
||||||
|
|
||||||
|
# for cat in categories:
|
||||||
|
# cat_name = cat.get("name", "Actualités")
|
||||||
|
# cat_anchor = self._make_anchor(cat_name)
|
||||||
|
# subgroups = cat.get("subgroups", [])
|
||||||
|
# total_items = sum(len(sg.get("items", [])) for sg in subgroups)
|
||||||
|
|
||||||
|
# parts.append(f'<li><a href="#{cat_anchor}"><strong>{html.escape(cat_name)}</strong></a> ({total_items} articles)')
|
||||||
|
|
||||||
|
# if len(subgroups) > 1 or (len(subgroups) == 1 and len(subgroups[0].get("items", [])) > 1):
|
||||||
|
# parts.append('<ul>')
|
||||||
|
# for sg in subgroups:
|
||||||
|
# sg_title = sg.get("title", "Divers")
|
||||||
|
# sg_anchor = self._make_anchor(f"{cat_name}-{sg_title}")
|
||||||
|
# item_count = len(sg.get("items", []))
|
||||||
|
# parts.append(f'<li><a href="#{sg_anchor}">{html.escape(sg_title)}</a> ({item_count})</li>')
|
||||||
|
# parts.append('</ul>')
|
||||||
|
|
||||||
|
# parts.append('</li>')
|
||||||
|
|
||||||
|
# parts.append('</ul>')
|
||||||
|
# parts.append('<hr>')
|
||||||
|
|
||||||
|
# --- Build Content by Category ---
|
||||||
|
for cat in categories:
|
||||||
|
cat_name = cat.get("name", "Actualités")
|
||||||
|
cat_anchor = self._make_anchor(cat_name)
|
||||||
|
subgroups = cat.get("subgroups", [])
|
||||||
|
|
||||||
|
if not subgroups:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Category header with emoji
|
||||||
|
cat_emoji = {
|
||||||
|
"Actualités": "📰",
|
||||||
|
"Tests & Critiques": "⭐",
|
||||||
|
"Aperçus & Previews": "👁️",
|
||||||
|
"Vidéos": "🎬",
|
||||||
|
"Autres": "📁"
|
||||||
|
}.get(cat_name, "📌")
|
||||||
|
|
||||||
|
parts.append(f'<h2 id="{cat_anchor}">{cat_emoji} {html.escape(cat_name)}</h2>')
|
||||||
|
|
||||||
|
for sg in subgroups:
|
||||||
|
sg_title = sg.get("title", "Divers")
|
||||||
|
sg_anchor = self._make_anchor(f"{cat_name}-{sg_title}")
|
||||||
|
items = sg.get("items", [])
|
||||||
|
|
||||||
|
if not items:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Sub-group header (only if more than 1 item in subgroup)
|
||||||
|
if len(items) > 1:
|
||||||
|
parts.append(f'<h3 id="{sg_anchor}">{html.escape(sg_title)}</h3>')
|
||||||
|
|
||||||
|
for post in items:
|
||||||
|
title = post.get("title", "") or ""
|
||||||
|
linkURL = post.get("link", "") or ""
|
||||||
|
parts.append(f'<h4>{html.escape(title)}</h4>')
|
||||||
|
|
||||||
|
# --- YouTube embed / fallback
|
||||||
|
vid = post.get("yt_videoid") or extract_youtube_id(linkURL)
|
||||||
|
if vid:
|
||||||
|
watch_url = f"https://www.youtube.com/watch?v={vid}"
|
||||||
|
|
||||||
|
# Try provider HTML via oEmbed (as Ghost does)
|
||||||
|
embed_html = fetch_youtube_oembed_html(watch_url, timeout=10)
|
||||||
|
if embed_html:
|
||||||
|
parts.append(embed_html)
|
||||||
|
else:
|
||||||
|
# Fallback: leave the plain URL on its own line so Ghost may still auto-embed
|
||||||
|
parts.append(f'\n<p>{watch_url}</p>\n')
|
||||||
|
|
||||||
|
# Minimal fallback link (non-intrusive for email/web)
|
||||||
|
parts.append(f'<p><a href="{watch_url}">Voir sur YouTube</a></p>')
|
||||||
|
else:
|
||||||
|
# --- Texte + lien
|
||||||
|
ftext = ""
|
||||||
|
if "summary" in post and post["summary"]:
|
||||||
|
ftext = html.unescape(post["summary"])
|
||||||
|
ftext = re.sub("<[^<]+?>", "", ftext)
|
||||||
|
ftext = re.sub(r"L'article .* est apparu en premier sur .*", "", ftext)
|
||||||
|
if ftext:
|
||||||
|
parts.append(f"<p>{html.escape(ftext)}</p>")
|
||||||
|
if linkURL:
|
||||||
|
esc = html.escape(linkURL)
|
||||||
|
parts.append(f'<p><a href="{esc}">{esc}</a></p>')
|
||||||
|
|
||||||
|
# --- Images: first try RSS metadata, then crawl the page
|
||||||
|
found_image = False
|
||||||
|
for link in post.get("links", []) or []:
|
||||||
|
if link.get("type") in ("image/jpg", "image/jpeg", "image/png", "image/webp"):
|
||||||
|
imgUrl = link.get("href")
|
||||||
|
if imgUrl:
|
||||||
|
imgUrl = imgUrl.replace("/250x250/", "/990x320/")
|
||||||
|
if not first_image:
|
||||||
|
first_image = imgUrl
|
||||||
|
parts.append(f'<figure><img src="{html.escape(imgUrl)}" loading="lazy"></figure>')
|
||||||
|
found_image = True
|
||||||
|
|
||||||
|
# If no image from RSS, try to extract from the article page
|
||||||
|
if not found_image and linkURL:
|
||||||
|
crawled_img = extract_image_from_url(linkURL, timeout=8)
|
||||||
|
if crawled_img:
|
||||||
|
if not first_image:
|
||||||
|
first_image = crawled_img
|
||||||
|
parts.append(f'<figure><img src="{html.escape(crawled_img)}" loading="lazy"></figure>')
|
||||||
|
|
||||||
|
parts.append('<hr>')
|
||||||
|
|
||||||
|
# --- Sources
|
||||||
|
parts.append("<h3>📚 Sources</h3>")
|
||||||
|
for feed in feeds:
|
||||||
|
esc = html.escape(feed.url)
|
||||||
|
parts.append(f'<p><a href="{esc}">{esc}</a></p>')
|
||||||
|
|
||||||
|
parts.append('<p><em>Abonnez-vous pour recevoir chaque semaine les news et soutenir mon travail.</em></p>')
|
||||||
|
return "\n".join(parts), first_image
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _make_anchor(text: str) -> str:
|
||||||
|
"""Convert text to a valid HTML anchor ID."""
|
||||||
|
# Remove accents and special chars, lowercase, replace spaces with dashes
|
||||||
|
anchor = text.lower()
|
||||||
|
anchor = re.sub(r'[àáâãäå]', 'a', anchor)
|
||||||
|
anchor = re.sub(r'[èéêë]', 'e', anchor)
|
||||||
|
anchor = re.sub(r'[ìíîï]', 'i', anchor)
|
||||||
|
anchor = re.sub(r'[òóôõö]', 'o', anchor)
|
||||||
|
anchor = re.sub(r'[ùúûü]', 'u', anchor)
|
||||||
|
anchor = re.sub(r'[ýÿ]', 'y', anchor)
|
||||||
|
anchor = re.sub(r'[ç]', 'c', anchor)
|
||||||
|
anchor = re.sub(r'[^a-z0-9\s-]', '', anchor)
|
||||||
|
anchor = re.sub(r'\s+', '-', anchor.strip())
|
||||||
|
return anchor or "section"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _format_duration(seconds: float) -> str:
|
||||||
|
seconds = int(seconds)
|
||||||
|
days, seconds = divmod(seconds, 86400)
|
||||||
|
hours, seconds = divmod(seconds, 3600)
|
||||||
|
minutes, seconds = divmod(seconds, 60)
|
||||||
|
parts = []
|
||||||
|
if days: parts.append(f"{days} days")
|
||||||
|
if hours: parts.append(f"{hours} hours")
|
||||||
|
if minutes: parts.append(f"{minutes} minutes")
|
||||||
|
if seconds: parts.append(f"{seconds} seconds")
|
||||||
|
return ", ".join(parts) if parts else "0 seconds"
|
||||||
|
|
||||||
|
async def run_weekly_on_saturday(self):
    """Loop forever, running ``weekly_task`` every Saturday at 12:00.

    The schedule is computed from ``dt.datetime.now()``, i.e. naive local
    time of the host. While waiting, the coroutine sleeps in slices of at
    most five minutes and re-reads the clock after each slice, logging the
    remaining time before every sleep.
    """
    saturday = 5  # datetime.weekday(): Monday is 0
    max_nap = 5 * 60
    while True:
        started = dt.datetime.now()

        offset_days = (saturday - started.weekday()) % 7
        if offset_days == 0 and started.hour >= 12:
            # It is Saturday and noon has passed: aim for next week.
            offset_days = 7

        target_day = started + dt.timedelta(days=offset_days)
        next_run = target_day.replace(hour=12, minute=0, second=0, microsecond=0)

        remaining = (next_run - started).total_seconds()
        while remaining > 0:
            LOG.info("Waiting for %s for next scan (Saturday noon)", self._format_duration(remaining))
            await asyncio.sleep(min(remaining, max_nap))
            remaining = (next_run - dt.datetime.now()).total_seconds()

        LOG.info("Going to run the weekly task")
        await self.weekly_task()
|
||||||
|
|
||||||
|
async def weekly_task(self):
    """Collect, filter, group and publish the weekly roundup.

    Steps:
      1. Log the available Ghost newsletters (debug aid; failure is ignored).
      2. Reload the feed list from ``FEEDS_FILE`` (or its fallback path).
      3. Keep feed entries newer than "7 days ago at 06:00 UTC", dropping
         actugaming puzzle/guide links and tagging YouTube entries with
         their video id.
      4. Filter and group the items through Mistral when it is configured,
         otherwise put everything in a single category.
      5. Build the HTML, create a Ghost draft and, unless ``self.dry_run``
         is set, publish it through the newsletter.

    Returns early without creating a post when no item was collected or
    nothing is left after filtering.
    """
    # Debug aid only: being unable to list newsletters must not stop the run.
    try:
        newsletters = self.ghost.get_newsletters()
        LOG.info("Newsletters: %s", ", ".join(f"{n.get('name')}[{n.get('slug')}]" for n in newsletters))
    except Exception as e:
        LOG.warning("Unable to list newsletters: %s", e)

    title_post = "Les news de la semaine du " + self._fr_week_range()
    LOG.info("Running weekly task : %s", title_post)

    # (Re)load the feed list so edits to the file are picked up on each run.
    feeds_file = os.environ.get("FEEDS_FILE", "/data/feeds.txt")
    if not os.path.isfile(feeds_file):
        feeds_file = os.environ.get("FEEDS_FILE_FALLBACK", r"f:\workspace\Substack_JV\feeds.txt")
    with open(feeds_file, encoding="utf-8") as f:
        feed_urls = [raw.strip() for raw in f if raw.strip()]
    self.feeds = [RSSfeed(feed_url, "youtube" in feed_url.lower()) for feed_url in feed_urls]

    # Time window: everything since 7 days ago at 06:00 UTC.
    today_6am_utc = dt.datetime.now(dt.timezone.utc).replace(hour=6, minute=0, second=0, microsecond=0)
    week_ago_6am_utc = today_6am_utc - dt.timedelta(days=7)

    all_news_posts: List[dict] = []
    for feed in self.feeds:
        LOG.info("Scanning feed %s", feed.url)
        content = self._safe_get(feed.url, timeout=30)
        if not content:
            continue

        for e in feedparser.parse(content).entries:
            # Keep only entries with a known date inside the weekly window.
            published_at = self._entry_datetime(e)
            if not published_at or not published_at > week_ago_6am_utc:
                continue

            # Basic URL-based filtering.
            linkURL = e.get("link", "") or ""
            if "actugaming" in linkURL and ("puzzle-" in linkURL or "guide-" in linkURL):
                continue

            # Enrich entries from YouTube feeds with the video id when found.
            if feed.youtube and linkURL:
                vid = extract_youtube_id(linkURL)
                if vid:
                    e["yt_videoid"] = vid

            all_news_posts.append(e)

    if not all_news_posts:
        LOG.warning("Aucun item récupéré (flux down ?). On n'envoie pas cette semaine.")
        return

    LOG.info("Collected %d items from feeds", len(all_news_posts))

    if not self.mistral:
        LOG.warning("No Mistral API key configured, skipping AI filtering/grouping")
        # Fallback: a single category holding every collected item.
        everything = {"title": "Toutes les news", "items": all_news_posts}
        categories = [{"name": "Actualités de la semaine", "subgroups": [everything]}]
    else:
        LOG.info("Using Mistral AI to filter non-news content...")
        filtered_posts = self.mistral.filter_news_items(all_news_posts, dry_run=self.dry_run)
        LOG.info("After filtering: %d items (removed %d)",
                 len(filtered_posts), len(all_news_posts) - len(filtered_posts))

        categories = []
        if filtered_posts:
            LOG.info("Using Mistral AI to group items by category...")
            categories = self.mistral.group_similar_items(filtered_posts)
            subgroup_count = sum(len(cat.get("subgroups", [])) for cat in categories)
            LOG.info("Created %d categories with %d sub-groups", len(categories), subgroup_count)

    has_any_item = any(
        len(sg.get("items", [])) != 0
        for cat in categories
        for sg in cat.get("subgroups", [])
    )
    if not categories or not has_any_item:
        LOG.warning("No news items after filtering. Skipping this week.")
        return

    roundup_html, feature_image = self._build_html_roundup_grouped(categories, self.feeds)

    # 1) Create the draft (with a feature image when one was found).
    created = self.ghost.create_post_html(title_post, roundup_html, status="draft", feature_image=feature_image)
    LOG.info("Created draft post: %s (id: %s)", created.get("title"), created.get("id"))

    # 2) Publish and e-mail it, unless running in dry-run mode.
    if self.dry_run:
        LOG.info("DRY-RUN MODE: Post created as draft but NOT published. URL: %s",
                 created.get("url", "N/A"))
        LOG.info("DRY-RUN MODE: Review the draft in Ghost admin, then publish manually if satisfied.")
        return

    published = self.ghost.publish_post(
        post_id=created["id"],
        updated_at=created["updated_at"],
        newsletter_slug=os.environ.get("GHOST_NEWSLETTER_SLUG"),
        email_segment=os.environ.get("GHOST_EMAIL_SEGMENT"),
    )
    LOG.info("Published post: %s (emailed via newsletter)", published.get("url"))
|
||||||
|
|
||||||
|
# ------------- main -------------
|
||||||
|
|
||||||
|
async def main():
    """Command-line entry point: build the task and run it once or on schedule.

    Flags:
      --runonce   run the weekly task immediately, then exit.
      --dry-run   run the weekly task immediately in dry-run mode, then exit.

    Without either flag, ``maybe_run_this_week`` is awaited once and the
    Saturday scheduler then runs indefinitely. ``GHOST_ADMIN_URL`` and
    ``GHOST_ADMIN_KEY`` must be set (a missing one raises ``KeyError``);
    ``MISTRAL_API_KEY`` is optional.
    """
    setuplogger()

    cli = argparse.ArgumentParser()
    cli.add_argument("--runonce", action="store_true", help="Run now and exit (no scheduler)")
    cli.add_argument("--dry-run", action="store_true", dest="dry_run",
                     help="Run immediately, create draft but do NOT publish (for testing)")
    args = cli.parse_args()

    # Initial feed list (the task reloads the file itself on every run).
    feeds_file = os.environ.get("FEEDS_FILE", "/data/feeds.txt")
    if not os.path.isfile(feeds_file):
        feeds_file = os.environ.get("FEEDS_FILE_FALLBACK", r"f:\workspace\Substack_JV\feeds.txt")
    with open(feeds_file, encoding="utf-8") as f:
        stripped_lines = (raw.strip() for raw in f)
        feeds: List[RSSfeed] = [
            RSSfeed(feed_url, "youtube" in feed_url.lower())
            for feed_url in stripped_lines
            if feed_url
        ]

    admin_url = os.environ["GHOST_ADMIN_URL"]  # e.g. https://ghostadmin.zep.best/ghost/api/admin/
    admin_key = os.environ["GHOST_ADMIN_KEY"]  # integration_id:secret_hex
    mistral_api_key = os.environ.get("MISTRAL_API_KEY")  # optional: enables AI filtering/grouping

    if not mistral_api_key:
        LOG.warning("MISTRAL_API_KEY not set. AI filtering and grouping will be disabled.")

    task = GhostTask(
        feeds=feeds,
        admin_url=admin_url,
        admin_key=admin_key,
        mistral_api_key=mistral_api_key,
        newsletter_slug=os.environ.get("GHOST_NEWSLETTER_SLUG"),
        email_segment=os.environ.get("GHOST_EMAIL_SEGMENT"),
        dry_run=args.dry_run,
    )

    LOG.info("Starting bot (weekly mode%s)", " - DRY RUN" if args.dry_run else "")

    # Both flags mean "run once right now"; --runonce wins when both are given.
    if args.runonce or args.dry_run:
        if not args.runonce:
            LOG.info("DRY-RUN: Running weekly task immediately (will create draft only)")
        await task.weekly_task()
        return

    # Startup: publish this week's edition if it does not exist yet.
    await task.maybe_run_this_week()

    # Weekly schedule: Saturday at 12:00.
    # NOTE(review): the original comment says Europe/Brussels, but the scheduler
    # uses naive local time; presumably this relies on the host TZ - confirm.
    await task.run_weekly_on_saturday()


if __name__ == "__main__":
    asyncio.run(main())
|
||||||
1040
presquegratos.py
Normal file
1040
presquegratos.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,3 +1,7 @@
|
|||||||
requests
|
|
||||||
feedparser
|
feedparser
|
||||||
python-substack
|
PyJWT>=2.7,<3
|
||||||
|
requests>=2.31
|
||||||
|
feedparser>=6.0
|
||||||
|
aiohttp
|
||||||
|
bs4
|
||||||
|
playwright
|
||||||
49
storage.py
Normal file
49
storage.py
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
# storage.py
|
||||||
|
from __future__ import annotations
|
||||||
|
import sqlite3, pathlib, datetime as dt
|
||||||
|
from typing import Optional, Iterable, Tuple
|
||||||
|
import os
|
||||||
|
DB_PATH = "/data/published.db"  # bind-mount ./data:/data in docker

# Executed statement by statement after splitting on ";", so no ";" may
# appear inside a comment or literal below.
_SCHEMA = """
PRAGMA journal_mode = WAL;
CREATE TABLE IF NOT EXISTS published_items(
  platform TEXT NOT NULL,             -- e.g. xgp | egs | psplus
  key TEXT PRIMARY KEY,               -- your dedupe key (see below)
  first_seen_utc TEXT NOT NULL,       -- ISO-8601
  last_post_id TEXT                   -- Ghost post id that recorded it
);
CREATE INDEX IF NOT EXISTS idx_platform ON published_items(platform);
"""


class Storage:
    """SQLite-backed record of items that have already been published.

    Each row is identified by a de-duplication ``key`` and stores the
    platform it belongs to, the UTC time it was first recorded and,
    optionally, the id of the Ghost post that mentioned it.
    """

    def __init__(self, db_path: str = DB_PATH):
        """Open (creating it if needed) the database at *db_path*.

        The parent directory is created when missing and the schema is
        applied idempotently.
        """
        pathlib.Path(db_path).parent.mkdir(parents=True, exist_ok=True)
        self.conn = sqlite3.connect(db_path)
        self.conn.execute("PRAGMA foreign_keys = ON;")
        for stmt in _SCHEMA.split(";"):
            if stmt.strip():
                self.conn.execute(stmt)

    @staticmethod
    def _utc_now_iso() -> str:
        """Return the current UTC time as a naive ISO-8601 string.

        ``datetime.utcnow()`` is deprecated; the aware equivalent is used
        and the tzinfo dropped so the stored text keeps the same format
        (no ``+00:00`` suffix) as rows written previously.
        """
        return dt.datetime.now(dt.timezone.utc).replace(tzinfo=None).isoformat()

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self.conn.close()

    def seen(self, key: str) -> bool:
        """Return True when *key* has already been recorded."""
        cur = self.conn.execute("SELECT 1 FROM published_items WHERE key=?", (key,))
        return cur.fetchone() is not None

    def remember(self, platform: str, key: str, post_id: Optional[str]):
        """Record *key*; when *post_id* is given, also store it as the latest post.

        An existing row keeps its original platform and first-seen time;
        only ``last_post_id`` is refreshed.
        """
        self.conn.execute(
            "INSERT OR IGNORE INTO published_items(platform,key,first_seen_utc,last_post_id) VALUES(?,?,?,?)",
            (platform, key, self._utc_now_iso(), post_id),
        )
        if post_id:
            self.conn.execute("UPDATE published_items SET last_post_id=? WHERE key=?", (post_id, key))
        self.conn.commit()

    def bulk_remember(self, platform: str, pairs: Iterable[Tuple[str, Optional[str]]]):
        """Record many ``(key, post_id)`` pairs for *platform* in one commit.

        Keys that already exist are left untouched (their ``last_post_id``
        is not updated, unlike ``remember``).
        """
        stamp = self._utc_now_iso()  # one timestamp for the whole batch
        rows = [(platform, k, stamp, pid) for (k, pid) in pairs]
        self.conn.executemany(
            "INSERT OR IGNORE INTO published_items(platform,key,first_seen_utc,last_post_id) VALUES(?,?,?,?)",
            rows,
        )
        self.conn.commit()
|
||||||
|
|
||||||
|
|
||||||
@@ -1,7 +1,58 @@
|
|||||||
#!/bin/sh
# Container entrypoint: refresh the code from origin/main, start both bots in
# the background, mirror their log files to the container output, and shut
# everything down as soon as one bot exits or a stop signal arrives.
set -eu

# Print a message prefixed with a UTC timestamp.
log() { printf '%s %s\n' "[$(date -u +%FT%TZ)]" "$*"; }

# Send TERM to every child that was started, then reap them all.
terminate_children() {
  for pid in "${PID1-}" "${PID2-}" "${TPID-}"; do
    [ -n "$pid" ] && kill -TERM "$pid" 2>/dev/null || true
  done
  wait || true
}

# Signal handler: stop the children and exit cleanly.
stop() {
  log "stopping..."
  terminate_children
  exit 0
}
trap stop INT TERM

cd /app
export GIT_TERMINAL_PROMPT=0

# Force-update the code on every (re)start.
if [ -d .git ]; then
  i=0
  while [ "$i" -lt 5 ]; do
    if git fetch --all --prune && git reset --hard origin/main; then
      log "git updated to origin/main"
      break
    fi
    i=$((i+1))
    log "git update failed (attempt $i/5); retrying in 10s..."
    sleep 10
  done
  if [ "$i" -ge 5 ]; then
    log "WARNING: git update failed after 5 attempts — continuing with current code"
  fi
else
  log "WARNING: /app is not a git repo; skipping git update"
fi

# Log files (truncated on every start).
mkdir -p /var/log
: > /var/log/daily.log
: > /var/log/weekly.log

# Start both bots (unbuffered output).
python -u post_rss_to_ghost.py > /var/log/daily.log 2>&1 & PID1=$!
python -u presquegratos.py > /var/log/weekly.log 2>&1 & PID2=$!

# Follow both log files in the container output.
tail -F /var/log/daily.log /var/log/weekly.log &
TPID=$!

# Portable wait: plain /bin/sh has no `wait -n`, so poll once per second.
while :; do
  if ! kill -0 "$PID1" 2>/dev/null; then wait "$PID1" || true; break; fi
  if ! kill -0 "$PID2" 2>/dev/null; then wait "$PID2" || true; break; fi
  sleep 1
done

# One bot exited. Nothing sends TERM to this shell at this point, so the
# surviving bot must be stopped explicitly; otherwise the final `wait` would
# block forever and the container would keep running with one bot dead.
terminate_children
|
||||||
|
|||||||
23
xboxsyde.py
Normal file
23
xboxsyde.py
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
"""Print the xboxygen feed entries published since yesterday 06:00 UTC."""
import calendar
import datetime
import html
import io
import time

import feedparser
import requests

url = r'https://www.xboxygen.com/spip.php?page=backend'

# A timeout keeps the script from hanging forever on a stalled connection.
html_text = requests.get(url, timeout=30).text
news = feedparser.parse(html_text)

yesterday_6am = datetime.datetime.now(datetime.timezone.utc).replace(
    hour=6, minute=0, second=0, microsecond=0
) - datetime.timedelta(days=1)


def _entry_time(entry):
    """Return the entry's timestamp as an aware UTC datetime, or None.

    The RFC 822 ``published`` string is tried first; on failure the
    ``updated_parsed`` struct_time is used. The decision is made per entry,
    so a single odd entry no longer changes how all the others are parsed.
    """
    try:
        # %z does not understand the literal "GMT".
        return datetime.datetime.strptime(
            entry.published.replace('GMT', '+0000'), '%a, %d %b %Y %H:%M:%S %z'
        )
    except (AttributeError, TypeError, ValueError):
        pass
    try:
        # feedparser documents *_parsed as UTC struct_time, so it must be
        # converted with calendar.timegm(); time.mktime() would read it as
        # local time and skew the result by the host's UTC offset.
        return datetime.datetime.fromtimestamp(
            calendar.timegm(entry.updated_parsed), tz=datetime.timezone.utc
        )
    except (AttributeError, TypeError, ValueError, OverflowError):
        return None


def _is_recent(entry):
    """Return True when the entry has a timestamp newer than the cut-off."""
    stamp = _entry_time(entry)
    return stamp is not None and stamp > yesterday_6am


new_posts = [entry for entry in news.entries if _is_recent(entry)]

print(new_posts)
|
||||||
Reference in New Issue
Block a user