diff --git a/README.md b/README.md index bd97ef7..41ff5d9 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,319 @@ -# GitBot -Python Bot for Git webhooks and RSS feeds +# gitbot + +A minimal IRC bot for git forge webhooks (GitHub, Gitea, GitLab) and RSS feeds. +Supports multiple IRC networks simultaneously, hot reload of network config, and +persistent webhook/RSS routing managed entirely via IRC commands. + +--- + +## Requirements + +- Python 3.11+ +- `aiohttp`, `feedparser` (installed via pip into a virtualenv — see below) + +> **Python < 3.11:** install `tomli` as well (`pip install tomli`), which +> provides the TOML parser that became part of stdlib in 3.11. + +--- + +## Installation + +Modern Ubuntu (23.04+) and Debian (12+) prevent installing packages with pip +outside of a virtual environment. The steps below work on all supported systems. + +### 1. Create a dedicated user (recommended) + +```bash +sudo useradd -r -m -d /opt/gitbot -s /bin/bash gitbot +sudo -u gitbot bash +cd /opt/gitbot +``` + +### 2. Copy the bot files + +```bash +# as the gitbot user, from /opt/gitbot +cp -r /path/to/gitbot/* . +``` + +### 3. Create a virtualenv and install dependencies + +```bash +python3 -m venv venv +source venv/bin/activate +pip install -r requirements.txt +``` + +The virtualenv lives at `/opt/gitbot/venv/`. You must activate it (or use the +full path to `venv/bin/python`) whenever running the bot manually. + +### 4. Create the config file + +```bash +cp gitbot.toml.example gitbot.toml +$EDITOR gitbot.toml +``` + +The config file controls IRC networks, the webhook HTTP server, RSS polling +interval, and optional bind addresses. See `gitbot.toml.example` for full +documentation of every option. + +### 5. Create the owner account + +The bot requires an owner account before it will start. Run setup once: + +```bash +source venv/bin/activate # if not already active +python bot.py --setup -c gitbot.toml +``` + +You will be prompted for a nick and password. These are stored in the SQLite +database (password hashed with PBKDF2-SHA256, 260 000 iterations). + +### 6. Start the bot + +```bash +python bot.py -c gitbot.toml +``` + +Add `-v` / `--verbose` for debug-level logging. + +--- + +## Systemd service + +Create `/etc/systemd/system/gitbot.service`: + +```ini +[Unit] +Description=gitbot IRC webhook/RSS bot +After=network.target + +[Service] +User=gitbot +Group=gitbot +WorkingDirectory=/opt/gitbot +ExecStart=/opt/gitbot/venv/bin/python bot.py -c /opt/gitbot/gitbot.toml +Restart=always +RestartSec=10 + +[Install] +WantedBy=multi-user.target +``` + +Then enable and start it: + +```bash +sudo systemctl daemon-reload +sudo systemctl enable --now gitbot +sudo journalctl -fu gitbot # follow logs +``` + +To run `--setup` with systemd in place: + +```bash +sudo systemctl stop gitbot +sudo -u gitbot /opt/gitbot/venv/bin/python /opt/gitbot/bot.py --setup -c /opt/gitbot/gitbot.toml +sudo systemctl start gitbot +``` + +--- + +## Authentication + +All bot management commands require you to be identified. Authentication is +**PM-only** — never send your password in a channel. + +### Logging in + +``` +/msg gitbot identify +/msg gitbot identify ← use this if your current nick differs +``` + +### Auto-login via hostmask + +Once identified, add your current `nick!user@host` as a trusted mask so future +logins are automatic: + +``` +/msg gitbot hostmask add ← adds your current nick!user@host +/msg gitbot hostmask add *!*@your.isp.net ← or any glob pattern +/msg gitbot hostmask list +/msg gitbot hostmask remove +``` + +Globs (`*` and `?`) are supported anywhere in the mask. Useful patterns: + +- `*!*@unaffiliated/yournick` — libera/oftc cloak +- `*!*@gateway/web/freenode/*` — web gateway +- `yournick!*@*` — any host, matched only when using your nick (less safe) + +### Other account commands (all PM-only) + +``` +/msg gitbot passwd ← change password +/msg gitbot logout ← end current session +``` + +--- + +## IRC commands + +All channel commands start with `!`. The bot silently ignores commands from +unauthenticated users — it does not reveal that management commands exist. + +### Reload config + +Re-reads `gitbot.toml` without restarting. Connects new networks, disconnects +removed ones, joins/parts channels as needed, and cleans up orphaned webhook +and RSS entries from the database for any removed networks or channels. + +``` +!reload +``` + +### Webhooks + +``` +!webhook list +!webhook add +!webhook remove +!webhook events ← show current event filter +!webhook events code pr repo ← set event filter +!webhook branches ← show current branch filter +!webhook branches main develop ← only announce these branches +``` + +`` can be: +- `owner/repo` — a specific repository +- `owner` — all repos by that owner (matched against the owner field in the payload) + +Configure your forge to send webhooks to: +``` +https://yourhost/github +https://yourhost/gitea +https://yourhost/gitlab +``` + +Optionally append `?secret=` to authenticate without HMAC setup: +``` +https://yourhost/gitea?secret=mysecret +``` + +### RSS feeds + +``` +!rss list +!rss add +!rss remove +!rss format ← show current format template +!rss format $feed_name: $title <$link> ← set format template +``` + +On first add, the bot delivers the most recent entry immediately so you can +confirm the feed is working and preview the format. Subsequent polls deliver +up to 3 new entries per cycle. + +**RSS format template variables** (any feedparser field works): + +| Variable | Description | +|---|---| +| `$feed_name` | Feed title, with Gitea's `Feed of "..."` wrapper stripped | +| `$feed_title` | Raw feed title | +| `$feed_link` | Feed URL | +| `$feed_author` | Feed-level author | +| `$feed_subtitle` | Feed subtitle/description | +| `$title` | Entry title (HTML stripped) | +| `$link` | Entry URL | +| `$description` | Entry summary/description (HTML stripped) | +| `$author` | Entry author | +| `$published` | Publication date string | +| `$id` | Entry unique ID | + +Default format: `$feed_name: $title <$link>` + +Uses Python's `string.Template` with `safe_substitute`, so unknown variables +are left as-is rather than raising an error. + +--- + +## Webhook event categories + +Use these with `!webhook events`: + +| Category | What it includes | +|---|---| +| `ping` | New webhook registered confirmation | +| `code` | Pushes, commit comments | +| `pr` | PR opened/closed/merged/reviewed/commented (+ label, rename, sync) | +| `pr-minimal` | PR opened/closed/reopened/merged only | +| `issue` | Issues opened/closed/edited/assigned + comments | +| `issue-minimal` | Issues opened/closed/reopened only | +| `repo` | Releases, forks, branch/tag create/delete | +| `star` | Stars (GitHub only) | + +You can also use raw forge event names like `push`, `pull_request`, +`merge_request`, etc. Default filter when adding a webhook: `ping code pr issue repo`. + +--- + +## Webhook secrets + +Each forge has its own secret to allow the same `owner/repo` slug to exist on +multiple forges owned by different people: + +```toml +[webhook_server] +github_secret = "abc" +gitea_secret = "xyz" +gitlab_secret = "def" +``` + +Two verification modes are supported — pick whichever is easier: + +**URL token** (simplest — just append to the payload URL): +``` +https://yourhost/gitea?secret=xyz +``` + +**HMAC header** (more secure — secret never appears in URLs or logs): +Set the secret in both `gitbot.toml` and the forge's webhook settings UI. +GitHub and Gitea use HMAC-SHA256; GitLab sends the token directly. + +If a URL token is present it takes priority; otherwise HMAC headers are checked. +If no secret is configured for a forge, all requests are accepted. + +--- + +## Database + +SQLite at the path configured by `database` in `gitbot.toml` (default: `gitbot.db`). + +Stores: +- Owner account (nick + hashed password + hostmasks) +- Webhook routing (network, channel, repo, event filter, branch filter) +- RSS feeds (network, channel, URL, format template) +- Seen RSS entry IDs (to avoid re-announcing; capped at 500 per feed) + +Schema is created automatically on first run. The `format` column added in a +later release is migrated automatically via `ALTER TABLE` if you're upgrading +from an earlier version. + +--- + +## Migrating between machines + +The database is fully portable — copy `gitbot.db` alongside `gitbot.toml`. + +The only machine-specific config is the `bind` address. Update or remove it +in `gitbot.toml` before starting on the new host. + +```bash +# on old machine +systemctl stop gitbot +scp gitbot.db gitbot.toml newhost:/opt/gitbot/ + +# on new machine — edit bind if needed, then: +systemctl start gitbot +``` diff --git a/auth.py b/auth.py new file mode 100644 index 0000000..4feeb7a --- /dev/null +++ b/auth.py @@ -0,0 +1,143 @@ +""" +Authentication module. + +- One owner account, stored in DB with bcrypt-hashed password +- Sessions are in-memory (dict of nick!user@host -> authenticated bool) +- Hostmasks stored in DB; auto-login on PRIVMSG if mask matches +- identify only accepted via PM +""" + +import fnmatch +import hashlib +import hmac +import logging +import os +import secrets + +log = logging.getLogger("auth") + +# In-memory sessions: maps full prefix "nick!user@host" -> True +_sessions: dict[str, bool] = {} + + +# ── Password hashing (simple PBKDF2, no bcrypt dep) ─────────────────────────── + +def _hash_password(password: str, salt: bytes = None) -> str: + """Return 'salt_hex:hash_hex' string.""" + if salt is None: + salt = os.urandom(32) + key = hashlib.pbkdf2_hmac("sha256", password.encode(), salt, 260_000) + return f"{salt.hex()}:{key.hex()}" + + +def _check_password(password: str, stored: str) -> bool: + try: + salt_hex, key_hex = stored.split(":", 1) + salt = bytes.fromhex(salt_hex) + key = bytes.fromhex(key_hex) + except Exception: + return False + candidate = hashlib.pbkdf2_hmac("sha256", password.encode(), salt, 260_000) + return hmac.compare_digest(candidate, key) + + +# ── DB helpers (called with an open sqlite3 connection) ─────────────────────── + +def setup_schema(db): + db.executescript(""" + CREATE TABLE IF NOT EXISTS owner ( + id INTEGER PRIMARY KEY CHECK (id = 1), + nick TEXT NOT NULL, + password TEXT NOT NULL + ); + CREATE TABLE IF NOT EXISTS owner_hostmasks ( + mask TEXT PRIMARY KEY + ); + """) + db.commit() + + +def has_owner(db) -> bool: + row = db.execute("SELECT 1 FROM owner WHERE id=1").fetchone() + return row is not None + + +def create_owner(db, nick: str, password: str): + hashed = _hash_password(password) + db.execute(""" + INSERT INTO owner(id, nick, password) VALUES (1, ?, ?) + ON CONFLICT(id) DO UPDATE SET nick=excluded.nick, password=excluded.password + """, (nick, hashed)) + db.commit() + log.info("Owner account created for %s", nick) + + +def verify_password(db, password: str) -> bool: + row = db.execute("SELECT password FROM owner WHERE id=1").fetchone() + if not row: + return False + return _check_password(password, row["password"]) + + +def get_owner_nick(db) -> str | None: + row = db.execute("SELECT nick FROM owner WHERE id=1").fetchone() + return row["nick"] if row else None + + +def change_password(db, new_password: str): + hashed = _hash_password(new_password) + db.execute("UPDATE owner SET password=? WHERE id=1", (hashed,)) + db.commit() + + +# ── Hostmasks ───────────────────────────────────────────────────────────────── + +def hostmask_add(db, mask: str): + db.execute("INSERT OR IGNORE INTO owner_hostmasks(mask) VALUES (?)", (mask,)) + db.commit() + + +def hostmask_remove(db, mask: str): + db.execute("DELETE FROM owner_hostmasks WHERE mask=?", (mask,)) + db.commit() + + +def hostmask_list(db) -> list[str]: + rows = db.execute("SELECT mask FROM owner_hostmasks ORDER BY mask").fetchall() + return [r["mask"] for r in rows] + + +def hostmask_matches(db, prefix: str) -> bool: + """Check if nick!user@host matches any stored mask (supports * ? globs).""" + masks = hostmask_list(db) + for mask in masks: + if fnmatch.fnmatchcase(prefix.lower(), mask.lower()): + return True + return False + + +# ── Sessions ────────────────────────────────────────────────────────────────── + +def login(prefix: str): + """Mark a prefix as authenticated for this session.""" + _sessions[prefix] = True + log.info("Session opened for %s", prefix) + + +def logout(prefix: str): + _sessions.pop(prefix, None) + log.info("Session closed for %s", prefix) + + +def is_authenticated(db, prefix: str) -> bool: + """ + Returns True if: + - prefix has an active in-memory session, OR + - prefix matches a stored hostmask (and we auto-login them) + """ + if _sessions.get(prefix): + return True + if hostmask_matches(db, prefix): + login(prefix) # auto-login for this session + return True + return False diff --git a/bot.py b/bot.py new file mode 100644 index 0000000..70d9829 --- /dev/null +++ b/bot.py @@ -0,0 +1,367 @@ +""" +gitbot — main entry point. + +Usage: + python bot.py [gitbot.toml] # normal run + python bot.py --setup [gitbot.toml] # create/reset owner account, then run +""" + +import asyncio +import getpass +import itertools +import logging +import sys + +try: + import tomllib +except ImportError: + try: + import tomli as tomllib + except ImportError: + print("Python < 3.11 detected. Install tomli: pip install tomli", + file=sys.stderr) + sys.exit(1) + +import auth +import commands +import db +import irc_format as fmt +import rss as rss_module +import webhook_github +import webhook_gitea +import webhook_gitlab +from irc_client import IRCClient +from webhook_server import WebhookServer + +log = logging.getLogger("bot") + +PARSERS = { + "github": webhook_github, + "gitea": webhook_gitea, + "gitlab": webhook_gitlab, +} + +DEFAULT_EVENTS = {"ping", "code", "pr", "issue", "repo"} + + +def load_config(path: str) -> dict: + with open(path, "rb") as f: + return tomllib.load(f) + + +def run_setup(database): + """Interactive terminal setup for the owner account.""" + print() + if auth.has_owner(database): + print("An owner account already exists.") + answer = input("Reset it? [y/N] ").strip().lower() + if answer != "y": + print("Setup cancelled.") + return + print("── gitbot owner account setup ──────────────────") + nick = input("Owner nick: ").strip() + if not nick: + print("Nick cannot be empty.") + sys.exit(1) + while True: + password = getpass.getpass("Password: ") + password2 = getpass.getpass("Confirm password: ") + if not password: + print("Password cannot be empty.") + elif password != password2: + print("Passwords do not match, try again.") + else: + break + auth.create_owner(database, nick, password) + print(f"Owner account created for '{nick}'.") + print("You can now /msg the bot: identify ") + print() + + +class Bot: + def __init__(self, config: dict, config_path: str): + self._cfg = config + self._config_path = config_path + self._database = db.connect(config.get("database", "gitbot.db")) + self._clients: dict[str, IRCClient] = {} + + self._load_static_webhooks() + self._load_static_rss() + + # ── Static config loading ───────────────────────────────────────────────── + + def _load_static_webhooks(self): + for net_cfg in self._cfg.get("network", []): + net = net_cfg["name"] + for ch_cfg in net_cfg.get("channel", []): + ch = ch_cfg["name"] + for hook in ch_cfg.get("webhook", []): + db.webhook_add( + self._database, net, ch, + hook["repo"], + hook.get("events", list(DEFAULT_EVENTS)), + hook.get("branches", []), + ) + + def _load_static_rss(self): + for net_cfg in self._cfg.get("network", []): + net = net_cfg["name"] + for ch_cfg in net_cfg.get("channel", []): + ch = ch_cfg["name"] + for url in ch_cfg.get("rss", []): + db.rss_add(self._database, net, ch, url) # (id, created) ignored here + + # ── IRC message routing ─────────────────────────────────────────────────── + + async def _on_message(self, network: str, target: str, nick: str, + prefix: str, text: str): + own_nick = self._clients[network].config["nickname"] + + if target.lower() == own_nick.lower(): + # Private message to the bot + async def pm_reply(msg): + self._clients[network].privmsg(nick, msg) + + await commands.handle_pm( + network, nick, prefix, text, + self._database, pm_reply, self.reload) + elif target.startswith("#"): + # Channel message + async def ch_reply(msg): + await self._deliver_irc(network, target, msg) + + await commands.handle_channel( + network, target, nick, prefix, text, + self._database, ch_reply, self.reload) + + async def _on_connected(self, network: str): + log.info("[%s] Connected and registered", network) + + # ── Hot reload ──────────────────────────────────────────────────────────── + + async def reload(self) -> str: + """ + Re-read the config file and reconcile network connections: + - New networks → connect + - Gone networks → QUIT and stop + - Kept networks → join any new channels, part removed ones + Returns a human-readable summary. + """ + try: + new_cfg = load_config(self._config_path) + except Exception as e: + return f"Failed to reload config: {e}" + + self._cfg = new_cfg + self._load_static_webhooks() + self._load_static_rss() + + global_bind = new_cfg.get("bind") + new_nets = {n["name"]: n for n in new_cfg.get("network", [])} + current_nets = set(self._clients.keys()) + + added = [] + removed = [] + updated = [] + + # Disconnect networks that are no longer in config + for name in list(current_nets): + if name not in new_nets: + log.info("Reload: disconnecting %s", name) + await self._clients[name].stop("Configuration removed") + del self._clients[name] + db.purge_network(self._database, name) + removed.append(name) + + # Connect new networks; reconcile channels on existing ones + for name, net_cfg in new_nets.items(): + if global_bind and "bind" not in net_cfg: + net_cfg = {**net_cfg, "_global_bind": global_bind} + + if name not in self._clients: + # Brand new network + log.info("Reload: connecting new network %s", name) + self._start_network(net_cfg) + added.append(name) + else: + # Existing network — reconcile channels + client = self._clients[name] + new_chans = {c.lower() for c in net_cfg.get("channels", [])} + cur_chans = set(client._channels) # already lowercase + + for ch in new_chans - cur_chans: + log.info("Reload: joining %s on %s", ch, name) + client.join(ch) + + for ch in cur_chans - new_chans: + log.info("Reload: parting %s on %s", ch, name) + client.send(f"PART {ch} :Removed from config") + db.purge_channel(self._database, name, ch) + + if new_chans != cur_chans: + updated.append(name) + + parts = [] + if added: + parts.append(f"connected: {', '.join(added)}") + if removed: + parts.append(f"disconnected: {', '.join(removed)}") + if updated: + parts.append(f"channels updated: {', '.join(updated)}") + return "Reloaded. " + ("; ".join(parts) if parts else "no network changes.") + + def _start_network(self, net_cfg: dict): + client = IRCClient( + net_cfg, + on_message=self._on_message, + on_connected=self._on_connected, + ) + self._clients[net_cfg["name"]] = client + asyncio.create_task(client.run()) + + # ── IRC delivery ────────────────────────────────────────────────────────── + + async def _deliver_irc(self, network: str, channel: str, message: str): + client = self._clients.get(network) + if not client: + log.warning("No client for network %s", network) + return + if not client.in_channel(channel): + log.debug("[%s] Not in %s yet, joining…", network, channel) + client.join(channel) + await asyncio.sleep(2) + client.privmsg(channel, message) + + # ── Webhook delivery ────────────────────────────────────────────────────── + + async def _on_webhook(self, forge: str, headers: dict, data: dict): + parser = PARSERS.get(forge) + if not parser: + return + + full_name, repo_user, repo_name, organisation = parser.names(data, headers) + branch = parser.branch(data, headers) + events = parser.event(data, headers) + primary = events[0] if events else "" + + targets = db.webhook_targets( + self._database, full_name, repo_user, organisation) + + if not targets: + log.debug("[%s] No targets for %s", forge, full_name) + return + + outputs = parser.parse(full_name, primary, data, headers) + if not outputs: + return + + for target in targets: + if branch and target["branches"] and branch not in target["branches"]: + continue + + allowed = set(itertools.chain.from_iterable( + parser.event_categories(e) for e in target["events"] + )) + if not set(events) & allowed: + continue + + source = fmt.color( + full_name or organisation or repo_name or forge, + fmt.COLOR_REPO) + + for message, url in outputs: + line = f"({source}) {message}" + if url: + line = f"{line} - {url}" + await self._deliver_irc(target["network"], target["channel"], line) + + # ── Main run ────────────────────────────────────────────────────────────── + + async def run(self): + global_bind = self._cfg.get("bind") + + for net_cfg in self._cfg.get("network", []): + if global_bind and "bind" not in net_cfg: + net_cfg = {**net_cfg, "_global_bind": global_bind} + self._start_network(net_cfg) + + wh_cfg = self._cfg.get("webhook_server", {}) + if wh_cfg.get("enabled", True): + # Per-forge secrets; fall back to legacy 'secret' key for all forges + legacy = wh_cfg.get("secret", "") + secrets = { + "github": wh_cfg.get("github_secret", legacy), + "gitea": wh_cfg.get("gitea_secret", legacy), + "gitlab": wh_cfg.get("gitlab_secret", legacy), + } + server = WebhookServer( + host=wh_cfg.get("host", "127.0.0.1"), + port=wh_cfg.get("port", 8080), + deliver=self._on_webhook, + secrets=secrets, + ) + asyncio.create_task(server.run()) + + rss_cfg = self._cfg.get("rss", {}) + if rss_cfg.get("enabled", True): + poller = rss_module.RSSPoller( + database=self._database, + deliver=self._deliver_irc, + interval=rss_cfg.get("interval", 300), + ) + asyncio.create_task(poller.run()) + + log.info("gitbot started") + await asyncio.Event().wait() # run forever + + +def main(): + import argparse + + parser = argparse.ArgumentParser( + description="gitbot — git webhook + RSS IRC bot") + parser.add_argument("-c", "--config", default="gitbot.toml", metavar="FILE", + help="Path to TOML config file (default: gitbot.toml)") + parser.add_argument("--setup", action="store_true", + help="Create or reset the owner account, then start the bot") + parser.add_argument("-v", "--verbose", action="store_true", + help="Enable debug logging") + args = parser.parse_args() + + logging.basicConfig( + level=logging.DEBUG if args.verbose else logging.INFO, + format="%(asctime)s %(levelname)-7s %(name)s: %(message)s", + datefmt="%H:%M:%S", + ) + + try: + config = load_config(args.config) + except FileNotFoundError: + print(f"Config file not found: {args.config}", file=sys.stderr) + sys.exit(1) + except Exception as e: + print(f"Config error: {e}", file=sys.stderr) + sys.exit(1) + + # Open DB early so setup can use it + database = db.connect(config.get("database", "gitbot.db")) + + if args.setup: + run_setup(database) + elif not auth.has_owner(database): + print("No owner account found. Run with --setup first:", file=sys.stderr) + print(f" python bot.py --setup -c {args.config}", file=sys.stderr) + sys.exit(1) + + bot = Bot(config, args.config) + # Re-use the already-open DB connection + bot._database = database + + try: + asyncio.run(bot.run()) + except KeyboardInterrupt: + log.info("Shutting down") + + +if __name__ == "__main__": + main() diff --git a/commands.py b/commands.py new file mode 100644 index 0000000..2e37684 --- /dev/null +++ b/commands.py @@ -0,0 +1,360 @@ +""" +IRC command dispatcher. + +Commands that change state require authentication. +identify / logout are PM-only. +All other commands work in channels too, but auth is checked via prefix. +""" + +import logging +import auth +import db + +log = logging.getLogger("commands") + +PREFIX = "!" + + +def parse(text: str): + if not text.startswith(PREFIX): + return None, None + parts = text[len(PREFIX):].split() + if not parts: + return None, None + return parts[0].lower(), parts[1:] + + +async def handle_pm(network: str, nick: str, prefix: str, text: str, + database, reply, reload_fn=None): + """ + Handle a private message to the bot. + prefix = "nick!user@host" + reply = async callable(message) — sends a PM back to nick + """ + words = text.strip().split() + if not words: + return + + cmd = words[0].lower() + + # identify and logout don't use the ! prefix — plain words in PM + if cmd == "identify": + if len(words) < 2: + await reply("Usage: identify or identify ") + return + if not auth.has_owner(database): + await reply("No owner account exists. Run the bot with --setup to create one.") + return + # Two forms: + # identify — current nick must match owner nick + # identify — explicit nick, useful when using a different nick + if len(words) == 2: + owner_nick = auth.get_owner_nick(database) + if nick.lower() != owner_nick.lower(): + await reply(f"Your current nick doesn't match the owner nick. Use: identify ") + return + password = words[1] + else: + password = words[2] + if auth.verify_password(database, password): + auth.login(prefix) + await reply("You are now identified.") + log.info("Successful identify from %s (%s)", nick, prefix) + else: + await reply("Wrong password.") + log.warning("Failed identify attempt from %s (%s)", nick, prefix) + return + + if cmd == "logout": + auth.logout(prefix) + await reply("Logged out.") + return + + # Everything below requires auth + if not auth.is_authenticated(database, prefix): + await reply("You are not identified. Use: identify ") + return + + if cmd == "hostmask": + await _hostmask(words[1:], prefix, database, reply) + elif cmd == "passwd": + await _passwd(words[1:], database, reply) + elif cmd in ("help", "bothelp"): + await _pm_help(reply) + else: + # Also accept !-prefixed commands in PM, and bare commands without ! + bare_cmd = cmd.lstrip("!") + bare_args = words[1:] + if cmd.startswith("!"): + bare_cmd, bare_args = parse(text) + bare_cmd = bare_cmd or cmd.lstrip("!") + await _shared(bare_cmd, bare_args, None, None, prefix, database, reply, reload_fn) + + +async def handle_channel(network: str, channel: str, nick: str, prefix: str, + text: str, database, reply, reload_fn=None): + """ + Handle a channel message. + reply = async callable(message) — sends to the channel + """ + cmd, args = parse(text) + if cmd is None: + return + + if cmd in ("identify", "logout"): + await reply(f"{nick}: please use a private message for that.") + return + + if not auth.is_authenticated(database, prefix): + # Silently ignore — don't advertise admin commands to bystanders + return + + await _shared(cmd, args, network, channel, prefix, database, reply, reload_fn) + + +# ── Shared commands (work in both PM and channel) ───────────────────────────── + +async def _shared(cmd, args, network, channel, prefix, database, reply, reload_fn=None): + if cmd == "reload": + if reload_fn is None: + await reply("Reload not available.") + return + result = await reload_fn() + await reply(result) + elif cmd == "webhook": + if not network or not channel: + await reply("!webhook must be used in a channel.") + return + await _webhook(network, channel, args, database, reply) + elif cmd == "rss": + if not network or not channel: + await reply("!rss must be used in a channel.") + return + await _rss(network, channel, args, database, reply) + elif cmd in ("help", "bothelp"): + await _channel_help(reply) + + +# ── hostmask (PM only) ──────────────────────────────────────────────────────── + +async def _hostmask(args, current_prefix, database, reply): + if not args: + await reply( + "hostmask add [mask] — add mask (omit to use your current host) | " + "hostmask remove | " + "hostmask list" + ) + return + + sub = args[0].lower() + + if sub == "list": + masks = auth.hostmask_list(database) + if not masks: + await reply("No hostmasks registered.") + else: + for mask in masks: + await reply(f" {mask}") + + elif sub == "add": + mask = args[1] if len(args) >= 2 else current_prefix + auth.hostmask_add(database, mask) + await reply(f"Hostmask added: {mask}") + + elif sub == "remove": + if len(args) < 2: + await reply("Usage: hostmask remove ") + return + auth.hostmask_remove(database, args[1]) + await reply(f"Hostmask removed: {args[1]}") + + else: + await reply("Unknown subcommand. Use: hostmask add|remove|list") + + +# ── passwd (PM only) ────────────────────────────────────────────────────────── + +async def _passwd(args, database, reply): + if len(args) < 1: + await reply("Usage: passwd ") + return + auth.change_password(database, args[0]) + await reply("Password updated.") + + +# ── !webhook ────────────────────────────────────────────────────────────────── + +WEBHOOK_HELP = ( + "!webhook list | " + "!webhook add | " + "!webhook remove | " + "!webhook events [event …] | " + "!webhook branches [branch …]" +) + +async def _webhook(network, channel, args, database, reply): + if not args: + await reply(WEBHOOK_HELP) + return + + sub = args[0].lower() + + if sub == "list": + hooks = db.webhook_list(database, network, channel) + if not hooks: + await reply("No webhooks registered for this channel.") + else: + for h in hooks: + branches = ", ".join(h["branches"]) or "all" + events = ", ".join(h["events"]) + await reply(f" {h['repo']} events={events} branches={branches}") + + elif sub == "add": + if len(args) < 2: + await reply("Usage: !webhook add ") + return + db.webhook_add(database, network, channel, args[1]) + await reply(f"Webhook added for {args[1]}") + + elif sub == "remove": + if len(args) < 2: + await reply("Usage: !webhook remove ") + return + db.webhook_remove(database, network, channel, args[1]) + await reply(f"Webhook removed for {args[1]}") + + elif sub == "events": + if len(args) < 2: + await reply("Usage: !webhook events [event …]") + return + repo = args[1] + if len(args) == 2: + hooks = db.webhook_list(database, network, channel) + hook = next((h for h in hooks if h["repo"].lower() == repo.lower()), None) + if not hook: + await reply(f"No webhook found for {repo}") + else: + await reply(f"{repo} events: {', '.join(hook['events'])}") + else: + events = [e.lower() for e in args[2:]] + db.webhook_set_events(database, network, channel, repo, events) + await reply(f"Updated events for {repo}: {', '.join(events)}") + + elif sub == "branches": + if len(args) < 2: + await reply("Usage: !webhook branches [branch …]") + return + repo = args[1] + if len(args) == 2: + hooks = db.webhook_list(database, network, channel) + hook = next((h for h in hooks if h["repo"].lower() == repo.lower()), None) + if not hook: + await reply(f"No webhook found for {repo}") + else: + await reply(f"{repo} branches: {', '.join(hook['branches']) or 'all'}") + else: + db.webhook_set_branches(database, network, channel, repo, args[2:]) + await reply(f"Updated branches for {repo}: {', '.join(args[2:])}") + + else: + await reply(WEBHOOK_HELP) + + +# ── !rss ────────────────────────────────────────────────────────────────────── + +RSS_HELP = ( + "!rss list | " + "!rss add | " + "!rss remove | " + "!rss format [template]" +) + +async def _rss(network, channel, args, database, reply): + if not args: + await reply(RSS_HELP) + return + + sub = args[0].lower() + + if sub == "list": + feeds = db.rss_list(database, network, channel) + if not feeds: + await reply("No RSS feeds registered for this channel.") + else: + for f in feeds: + await reply(f" {f['url']} format={f['format']}") + + elif sub == "add": + if len(args) < 2: + await reply("Usage: !rss add ") + return + _, created = db.rss_add(database, network, channel, args[1]) + if created: + await reply(f"RSS feed added: {args[1]}") + else: + await reply(f"Already watching: {args[1]}") + + elif sub == "remove": + if len(args) < 2: + await reply("Usage: !rss remove ") + return + db.rss_remove(database, network, channel, args[1]) + await reply(f"RSS feed removed: {args[1]}") + + elif sub == "format": + if len(args) < 2: + await reply("Usage: !rss format [template]") + return + url = args[1] + if len(args) == 2: + # Show current format + feeds = db.rss_list(database, network, channel) + feed = next((f for f in feeds if f["url"] == url), None) + if not feed: + await reply(f"No feed found for {url}") + else: + await reply(f"Format for {url}: {feed['format']}") + else: + template = " ".join(args[2:]) + found = db.rss_set_format(database, network, channel, url, template) + if found: + await reply( + "Format updated. Entry vars: $title $link $description " + "$author $published $id — " + "Feed vars: $feed_name $feed_title $feed_link $feed_author $feed_subtitle — " + "(any feedparser field works)" + ) + else: + await reply(f"No feed found for {url}") + + else: + await reply(RSS_HELP) + + +# ── help ────────────────────────────────────────────────────────────────────── + +async def _pm_help(reply): + lines = [ + "── PM commands ──────────────────────────────", + " identify log in (current nick must match owner nick)", + " identify log in from a different nick", + " logout end session", + " passwd change password", + " hostmask list show auto-login masks", + " hostmask add [mask] add mask (omit = your current host)", + " hostmask remove remove a mask", + "── Channel commands ─────────────────────────", + " !webhook list/add/remove/events/branches", + " !rss list/add/remove", + ] + for line in lines: + await reply(line) + + +async def _channel_help(reply): + await reply( + "!webhook list/add/remove/events/branches | " + "!rss list/add/remove/format | " + "!reload | " + "PM the bot: identify, logout, passwd, hostmask" + ) diff --git a/db.py b/db.py new file mode 100644 index 0000000..f1b4691 --- /dev/null +++ b/db.py @@ -0,0 +1,216 @@ +"""SQLite persistence layer for gitbot.""" + +import json +import sqlite3 +from pathlib import Path + + +def connect(path: str) -> sqlite3.Connection: + db = sqlite3.connect(path, check_same_thread=False) + db.row_factory = sqlite3.Row + _migrate(db) + import auth + auth.setup_schema(db) + return db + + +def _migrate(db: sqlite3.Connection): + db.executescript(""" + CREATE TABLE IF NOT EXISTS webhook_routes ( + id INTEGER PRIMARY KEY, + network TEXT NOT NULL, + channel TEXT NOT NULL, + repo TEXT NOT NULL, + events TEXT NOT NULL DEFAULT '["ping","code","pr","issue","repo"]', + branches TEXT NOT NULL DEFAULT '[]' + ); + CREATE UNIQUE INDEX IF NOT EXISTS uq_webhook + ON webhook_routes(network, channel, repo); + + CREATE TABLE IF NOT EXISTS rss_feeds ( + id INTEGER PRIMARY KEY, + network TEXT NOT NULL, + channel TEXT NOT NULL, + url TEXT NOT NULL, + format TEXT NOT NULL DEFAULT '$feed_name: $title <$link>' + ); + CREATE UNIQUE INDEX IF NOT EXISTS uq_rss + ON rss_feeds(network, channel, url); + + CREATE TABLE IF NOT EXISTS rss_seen ( + feed_id INTEGER NOT NULL REFERENCES rss_feeds(id) ON DELETE CASCADE, + entry_id TEXT NOT NULL, + PRIMARY KEY (feed_id, entry_id) + ); + """) + db.commit() + # Migrate existing databases that predate the format column + cols = [r[1] for r in db.execute("PRAGMA table_info(rss_feeds)").fetchall()] + if "format" not in cols: + db.execute(""" + ALTER TABLE rss_feeds + ADD COLUMN format TEXT NOT NULL DEFAULT '$feed_name: $title <$link>' + """) + db.commit() + + +def purge_network(db, network: str): + """Remove all webhook routes and RSS feeds for a network.""" + db.execute("DELETE FROM webhook_routes WHERE network=?", (network,)) + db.execute("DELETE FROM rss_feeds WHERE network=?", (network,)) + db.commit() + + +def purge_channel(db, network: str, channel: str): + """Remove all webhook routes and RSS feeds for a specific channel.""" + db.execute("DELETE FROM webhook_routes WHERE network=? AND channel=?", + (network, channel)) + db.execute("DELETE FROM rss_feeds WHERE network=? AND channel=?", + (network, channel)) + db.commit() + + +# ── Webhook routes ──────────────────────────────────────────────────────────── + +def webhook_add(db, network, channel, repo, + events=None, branches=None): + events = events or ["ping", "code", "pr", "issue", "repo"] + branches = branches or [] + db.execute(""" + INSERT INTO webhook_routes(network, channel, repo, events, branches) + VALUES (?,?,?,?,?) + ON CONFLICT(network, channel, repo) DO UPDATE + SET events=excluded.events, branches=excluded.branches + """, (network, channel, repo, + json.dumps(events), json.dumps(branches))) + db.commit() + + +def webhook_remove(db, network, channel, repo): + db.execute(""" + DELETE FROM webhook_routes + WHERE network=? AND channel=? AND repo=? + """, (network, channel, repo)) + db.commit() + + +def webhook_list(db, network, channel): + rows = db.execute(""" + SELECT repo, events, branches FROM webhook_routes + WHERE network=? AND channel=? + ORDER BY repo + """, (network, channel)).fetchall() + return [ + { + "repo": r["repo"], + "events": json.loads(r["events"]), + "branches": json.loads(r["branches"]), + } + for r in rows + ] + + +def webhook_set_events(db, network, channel, repo, events): + db.execute(""" + UPDATE webhook_routes SET events=? + WHERE network=? AND channel=? AND repo=? + """, (json.dumps(events), network, channel, repo)) + db.commit() + + +def webhook_set_branches(db, network, channel, repo, branches): + db.execute(""" + UPDATE webhook_routes SET branches=? + WHERE network=? AND channel=? AND repo=? + """, (json.dumps(branches), network, channel, repo)) + db.commit() + + +def webhook_targets(db, full_name, repo_user, organisation): + """Return all (network, channel, events, branches) matching this repo.""" + rows = db.execute(""" + SELECT network, channel, repo, events, branches FROM webhook_routes + """).fetchall() + results = [] + candidates = {x.lower() for x in [full_name, repo_user, organisation] if x} + for r in rows: + if r["repo"].lower() in candidates: + results.append({ + "network": r["network"], + "channel": r["channel"], + "events": json.loads(r["events"]), + "branches": json.loads(r["branches"]), + }) + return results + + +# ── RSS feeds ───────────────────────────────────────────────────────────────── + +def rss_add(db, network, channel, url): + """Insert feed. Returns (id, created) — created=False if it already existed.""" + existing = db.execute(""" + SELECT id FROM rss_feeds WHERE network=? AND channel=? AND url=? + """, (network, channel, url)).fetchone() + if existing: + return existing["id"], False + db.execute(""" + INSERT INTO rss_feeds(network, channel, url) VALUES (?,?,?) + """, (network, channel, url)) + db.commit() + row = db.execute(""" + SELECT id FROM rss_feeds WHERE network=? AND channel=? AND url=? + """, (network, channel, url)).fetchone() + return row["id"], True + + +def rss_remove(db, network, channel, url): + db.execute(""" + DELETE FROM rss_feeds WHERE network=? AND channel=? AND url=? + """, (network, channel, url)) + db.commit() + + +def rss_list(db, network, channel): + rows = db.execute(""" + SELECT url, format FROM rss_feeds WHERE network=? AND channel=? + ORDER BY url + """, (network, channel)).fetchall() + return [{"url": r["url"], "format": r["format"]} for r in rows] + + +def rss_all_feeds(db): + """Return all feeds: list of {id, network, channel, url, format}.""" + rows = db.execute(""" + SELECT id, network, channel, url, format FROM rss_feeds + """).fetchall() + return [dict(r) for r in rows] + + +def rss_set_format(db, network, channel, url, fmt): + """Update format template for a feed. Returns True if the feed was found.""" + db.execute(""" + UPDATE rss_feeds SET format=? WHERE network=? AND channel=? AND url=? + """, (fmt, network, channel, url)) + db.commit() + return db.execute("SELECT changes()").fetchone()[0] > 0 + + +def rss_get_seen(db, feed_id): + rows = db.execute(""" + SELECT entry_id FROM rss_seen WHERE feed_id=? + """, (feed_id,)).fetchall() + return {r["entry_id"] for r in rows} + + +def rss_mark_seen(db, feed_id, entry_ids): + db.executemany(""" + INSERT OR IGNORE INTO rss_seen(feed_id, entry_id) VALUES (?,?) + """, [(feed_id, eid) for eid in entry_ids]) + # Keep only the most recent 500 per feed to avoid unbounded growth + db.execute(""" + DELETE FROM rss_seen WHERE feed_id=? AND entry_id NOT IN ( + SELECT entry_id FROM rss_seen WHERE feed_id=? + ORDER BY rowid DESC LIMIT 500 + ) + """, (feed_id, feed_id)) + db.commit() diff --git a/gitbot.toml.example b/gitbot.toml.example new file mode 100644 index 0000000..b8bcda8 --- /dev/null +++ b/gitbot.toml.example @@ -0,0 +1,74 @@ +# gitbot.toml — example configuration + +database = "gitbot.db" + +# Global outgoing bind address for IRC connections (IPv4 or IPv6). +# Useful if your host has multiple IPs and you want to control which one +# is used. Can be overridden per [[network]] with a local "bind" key. +# bind = "192.0.2.1" +# bind = "2001:db8::1" + + +# ── Webhook HTTP server ─────────────────────────────────────────────────────── +[webhook_server] +enabled = true +host = "127.0.0.1" # use "::" to listen on all IPv6+IPv4 interfaces +port = 8080 + +# Per-forge secrets. Two verification modes are supported — pick whichever +# is easier to configure in your forge's webhook settings UI: +# +# Mode 1 — URL token (simplest): +# Leave the forge's "secret" field empty, and just append ?secret=... to +# the payload URL: +# https://yourhost/github?secret=changeme-github +# https://yourhost/gitea?secret=changeme-gitea +# https://yourhost/gitlab?secret=changeme-gitlab +# +# Mode 2 — HMAC header (more secure, secret never appears in URLs/logs): +# Set the secret here AND in the forge's webhook settings. +# GitHub/Gitea: HMAC-SHA256 of the body. +# GitLab: token compared directly against X-Gitlab-Token. +# +# Both modes use the same secret value below. Omit or leave empty to +# accept all requests without verification (only for trusted networks). +# +# github_secret = "changeme-github" +# gitea_secret = "changeme-gitea" +# gitlab_secret = "changeme-gitlab" + + +# ── RSS poller ──────────────────────────────────────────────────────────────── +[rss] +enabled = true +interval = 300 # seconds between polls + + +# ── IRC networks ────────────────────────────────────────────────────────────── + +[[network]] +name = "libera" +host = "irc.libera.chat" +port = 6697 +tls = true +nickname = "gitbot" +username = "gitbot" +realname = "git webhook + RSS bot" +channels = ["#myproject", "#ops"] + +# Per-network bind override (optional — overrides the global bind above) +# bind = "192.0.2.2" + +# nickserv_password = "hunter2" +# sasl_plain = { user = "gitbot", password = "hunter2" } + + +[[network]] +name = "hackint" +host = "irc.hackint.org" +port = 6697 +tls = true +nickname = "gitbot" +username = "gitbot" +realname = "git webhook + RSS bot" +channels = ["#myproject"] diff --git a/irc_client.py b/irc_client.py new file mode 100644 index 0000000..86e0a7f --- /dev/null +++ b/irc_client.py @@ -0,0 +1,233 @@ +"""Async IRC client for one network connection.""" + +import asyncio +import logging +import ssl +import time +from typing import Callable, Optional + +log = logging.getLogger("irc") + +RECONNECT_DELAY_MIN = 5 +RECONNECT_DELAY_MAX = 300 + + +class IRCClient: + def __init__(self, config: dict, on_message: Callable, + on_connected: Callable): + """ + config keys: + name, host, port, tls (bool), nickname, username, realname, + sasl_plain (optional: {user, password}), + nickserv_password (optional), + channels (list of str) + """ + self.config = config + self.name = config["name"] + self.on_message = on_message # async fn(network, channel, nick, msg) + self.on_connected = on_connected # async fn(network) + + self._writer: Optional[asyncio.StreamWriter] = None + self._channels: set = set() + self._reconnect_delay = RECONNECT_DELAY_MIN + self._running = True + self._ready = False + + # ── Public API ──────────────────────────────────────────────────────────── + + async def run(self): + while self._running: + try: + await self._connect() + except Exception as e: + log.warning("[%s] Connection error: %s", self.name, e) + if not self._running: + break + log.info("[%s] Reconnecting in %ds…", self.name, self._reconnect_delay) + await asyncio.sleep(self._reconnect_delay) + self._reconnect_delay = min( + self._reconnect_delay * 2, RECONNECT_DELAY_MAX) + + async def stop(self, message: str = "Disconnecting"): + """Gracefully disconnect and stop the reconnect loop.""" + self._running = False + if self._writer: + try: + self.send(f"QUIT :{message}") + await asyncio.sleep(0.5) + self._writer.close() + except Exception: + pass + + def send(self, line: str): + if self._writer: + log.debug("[%s] >> %s", self.name, line) + self._writer.write((line + "\r\n").encode()) + + def privmsg(self, target: str, text: str): + # IRC lines are limited to 512 bytes including \r\n; split if needed + prefix = f"PRIVMSG {target} :" + limit = 510 - len(prefix.encode()) + encoded = text.encode("utf-8", errors="replace") + while encoded: + chunk, encoded = encoded[:limit], encoded[limit:] + self.send(prefix + chunk.decode("utf-8", errors="replace")) + + def join(self, channel: str): + self.send(f"JOIN {channel}") + + def in_channel(self, channel: str) -> bool: + return channel.lower() in self._channels + + # ── Internal ────────────────────────────────────────────────────────────── + + async def _connect(self): + host = self.config["host"] + port = self.config["port"] + use_tls = self.config.get("tls", False) + + log.info("[%s] Connecting to %s:%d (tls=%s)…", self.name, host, port, use_tls) + + # Optional local bind address — per-network "bind" overrides global "_global_bind" + bind = self.config.get("bind") or self.config.get("_global_bind") + local_addr = (bind, 0) if bind else None + + if use_tls: + ctx = ssl.create_default_context() + if not self.config.get("tls_verify", True): + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + reader, writer = await asyncio.open_connection( + host, port, ssl=ctx, local_addr=local_addr) + else: + reader, writer = await asyncio.open_connection( + host, port, local_addr=local_addr) + + self._writer = writer + self._channels = set() + self._ready = False + + nick = self.config["nickname"] + username = self.config.get("username", nick) + realname = self.config.get("realname", nick) + + # SASL PLAIN + if "sasl_plain" in self.config: + self.send("CAP REQ :sasl") + + self.send(f"NICK {nick}") + self.send(f"USER {username} 0 * :{realname}") + + try: + async for raw in self._read_lines(reader): + await self._handle(raw) + finally: + writer.close() + self._writer = None + self._ready = False + + async def _read_lines(self, reader: asyncio.StreamReader): + buf = b"" + while True: + try: + data = await asyncio.wait_for(reader.read(4096), timeout=300) + except asyncio.TimeoutError: + log.warning("[%s] Read timeout, disconnecting", self.name) + return + if not data: + log.info("[%s] Connection closed by server", self.name) + return + buf += data + while b"\n" in buf: + line, buf = buf.split(b"\n", 1) + yield line.rstrip(b"\r").decode("utf-8", errors="replace") + + async def _handle(self, raw: str): + log.debug("[%s] << %s", self.name, raw) + + if raw.startswith("PING"): + token = raw.split(":", 1)[-1] if ":" in raw else raw.split(" ", 1)[-1] + self.send(f"PONG :{token}") + return + + parts = raw.split(" ") + + # CAP negotiation for SASL + if len(parts) >= 3 and parts[1] == "CAP": + sub = parts[3] if len(parts) > 3 else "" + if sub in ("ACK", ":ACK") and "sasl" in raw: + self.send("AUTHENTICATE PLAIN") + return + + if len(parts) >= 2 and parts[0] == "AUTHENTICATE": + sasl = self.config["sasl_plain"] + import base64 + token = base64.b64encode( + f"\x00{sasl['user']}\x00{sasl['password']}".encode() + ).decode() + self.send(f"AUTHENTICATE {token}") + return + + # Numeric: 903 = SASL success + if len(parts) >= 2 and parts[1] == "903": + self.send("CAP END") + return + + # Numeric: 904/905 = SASL fail + if len(parts) >= 2 and parts[1] in ("904", "905"): + log.error("[%s] SASL authentication failed!", self.name) + self.send("CAP END") + return + + # 001 = welcome → we're registered + if len(parts) >= 2 and parts[1] == "001": + self._reconnect_delay = RECONNECT_DELAY_MIN + log.info("[%s] Registered as %s", self.name, self.config["nickname"]) + + if "nickserv_password" in self.config: + ns_pw = self.config["nickserv_password"] + self.send(f"PRIVMSG NickServ :IDENTIFY {ns_pw}") + + for ch in self.config.get("channels", []): + self.join(ch) + + self._ready = True + await self.on_connected(self.name) + return + + # JOIN + if len(parts) >= 3 and parts[1] == "JOIN": + channel = parts[2].lstrip(":") + nick_part = parts[0].lstrip(":") + joiner = nick_part.split("!")[0] + own_nick = self.config["nickname"] + if joiner.lower() == own_nick.lower(): + self._channels.add(channel.lower()) + log.info("[%s] Joined %s", self.name, channel) + return + + # KICK / PART (our own) + if len(parts) >= 3 and parts[1] in ("KICK", "PART"): + channel = parts[2].lstrip(":") + self._channels.discard(channel.lower()) + if parts[1] == "KICK": + # rejoin if kicked + await asyncio.sleep(5) + self.join(channel) + return + + # PRIVMSG + if len(parts) >= 4 and parts[1] == "PRIVMSG": + full_prefix = parts[0].lstrip(":") + nick = full_prefix.split("!")[0] + target = parts[2] + text = " ".join(parts[3:]).lstrip(":") + await self.on_message(self.name, target, nick, full_prefix, text) + return + + # 433 = nick in use + if len(parts) >= 2 and parts[1] == "433": + new_nick = self.config["nickname"] + "_" + log.warning("[%s] Nick in use, trying %s", self.name, new_nick) + self.send(f"NICK {new_nick}") + return diff --git a/irc_format.py b/irc_format.py new file mode 100644 index 0000000..9a89083 --- /dev/null +++ b/irc_format.py @@ -0,0 +1,39 @@ +"""IRC formatting helpers — colors, bold, etc.""" + +BOLD = "\x02" +COLOR = "\x03" +RESET = "\x0F" + +# IRC color codes +WHITE = 0 +BLACK = 1 +BLUE = 2 +GREEN = 3 +RED = 4 +BROWN = 5 +PURPLE = 6 +ORANGE = 7 +YELLOW = 8 +LIGHTGREEN = 9 +CYAN = 10 +LIGHTCYAN = 11 +LIGHTBLUE = 12 +PINK = 13 +GREY = 14 +LIGHTGREY = 15 + +# Semantic aliases used by webhook formatters +COLOR_BRANCH = ORANGE +COLOR_REPO = GREY +COLOR_POSITIVE = GREEN +COLOR_NEGATIVE = RED +COLOR_NEUTRAL = LIGHTGREY +COLOR_ID = PINK + + +def color(s: str, fg: int) -> str: + return f"{COLOR}{fg:02d}{s}{COLOR}" + + +def bold(s: str) -> str: + return f"{BOLD}{s}{BOLD}" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..37dec8b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +aiohttp>=3.9 +feedparser>=6.0 +# tomllib is in stdlib for Python 3.11+ +# For Python 3.9/3.10, uncomment: +# tomli>=2.0 diff --git a/rss.py b/rss.py new file mode 100644 index 0000000..9020236 --- /dev/null +++ b/rss.py @@ -0,0 +1,217 @@ +"""RSS/Atom feed poller.""" + +import asyncio +import hashlib +import html +import logging +import re +from string import Template +from typing import Callable + +import aiohttp +import feedparser + +import db + +log = logging.getLogger("rss") + +DEFAULT_INTERVAL = 300 # 5 minutes +DEFAULT_FORMAT = "$feed_name: $title <$link>" + +# Template variables available in format strings: +# +# From the feed entry (feedparser field names): +# $title entry title, HTML stripped +# $link entry URL +# $description entry summary/description, HTML stripped +# $author entry author +# $published publication date string (as-is from feedparser) +# $id entry unique ID +# ... any other feedparser entry field +# +# From the feed itself (prefixed with feed_): +# $feed_name feed title (e.g. "pegasus/testrepo") +# $feed_link feed URL +# $feed_author feed author +# $feed_subtitle feed subtitle/description +# ... any other feedparser feed.* field, prefixed with feed_ + + +_TAG_RE = re.compile(r"<[^>]+>") + +def _strip_html(text: str) -> str: + """Remove HTML tags and decode entities, collapse whitespace.""" + text = _TAG_RE.sub("", text) + text = html.unescape(text) + return " ".join(text.split()) + + +def _flatten_value(value) -> str: + """ + feedparser returns some fields as lists of dicts with type/value/href. + Resolve them to a plain string the same way Limnoria does: + prefer text/plain, then strip HTML from text/html, then fall back to href. + """ + if not isinstance(value, list): + if isinstance(value, str): + return value + return str(value) if value is not None else "" + + for item in value: + if isinstance(item, dict) and item.get("type") == "text/plain": + return item.get("value", "") + for item in value: + if isinstance(item, dict) and item.get("type") in \ + ("text/html", "application/xhtml+xml"): + if "value" in item: + return _strip_html(item["value"]) + if "href" in item: + return item["href"] + for item in value: + if isinstance(item, dict) and "href" in item: + return item["href"] + for item in value: + if isinstance(item, dict) and "value" in item: + return str(item["value"]) + return str(value) + + +def _build_vars(feed_meta: dict, entry) -> dict: + """ + Build the template substitution dict from a feedparser entry + and feed metadata dict. + """ + # Feed-level vars ($feed_*) + kwargs = {} + for k, v in feed_meta.items(): + if isinstance(v, str): + kwargs[f"feed_{k}"] = _strip_html(v) + elif isinstance(v, list): + kwargs[f"feed_{k}"] = _flatten_value(v) + + # Convenience alias — strip Gitea's "Feed of X" wrapper if present + raw_name = kwargs.get("feed_title", "") + m = re.match(r'^[Ff]eed\s+of\s+"?(.+?)"?\s*$', raw_name) + kwargs.setdefault("feed_name", m.group(1) if m else raw_name) + + # Entry-level vars — iterate feedparser entry attributes + entry_dict = dict(entry) if hasattr(entry, "items") else {} + for k, v in entry_dict.items(): + kwargs[k] = _flatten_value(v) if isinstance(v, list) \ + else (_strip_html(v) if isinstance(v, str) else str(v) if v is not None else "") + + # Ensure the most common keys always exist (empty string if absent) + for key in ("title", "link", "description", "author", "published", "id"): + kwargs.setdefault(key, "") + + # HTML-strip summary/title from their _detail counterparts if available + for key in ("title", "summary"): + detail = entry_dict.get(f"{key}_detail") + if isinstance(detail, dict) and detail.get("type") in \ + ("text/html", "application/xhtml+xml"): + kwargs[key] = _strip_html(detail.get("value", kwargs.get(key, ""))) + + # Map feedparser's "summary" to "description" if not already set + if not kwargs.get("description") and kwargs.get("summary"): + kwargs["description"] = kwargs["summary"] + + return kwargs + + +def _format_entry(template_str: str, feed_meta: dict, entry) -> str: + variables = _build_vars(feed_meta, entry) + try: + return Template(template_str).safe_substitute(variables) + except (KeyError, ValueError) as e: + log.warning("Bad RSS format template %r: %s — falling back", template_str, e) + title = variables.get("title", "") + link = variables.get("link", "") + name = variables.get("feed_name", "") + return f"{name}: {title} <{link}>" + + +def _entry_id(entry) -> str: + """Stable unique ID for a feedparser entry.""" + raw = entry.get("id") or entry.get("link") or entry.get("title") or "" + return "sha1:" + hashlib.sha1(raw.encode()).hexdigest() + + +class RSSPoller: + def __init__(self, database, deliver: Callable, + interval: int = DEFAULT_INTERVAL): + """deliver: async callable(network, channel, message)""" + self._db = database + self._deliver = deliver + self._interval = interval + + async def run(self): + while True: + await self._poll() + await asyncio.sleep(self._interval) + + async def _poll(self): + feeds = db.rss_all_feeds(self._db) + if not feeds: + return + log.debug("Polling %d RSS feed(s)", len(feeds)) + async with aiohttp.ClientSession( + timeout=aiohttp.ClientTimeout(total=30) + ) as session: + for feed in feeds: + try: + await self._fetch_and_deliver(session, feed) + except Exception as e: + log.exception("Unhandled error polling %s: %s", feed["url"], e) + + async def _fetch_and_deliver(self, session, feed: dict): + url = feed["url"] + feed_id = feed["id"] + network = feed["network"] + channel = feed["channel"] + fmt = feed.get("format", DEFAULT_FORMAT) + + try: + async with session.get(url) as resp: + content = await resp.text() + except Exception as e: + log.warning("Failed to fetch %s: %s", url, e) + return + + parsed = feedparser.parse(content) + entries = parsed.get("entries", []) + feed_meta = dict(parsed.get("feed", {})) + + if not entries: + log.debug("No entries in feed %s", url) + return + + seen = db.rss_get_seen(self._db, feed_id) + new_entries = [(eid, e) for e in reversed(entries) # oldest first + if (eid := _entry_id(e)) not in seen] + + if not new_entries: + log.debug("No new entries in %s", url) + return + + # First poll: show only the single most recent entry as a "hello", + # then mark everything seen to avoid a flood of history. + if not seen: + eid, entry = new_entries[-1] + await self._deliver(network, channel, + _format_entry(fmt, feed_meta, entry)) + db.rss_mark_seen(self._db, feed_id, [e[0] for e in new_entries]) + log.info("Initialised feed %s (%d entries), showed latest", + url, len(new_entries)) + return + + # Subsequent polls: deliver up to 3 new entries. + to_deliver = new_entries[:3] + for eid, entry in to_deliver: + await self._deliver(network, channel, + _format_entry(fmt, feed_meta, entry)) + log.debug("Delivered entry from %s: %s", + url, entry.get("title", "")) + + db.rss_mark_seen(self._db, feed_id, [e[0] for e in to_deliver]) + if len(new_entries) > 3: + log.info("%s had %d new entries, delivered 3", url, len(new_entries)) diff --git a/webhook_gitea.py b/webhook_gitea.py new file mode 100644 index 0000000..9435287 --- /dev/null +++ b/webhook_gitea.py @@ -0,0 +1,188 @@ +"""Gitea webhook payload parser.""" + +from irc_format import ( + color, bold, + COLOR_BRANCH, COLOR_ID, COLOR_POSITIVE, COLOR_NEGATIVE, + LIGHTBLUE, +) + +COMMENT_ACTIONS = { + "created": "commented", + "edited": "edited a comment", + "deleted": "deleted a comment", +} +RELEASE_ACTIONS = { + "updated": "published", + "published": "published", + "deleted": "deleted", +} + +EVENT_CATEGORIES = { + "ping": ["ping"], + "code": ["push"], + "pr-minimal": [ + "pull_request/opened", "pull_request/closed", "pull_request/reopened", + ], + "pr": [ + "pull_request/opened", "pull_request/closed", "pull_request/reopened", + "pull_request/edited", "pull_request/assigned", "pull_request/unassigned", + ], + "pr-all": ["pull_request"], + "issue-minimal": [ + "issues/opened", "issues/closed", "issues/reopened", "issues/deleted", + ], + "issue": [ + "issues/opened", "issues/closed", "issues/reopened", "issues/deleted", + "issues/edited", "issues/assigned", "issues/unassigned", "issue_comment", + ], + "issue-all": ["issues", "issue_comment"], + "repo": ["create", "delete", "release", "fork", "repository"], +} + + +def _short(h): + return h[:7] + + +def names(data, headers): + full_name = repo_user = repo_name = organisation = None + if "repository" in data: + full_name = data["repository"]["full_name"] + repo_user, repo_name = full_name.split("/", 1) + if "organization" in data: + organisation = data["organization"]["login"] + return full_name, repo_user, repo_name, organisation + + +def branch(data, headers): + if "ref" in data: + return data["ref"].rpartition("/")[2] + return None + + +def is_private(data, headers): + return data.get("repository", {}).get("private", False) + + +def event(data, headers): + ev = headers.get("X-Gitea-Event", "") + action = data.get("action") + parts = [ev] + if action: + parts.append(f"{ev}/{action}") + return parts + + +def event_categories(ev): + return EVENT_CATEGORIES.get(ev, [ev]) + + +def parse(full_name, ev, data, headers): + dispatch = { + "push": _push, + "pull_request": _pull_request, + "issues": _issues, + "issue_comment": _issue_comment, + "create": _create, + "delete": _delete, + "repository": lambda fn, d: [], + "release": _release, + "fork": _fork, + "ping": lambda fn, d: [("Received new webhook", None)], + } + fn = dispatch.get(ev) + if fn: + return fn(full_name, data) + return [] + + +def _push(full_name, data): + branch_str = color(data["ref"].rpartition("/")[2], COLOR_BRANCH) + author = bold(data["pusher"]["login"]) + commits = data.get("commits", []) + outputs = [] + if len(commits) <= 3: + for c in commits: + h = color(_short(c["id"]), COLOR_ID) + msg = c["message"].split("\n")[0].strip() + outputs.append((f"{author} pushed {h} to {branch_str}: {msg}", c["url"])) + else: + url = data.get("compare_url") + outputs.append((f"{author} pushed {len(commits)} commits to {branch_str}", url)) + return outputs + + +def _pull_request(full_name, data): + pr = data["pull_request"] + num = color(f"#{pr['number']}", COLOR_ID) + action = data["action"] + branch_str = color(pr["base"]["ref"], COLOR_BRANCH) + author = bold(data["sender"]["login"]) + title = pr["title"] + url = pr["html_url"] + + if action == "opened": + desc = f"requested {num} merge into {branch_str}" + elif action == "closed": + if pr.get("merged"): + desc = f"{color('merged', COLOR_POSITIVE)} {num} into {branch_str}" + else: + desc = f"{color('closed', COLOR_NEGATIVE)} {num}" + elif action == "ready_for_review": + desc = f"marked {num} ready for review" + elif action == "synchronize": + desc = f"committed to {num}" + else: + desc = f"{action} {num}" + + return [(f"[PR] {author} {desc}: {title}", url)] + + +def _issues(full_name, data): + num = color(f"#{data['issue']['number']}", COLOR_ID) + action = data["action"] + title = data["issue"]["title"] + author = bold(data["sender"]["login"]) + url = f"{data['repository']['html_url']}/issues/{data['issue']['number']}" + return [(f"[issue] {author} {action} {num}: {title}", url)] + + +def _issue_comment(full_name, data): + if "changes" in data: + if data["changes"].get("body", {}).get("from") == data["comment"]["body"]: + return [] + num = color(f"#{data['issue']['number']}", COLOR_ID) + action = data["action"] + title = data["issue"]["title"] + type_ = "PR" if data["issue"].get("pull_request") else "issue" + commenter = bold(data["sender"]["login"]) + url = data["comment"]["html_url"] + return [(f"[{type_}] {commenter} {COMMENT_ACTIONS[action]} on {num}: {title}", url)] + + +def _create(full_name, data): + ref = color(data["ref"], COLOR_BRANCH) + sender = bold(data["sender"]["login"]) + return [(f"{sender} created a {data['ref_type']}: {ref}", None)] + + +def _delete(full_name, data): + ref = color(data["ref"], COLOR_BRANCH) + sender = bold(data["sender"]["login"]) + return [(f"{sender} deleted a {data['ref_type']}: {ref}", None)] + + +def _release(full_name, data): + action = RELEASE_ACTIONS.get(data["action"], data["action"]) + name = data["release"].get("name") or "" + if name: + name = f": {name}" + author = bold(data["release"]["author"]["login"]) + return [(f"{author} {action} a release{name}", None)] + + +def _fork(full_name, data): + forker = bold(data["sender"]["login"]) + fork_name = color(data["repository"]["full_name"], LIGHTBLUE) + url = data["repository"]["html_url"] + return [(f"{forker} forked into {fork_name}", url)] diff --git a/webhook_github.py b/webhook_github.py new file mode 100644 index 0000000..7806b3d --- /dev/null +++ b/webhook_github.py @@ -0,0 +1,286 @@ +"""GitHub webhook payload parser.""" + +from irc_format import ( + color, bold, + COLOR_BRANCH, COLOR_ID, COLOR_POSITIVE, COLOR_NEGATIVE, COLOR_NEUTRAL, + LIGHTBLUE, PURPLE, RED, +) + +COMMIT_URL = "https://github.com/%s/commit/%s" +COMMIT_RANGE_URL = "https://github.com/%s/compare/%s...%s" +CREATE_URL = "https://github.com/%s/tree/%s" +PR_URL = "https://github.com/%s/pull/%s" + +COMMENT_MAX = 100 +COMMENT_ACTIONS = { + "created": "commented", + "edited": "edited a comment", + "deleted": "deleted a comment", +} + +EVENT_CATEGORIES = { + "ping": ["ping"], + "code": ["push", "commit_comment"], + "pr-minimal": [ + "pull_request/opened", "pull_request/closed", "pull_request/reopened", + ], + "pr": [ + "pull_request/opened", "pull_request/closed", "pull_request/reopened", + "pull_request/edited", "pull_request/assigned", + "pull_request/unassigned", "pull_request_review", + "pull_request/locked", "pull_request/unlocked", + "pull_request_review_comment", + ], + "pr-all": ["pull_request", "pull_request_review", "pull_request_review_comment"], + "issue-minimal": [ + "issues/opened", "issues/closed", "issues/reopened", + "issues/deleted", "issues/transferred", + ], + "issue": [ + "issues/opened", "issues/closed", "issues/reopened", "issues/deleted", + "issues/edited", "issues/assigned", "issues/unassigned", + "issues/locked", "issues/unlocked", "issues/transferred", + "issue_comment", + ], + "issue-all": ["issues", "issue_comment"], + "repo": ["create", "delete", "release", "fork"], + "star": ["watch"], +} + + +def _short(h): + return h[:7] + + +def _comment(s): + line = s.split("\n")[0].strip() + left, right = line[:COMMENT_MAX], line[COMMENT_MAX:] + if not right: + return left + if " " in left: + left = left.rsplit(" ", 1)[0] + return f"{left}[...]" + + +def names(data, headers): + full_name = repo_user = repo_name = organisation = None + if "repository" in data: + full_name = data["repository"]["full_name"] + repo_user, repo_name = full_name.split("/", 1) + if "organization" in data: + organisation = data["organization"]["login"] + return full_name, repo_user, repo_name, organisation + + +def branch(data, headers): + if "ref" in data: + return data["ref"].rpartition("/")[2] + return None + + +def is_private(data, headers): + return data.get("repository", {}).get("private", False) + + +def event(data, headers): + ev = headers.get("X-GitHub-Event", "") + action = data.get("action") + category = None + if "review" in data and "state" in data.get("review", {}): + category = f"{ev}+{data['review']['state']}" + elif "check_suite" in data and "conclusion" in data.get("check_suite", {}): + category = f"{ev}+{data['check_suite']['conclusion']}" + parts = [ev] + if action: + parts.append(f"{ev}/{action}") + if category: + parts.append(category) + if action: + parts.append(f"{category}/{action}") + return parts + + +def event_categories(ev): + return EVENT_CATEGORIES.get(ev, [ev]) + + +def parse(full_name, ev, data, headers): + """Return list of (message, url) tuples.""" + dispatch = { + "push": _push, + "commit_comment": _commit_comment, + "pull_request": _pull_request, + "pull_request_review": _pr_review, + "pull_request_review_comment": _pr_review_comment, + "issue_comment": _issue_comment, + "issues": _issues, + "create": _create, + "delete": _delete, + "release": _release, + "fork": _fork, + "ping": lambda fn, d: [("Received new webhook", None)], + "watch": lambda fn, d: [(f"{d['sender']['login']} starred the repository", None)], + "membership": lambda fn, d: [(f"{d['sender']['login']} {d['action']} {d['member']['login']} to team {d['team']['name']}", None)], + } + fn = dispatch.get(ev) + if fn: + return fn(full_name, data) + return [] + + +def _format_push(branch_str, author, commits, forced, single_url, range_url): + outputs = [] + forced_str = f"{color('force', RED)} " if forced else "" + if not commits and forced: + return [(f"{author} {forced_str}pushed to {branch_str}", None)] + if len(commits) <= 3: + for c in commits: + h = color(_short(c["id"]), COLOR_ID) + msg = c["message"].split("\n")[0].strip() + url = single_url % c["id"] + outputs.append((f"{author} {forced_str}pushed {h} to {branch_str}: {msg}", url)) + else: + url = range_url + outputs.append((f"{author} {forced_str}pushed {len(commits)} commits to {branch_str}", url)) + return outputs + + +def _push(full_name, data): + branch_str = color(data["ref"].split("/", 2)[2], COLOR_BRANCH) + author = bold(data["pusher"]["name"]) + forced = data.get("forced", False) + commits = data.get("commits", []) + range_url = None + if commits: + range_url = COMMIT_RANGE_URL % (full_name, data["before"], commits[-1]["id"]) + single_url = COMMIT_URL % (full_name, "%s") + return _format_push(branch_str, author, commits, forced, single_url, range_url) + + +def _commit_comment(full_name, data): + action = data["action"] + commit = _short(data["comment"]["commit_id"]) + commenter = bold(data["comment"]["user"]["login"]) + url = data["comment"]["html_url"] + return [(f"[commit/{commit}] {commenter} {action} a comment", url)] + + +def _pull_request(full_name, data): + pr = data["pull_request"] + raw_num = pr["number"] + num = color(f"#{raw_num}", COLOR_ID) + author = bold(pr["user"]["login"]) + sender = bold(data["sender"]["login"]) + branch_str = color(pr["base"]["ref"], COLOR_BRANCH) + action = data["action"] + title = pr["title"] + url = pr["html_url"] + + if action == "opened": + desc = f"requested {num} merge into {branch_str}" + elif action == "closed": + if pr.get("merged"): + desc = f"{color('merged', COLOR_POSITIVE)} {num} by {author} into {branch_str}" + else: + desc = f"{color('closed', COLOR_NEGATIVE)} {num} by {author}" + elif action == "ready_for_review": + desc = f"marked {num} ready for review" + elif action == "synchronize": + desc = f"committed to {num} by {author}" + elif action == "labeled": + desc = f"labeled {num} as '{data['label']['name']}'" + elif action == "edited" and "title" in data.get("changes", {}): + desc = f"renamed {num}" + else: + desc = f"{action} {num} by {author}" + + return [(f"[PR] {sender} {desc}: {title}", url)] + + +def _pr_review(full_name, data): + if data["action"] != "submitted": + return [] + review = data["review"] + if "submitted_at" not in review: + return [] + state = review["state"] + if state == "commented": + return [] + num = color(f"#{data['pull_request']['number']}", COLOR_ID) + title = data["pull_request"]["title"] + reviewer = bold(data["sender"]["login"]) + url = review["html_url"] + state_map = { + "approved": "approved changes", + "changes_requested": "requested changes", + "dismissed": "dismissed a review", + } + return [(f"[PR] {reviewer} {state_map.get(state, state)} on {num}: {title}", url)] + + +def _pr_review_comment(full_name, data): + num = color(f"#{data['pull_request']['number']}", COLOR_ID) + action = data["action"] + title = data["pull_request"]["title"] + sender = bold(data["sender"]["login"]) + url = data["comment"]["html_url"] + return [(f"[PR] {sender} {COMMENT_ACTIONS[action]} on a review on {num}: {title}", url)] + + +def _issues(full_name, data): + num = color(f"#{data['issue']['number']}", COLOR_ID) + action = data["action"] + if action == "labeled": + action_str = f"labeled {num} as '{data['label']['name']}'" + elif action == "edited" and "title" in data.get("changes", {}): + action_str = f"renamed {num}" + else: + action_str = f"{action} {num}" + author = bold(data["sender"]["login"]) + title = data["issue"]["title"] + url = data["issue"]["html_url"] + return [(f"[issue] {author} {action_str}: {title}", url)] + + +def _issue_comment(full_name, data): + if "changes" in data: + if data["changes"].get("body", {}).get("from") == data["comment"]["body"]: + return [] + num = color(f"#{data['issue']['number']}", COLOR_ID) + action = data["action"] + title = data["issue"]["title"] + type_ = "PR" if "pull_request" in data["issue"] else "issue" + commenter = bold(data["sender"]["login"]) + url = data["comment"]["html_url"] + body = f": {_comment(data['comment']['body'])}" if action != "deleted" else "" + return [(f"[{type_}] {commenter} {COMMENT_ACTIONS[action]} on {num} ({title}){body}", url)] + + +def _create(full_name, data): + ref = color(data["ref"], COLOR_BRANCH) + sender = bold(data["sender"]["login"]) + url = CREATE_URL % (full_name, data["ref"]) + return [(f"{sender} created a {data['ref_type']}: {ref}", url)] + + +def _delete(full_name, data): + ref = color(data["ref"], COLOR_BRANCH) + sender = bold(data["sender"]["login"]) + return [(f"{sender} deleted a {data['ref_type']}: {ref}", None)] + + +def _release(full_name, data): + action = data["action"] + name = data["release"].get("name") or "" + if name: + name = f": {name}" + author = bold(data["release"]["author"]["login"]) + url = data["release"]["html_url"] + return [(f"{author} {action} a release{name}", url)] + + +def _fork(full_name, data): + forker = bold(data["sender"]["login"]) + fork_name = color(data["forkee"]["full_name"], LIGHTBLUE) + url = data["forkee"]["html_url"] + return [(f"{forker} forked into {fork_name}", url)] diff --git a/webhook_gitlab.py b/webhook_gitlab.py new file mode 100644 index 0000000..b6d0496 --- /dev/null +++ b/webhook_gitlab.py @@ -0,0 +1,196 @@ +"""GitLab webhook payload parser.""" + +from irc_format import ( + color, bold, + COLOR_BRANCH, COLOR_ID, COLOR_POSITIVE, COLOR_NEGATIVE, +) + +EVENT_CATEGORIES = { + "ping": ["ping"], + "code": ["push"], + "pr-minimal": [ + "merge_request/open", "merge_request/close", + "merge_request/reopen", "merge_request/merge", + ], + "pr": [ + "merge_request/open", "merge_request/close", + "merge_request/reopen", "merge_request/update", "merge_request/merge", + "note+mergerequest", "confidential_note+mergerequest", + ], + "pr-all": ["merge_request", "note+mergerequest", "confidential_note+mergerequest"], + "issue-minimal": [ + "issue/open", "issue/close", "issue/reopen", + "confidential_issue/open", "confidential_issue/close", "confidential_issue/reopen", + ], + "issue": [ + "issue/open", "issue/close", "issue/reopen", "issue/update", + "confidential_issue/open", "confidential_issue/close", + "confidential_issue/reopen", "confidential_issue/update", + "note+issue", "confidential_note+issue", + ], + "issue-all": ["issue", "confidential_issue", "note+issue", "confidential_note+issue"], + "repo": ["tag_push"], +} + +ISSUE_ACTIONS = { + "open": "opened", "close": "closed", + "reopen": "reopened", "update": "updated", "merge": "merged", +} +WIKI_ACTIONS = { + "create": "created", "update": "updated", "delete": "deleted", +} + + +def _short(h): + return h[:7] + + +def names(data, headers): + if "project" in data: + full_name = data["project"]["path_with_namespace"] + else: + full_name = data.get("project_name", "").replace(" ", "") + parts = full_name.split("/", 1) + repo_user = parts[0] if parts else "" + repo_name = parts[1] if len(parts) > 1 else "" + organisation = None + if full_name.count("/") == 2: + organisation = repo_user + repo_user = full_name.rsplit("/", 1)[0] + return full_name, repo_user, repo_name, organisation + + +def branch(data, headers): + if "ref" in data: + return data["ref"].rpartition("/")[2] + return None + + +def is_private(data, headers): + project = data.get("project", {}) + return project.get("visibility_level", 0) != 20 + + +def event(data, headers): + ev = headers.get("X-GitLab-Event", "").rsplit(" ", 1)[0].lower().replace(" ", "_") + action = None + category = None + oa = data.get("object_attributes", {}) + if "action" in oa: + action = oa["action"] + if "noteable_type" in oa: + nt = oa["noteable_type"].lower() + category = f"{ev}+{nt}" + parts = [ev] + if action: + parts.append(f"{ev}/{action}") + if category: + parts.append(category) + if action: + parts.append(f"{category}/{action}") + return parts + + +def event_categories(ev): + return EVENT_CATEGORIES.get(ev, [ev]) + + +def parse(full_name, ev, data, headers): + dispatch = { + "push": _push, + "tag_push": _tag_push, + "merge_request": _merge_request, + "issue": _issues, + "confidential_issue": _issues, + "note": _note, + "confidential_note": _note, + "wiki_page": lambda fn, d: _wiki(d), + } + fn = dispatch.get(ev) + if fn: + return fn(full_name, data) + return [] + + +def _push(full_name, data): + branch_str = color(data["ref"].rpartition("/")[2], COLOR_BRANCH) + author = bold(data["user_username"]) + commits = data.get("commits", []) + outputs = [] + if len(commits) <= 3: + for c in commits: + h = color(_short(c["id"]), COLOR_ID) + msg = c["message"].split("\n")[0].strip() + outputs.append((f"{author} pushed {h} to {branch_str}: {msg}", c["url"])) + else: + outputs.append((f"{author} pushed {len(commits)} commits to {branch_str}", None)) + return outputs + + +def _tag_push(full_name, data): + after = data.get("after", "") + create = bool(after.strip("0")) + tag = color(data["ref"].rsplit("/", 1)[-1], COLOR_BRANCH) + author = bold(data["user_username"]) + action = "created" if create else "deleted" + return [(f"{author} {action} a tag: {tag}", None)] + + +def _merge_request(full_name, data): + oa = data["object_attributes"] + num = color(f"!{oa['iid']}", COLOR_ID) + action = oa["action"] + branch_str = color(oa["target_branch"], COLOR_BRANCH) + author = bold(data["user"]["username"]) + title = oa["title"] + url = oa["url"] + + if action == "open": + desc = f"requested {num} merge into {branch_str}" + elif action == "close": + desc = f"{color('closed', COLOR_NEGATIVE)} {num}" + elif action == "merge": + desc = f"{color('merged', COLOR_POSITIVE)} {num} into {branch_str}" + else: + desc = f"{ISSUE_ACTIONS.get(action, action)} {num}" + + return [(f"[MR] {author} {desc}: {title}", url)] + + +def _issues(full_name, data): + oa = data["object_attributes"] + if "action" not in oa: + return [] + num = color(f"#{oa['iid']}", COLOR_ID) + action = ISSUE_ACTIONS.get(oa["action"], oa["action"]) + title = oa["title"] + author = bold(data["user"]["username"]) + url = oa["url"] + return [(f"[issue] {author} {action} {num}: {title}", url)] + + +def _note(full_name, data): + oa = data["object_attributes"] + type_ = oa.get("noteable_type", "") + if type_ == "Issue" and "issue" in data: + obj = data["issue"] + label = "issue" + elif type_ == "MergeRequest" and "merge_request" in data: + obj = data["merge_request"] + label = "MR" + else: + return [] + num = color(f"#{obj['iid']}", COLOR_ID) + title = obj["title"] + commenter = bold(data["user"]["username"]) + url = oa["url"] + return [(f"[{label}] {commenter} commented on {num}: {title}", url)] + + +def _wiki(data): + oa = data["object_attributes"] + author = bold(data["user"]["username"]) + action = WIKI_ACTIONS.get(oa["action"], oa["action"]) + title = oa["title"] + url = oa["url"] + return [(f"{author} {action} a wiki page: {title}", url)] diff --git a/webhook_server.py b/webhook_server.py new file mode 100644 index 0000000..c9f72a6 --- /dev/null +++ b/webhook_server.py @@ -0,0 +1,194 @@ +"""Tiny async HTTP server for receiving git forge webhooks.""" + +import asyncio +import hashlib +import hmac +import json +import logging +import urllib.parse +from typing import Callable, Optional + +log = logging.getLogger("webhook") + +# Max payload size: 10 MB +MAX_BODY = 10 * 1024 * 1024 + + +class WebhookServer: + def __init__(self, host: str, port: int, deliver: Callable, + secrets: Optional[dict] = None): + """ + deliver: async callable(forge, headers, data) + forge is 'github' | 'gitea' | 'gitlab' + secrets: per-forge secrets, e.g.: + {'github': 'abc', 'gitea': 'xyz', 'gitlab': 'def'} + + Verification supports two modes (both optional, both can coexist): + + 1. URL token — append ?secret= to the webhook URL you + configure in the forge. Simplest to set up. + e.g. https://yourhost/github?secret=abc + + 2. HMAC header — configure the same secret in the forge's webhook + settings. GitHub/Gitea send HMAC-SHA256; GitLab + sends the token directly as X-Gitlab-Token. + + If a secret is configured for a forge, the URL token is checked first. + If that passes, the request is accepted without checking HMAC headers. + If no URL token is present, HMAC header verification is attempted. + If neither passes, the request is rejected with 403. + If no secret is configured for a forge, all requests are accepted. + """ + self._host = host + self._port = port + self._deliver = deliver + self._secrets = { + forge: s.encode() + for forge, s in (secrets or {}).items() + if s + } + + async def run(self): + server = await asyncio.start_server( + self._handle, self._host, self._port) + addr = server.sockets[0].getsockname() + log.info("Webhook server listening on %s:%d", *addr) + async with server: + await server.serve_forever() + + # ── HTTP parsing ────────────────────────────────────────────────────────── + + async def _handle(self, reader: asyncio.StreamReader, + writer: asyncio.StreamWriter): + try: + await self._dispatch(reader, writer) + except Exception as e: + log.exception("Error handling webhook request: %s", e) + self._respond(writer, 500, "Internal Server Error") + finally: + try: + writer.close() + except Exception: + pass + + async def _dispatch(self, reader, writer): + try: + request_line = (await asyncio.wait_for( + reader.readline(), timeout=10)).decode() + except asyncio.TimeoutError: + self._respond(writer, 408, "Request Timeout") + return + + parts = request_line.strip().split(" ") + if len(parts) < 2: + self._respond(writer, 400, "Bad Request") + return + + method, path = parts[0], parts[1] + + # Read headers + headers = {} + while True: + try: + line = (await asyncio.wait_for( + reader.readline(), timeout=10)).decode().strip() + except asyncio.TimeoutError: + self._respond(writer, 408, "Request Timeout") + return + if not line: + break + if ":" in line: + k, _, v = line.partition(":") + headers[k.strip()] = v.strip() + + if method != "POST": + self._respond(writer, 405, "Method Not Allowed") + return + + # Parse path and query string + # Supports /github?secret=abc, /gitea?secret=xyz, etc. + parsed = urllib.parse.urlparse(path) + clean_path = parsed.path.rstrip("/").lstrip("/").lower() + if clean_path not in ("github", "gitea", "gitlab"): + self._respond(writer, 404, "Not Found") + return + forge = clean_path + qs_params = urllib.parse.parse_qs(parsed.query) + url_secret = qs_params.get("secret", [None])[0] + + # Read body + content_length = int(headers.get("Content-Length", 0)) + if content_length > MAX_BODY: + self._respond(writer, 413, "Payload Too Large") + return + + try: + body = await asyncio.wait_for( + reader.read(content_length), timeout=30) + except asyncio.TimeoutError: + self._respond(writer, 408, "Request Timeout") + return + + # Verification — only enforced if a secret is configured for this forge + expected_secret = self._secrets.get(forge) + if expected_secret: + if url_secret is not None: + # Mode 1: ?secret= URL token — simple constant-time compare + if not hmac.compare_digest(url_secret.encode(), expected_secret): + log.warning("[%s] URL secret mismatch", forge) + self._respond(writer, 403, "Forbidden") + return + else: + # Mode 2: HMAC signature header + if not self._verify_hmac(forge, headers, body, expected_secret): + log.warning("[%s] HMAC signature verification failed", forge) + self._respond(writer, 403, "Forbidden") + return + + # Parse body + content_type = headers.get("Content-Type", "") + if "x-www-form-urlencoded" in content_type: + qs = urllib.parse.parse_qs(body.decode()) + raw = qs.get("payload", ["{}"])[0] + else: + raw = body.decode() + + try: + data = json.loads(raw) + except json.JSONDecodeError as e: + log.warning("[%s] JSON parse error: %s", forge, e) + self._respond(writer, 400, "Bad JSON") + return + + self._respond(writer, 200, "OK") + asyncio.create_task(self._deliver(forge, headers, data)) + + @staticmethod + def _verify_hmac(forge: str, headers: dict, body: bytes, + secret: bytes) -> bool: + if forge == "github": + sig_header = headers.get("X-Hub-Signature-256", "") + if not sig_header.startswith("sha256="): + return False + expected = hmac.new(secret, body, hashlib.sha256).hexdigest() + return hmac.compare_digest(sig_header[7:], expected) + elif forge == "gitea": + sig_header = headers.get("X-Gitea-Signature", "") + expected = hmac.new(secret, body, hashlib.sha256).hexdigest() + return hmac.compare_digest(sig_header, expected) + elif forge == "gitlab": + token = headers.get("X-Gitlab-Token", "") + return hmac.compare_digest(token.encode(), secret) + return True + + @staticmethod + def _respond(writer, code: int, message: str): + body = message.encode() + response = ( + f"HTTP/1.1 {code} {message}\r\n" + f"Content-Length: {len(body)}\r\n" + f"Content-Type: text/plain\r\n" + f"Connection: close\r\n" + f"\r\n" + ).encode() + body + writer.write(response)