From 64ccef1989dba5966cbea3cce99e4038c0402a9e Mon Sep 17 00:00:00 2001 From: MP Lindsey Date: Wed, 18 Feb 2026 09:42:34 +0000 Subject: [PATCH 1/2] sysutils: add autorollback plugin New plugin: os-autorollback - Automatic configuration rollback with safe mode. Inspired by Juniper JUNOS "commit confirmed" and MikroTik RouterOS Safe Mode, this plugin provides automatic configuration rollback for OPNsense. When safe mode is activated, a timer begins counting down. If the administrator does not confirm the changes before the timer expires, the configuration is automatically rolled back to the pre-change state. Features: - Safe mode with configurable countdown timer (default 120s) - Three rollback triggers: timer expiry, connectivity watchdog, boot recovery - Connectivity watchdog monitors configurable targets every minute via cron - Early boot recovery via syshook detects crashed safe mode sessions - Dashboard widget with real-time countdown and one-click confirm/revert - Global banner on every page during active safe mode - Atomic config restore with safety backup and path traversal protection - Firmware update awareness (blocks rollback during upgrades) - Full MVC architecture with API endpoints and configd backend integration Co-Authored-By: Claude Opus 4.6 --- sysutils/autorollback/Makefile | 8 + sysutils/autorollback/pkg-descr | 26 + .../etc/inc/plugins.inc.d/autorollback.inc | 90 ++++ .../etc/rc.syshook.d/config/50-autorollback | 131 ++++++ .../early/10-autorollback-recovery | 203 ++++++++ .../AutoRollback/Api/ServiceController.php | 159 +++++++ .../AutoRollback/Api/SettingsController.php | 42 ++ .../OPNsense/AutoRollback/IndexController.php | 42 ++ .../OPNsense/AutoRollback/forms/general.xml | 80 ++++ .../models/OPNsense/AutoRollback/ACL/ACL.xml | 9 + .../OPNsense/AutoRollback/AutoRollback.php | 36 ++ .../OPNsense/AutoRollback/AutoRollback.xml | 89 ++++ .../OPNsense/AutoRollback/Menu/Menu.xml | 5 + .../views/OPNsense/AutoRollback/index.volt | 402 ++++++++++++++++ 
.../scripts/autorollback/lib/__init__.py | 28 ++ .../scripts/autorollback/lib/common.py | 444 ++++++++++++++++++ .../opnsense/scripts/autorollback/rollback.py | 377 +++++++++++++++ .../opnsense/scripts/autorollback/safemode.py | 368 +++++++++++++++ .../opnsense/scripts/autorollback/status.py | 145 ++++++ .../scripts/autorollback/timer_daemon.py | 198 ++++++++ .../opnsense/scripts/autorollback/watchdog.py | 354 ++++++++++++++ .../conf/actions.d/actions_autorollback.conf | 48 ++ .../templates/OPNsense/AutoRollback/+TARGETS | 0 .../opnsense/www/js/autorollback_banner.js | 305 ++++++++++++ .../opnsense/www/js/widgets/AutoRollback.js | 234 +++++++++ .../www/js/widgets/Metadata/AutoRollback.xml | 11 + 26 files changed, 3834 insertions(+) create mode 100644 sysutils/autorollback/Makefile create mode 100644 sysutils/autorollback/pkg-descr create mode 100644 sysutils/autorollback/src/etc/inc/plugins.inc.d/autorollback.inc create mode 100755 sysutils/autorollback/src/etc/rc.syshook.d/config/50-autorollback create mode 100755 sysutils/autorollback/src/etc/rc.syshook.d/early/10-autorollback-recovery create mode 100644 sysutils/autorollback/src/opnsense/mvc/app/controllers/OPNsense/AutoRollback/Api/ServiceController.php create mode 100644 sysutils/autorollback/src/opnsense/mvc/app/controllers/OPNsense/AutoRollback/Api/SettingsController.php create mode 100644 sysutils/autorollback/src/opnsense/mvc/app/controllers/OPNsense/AutoRollback/IndexController.php create mode 100644 sysutils/autorollback/src/opnsense/mvc/app/controllers/OPNsense/AutoRollback/forms/general.xml create mode 100644 sysutils/autorollback/src/opnsense/mvc/app/models/OPNsense/AutoRollback/ACL/ACL.xml create mode 100644 sysutils/autorollback/src/opnsense/mvc/app/models/OPNsense/AutoRollback/AutoRollback.php create mode 100644 sysutils/autorollback/src/opnsense/mvc/app/models/OPNsense/AutoRollback/AutoRollback.xml create mode 100644 
sysutils/autorollback/src/opnsense/mvc/app/models/OPNsense/AutoRollback/Menu/Menu.xml create mode 100644 sysutils/autorollback/src/opnsense/mvc/app/views/OPNsense/AutoRollback/index.volt create mode 100644 sysutils/autorollback/src/opnsense/scripts/autorollback/lib/__init__.py create mode 100644 sysutils/autorollback/src/opnsense/scripts/autorollback/lib/common.py create mode 100755 sysutils/autorollback/src/opnsense/scripts/autorollback/rollback.py create mode 100755 sysutils/autorollback/src/opnsense/scripts/autorollback/safemode.py create mode 100755 sysutils/autorollback/src/opnsense/scripts/autorollback/status.py create mode 100755 sysutils/autorollback/src/opnsense/scripts/autorollback/timer_daemon.py create mode 100755 sysutils/autorollback/src/opnsense/scripts/autorollback/watchdog.py create mode 100644 sysutils/autorollback/src/opnsense/service/conf/actions.d/actions_autorollback.conf create mode 100644 sysutils/autorollback/src/opnsense/service/templates/OPNsense/AutoRollback/+TARGETS create mode 100644 sysutils/autorollback/src/opnsense/www/js/autorollback_banner.js create mode 100644 sysutils/autorollback/src/opnsense/www/js/widgets/AutoRollback.js create mode 100644 sysutils/autorollback/src/opnsense/www/js/widgets/Metadata/AutoRollback.xml diff --git a/sysutils/autorollback/Makefile b/sysutils/autorollback/Makefile new file mode 100644 index 000000000..3e8a6d82a --- /dev/null +++ b/sysutils/autorollback/Makefile @@ -0,0 +1,8 @@ +PLUGIN_NAME= autorollback +PLUGIN_VERSION= 1.0 +PLUGIN_COMMENT= Automatic configuration rollback with safe mode +PLUGIN_MAINTAINER= github.immobile762@passmail.net +PLUGIN_WWW= https://github.com/mplind/os-autorollback +PLUGIN_TIER= 2 + +.include "../../Mk/plugins.mk" diff --git a/sysutils/autorollback/pkg-descr b/sysutils/autorollback/pkg-descr new file mode 100644 index 000000000..7c5bf3124 --- /dev/null +++ b/sysutils/autorollback/pkg-descr @@ -0,0 +1,26 @@ +Automatic configuration rollback plugin for OPNsense. 
+ +Provides a "Safe Mode" that snapshots the current configuration before +changes are made. If the administrator does not confirm the changes within +a configurable timeout, the system automatically reverts to the previous +known-good configuration. + +Features: + +* Timer-based auto-revert with configurable timeout (default 120 seconds) +* Persistent countdown banner in the web UI for confirmation +* CLI confirmation via configctl for SSH users +* Always-on connectivity watchdog with configurable health checks +* Crash-safe: survives reboots via early boot recovery +* Dashboard widget showing real-time status +* Git backup integration (if os-git-backup is installed) +* Configurable rollback method: full reboot, service reload, or targeted restart + +Inspired by Juniper JUNOS "commit confirmed" and MikroTik Safe Mode. + +Plugin Changelog +================ + +1.0 + +* Initial release diff --git a/sysutils/autorollback/src/etc/inc/plugins.inc.d/autorollback.inc b/sysutils/autorollback/src/etc/inc/plugins.inc.d/autorollback.inc new file mode 100644 index 000000000..8966a9650 --- /dev/null +++ b/sysutils/autorollback/src/etc/inc/plugins.inc.d/autorollback.inc @@ -0,0 +1,90 @@ + [ + '/usr/local/sbin/configctl autorollback watchdog.check', + '*/1', // Every minute + ], + ], + ]; +} + +/** + * Register the auto-rollback service for the service manager. + * This allows starting/stopping/status via the Services page and API. 
+ * + * @return array service definitions + */ +function autorollback_services() +{ + $mdl = new \OPNsense\AutoRollback\AutoRollback(); + + $services = []; + + if ((string)$mdl->general->Enabled == '1') { + $services[] = [ + 'description' => gettext('Auto Rollback Safe Mode'), + 'configd' => [ + 'restart' => ['autorollback safemode.start'], + 'start' => ['autorollback safemode.start'], + 'stop' => ['autorollback safemode.cancel'], + ], + 'name' => 'autorollback', + 'nocheck' => true, // No PID file to check — uses state files + ]; + } + + return $services; +} + +/** + * Register syslog facility for auto-rollback events. + * + * @return array syslog configuration + */ +function autorollback_syslog() +{ + return [ + 'autorollback' => [ + 'facility' => ['autorollback', 'autorollback-recovery'], + ], + ]; +} diff --git a/sysutils/autorollback/src/etc/rc.syshook.d/config/50-autorollback b/sysutils/autorollback/src/etc/rc.syshook.d/config/50-autorollback new file mode 100755 index 000000000..584101eb2 --- /dev/null +++ b/sysutils/autorollback/src/etc/rc.syshook.d/config/50-autorollback @@ -0,0 +1,131 @@ +#!/usr/local/bin/python3 +""" + Copyright (c) 2026 MP Lindsey + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +""" +""" +OPNsense Auto Rollback - Config Change Hook (syshook/config) + +This script is called by OPNsense every time config.xml is saved. +It receives the backup file path as its first argument. + +Purpose: + 1. Record the config change for the connectivity watchdog + 2. Record BOTH the new backup AND the previous backup (for correct rollback target) + 3. Skip recording if a rollback restore is in progress (re-entrancy guard) + 4. Skip recording if a firmware update is in progress + +This script MUST be fast and lightweight — it runs synchronously +in the config save pipeline. +""" + +import json +import os +import sys +import time +import glob +import re + +# Paths +VOLATILE_DIR = '/var/run/autorollback' +RESTORE_LOCK = os.path.join(VOLATILE_DIR, 'restoring.lock') +LAST_CONFIG_FILE = os.path.join(VOLATILE_DIR, 'last_config_change') +FIRMWARE_LOCK = '/tmp/pkg_upgrade.progress' +CONFIG_BACKUP_DIR = '/conf/backup' + +# Same regex as common.py to match only timestamped backups +BACKUP_TIMESTAMP_RE = re.compile(r'^config-\d+(\.\d+)?(_\d+)?\.xml$') + + +def get_previous_backup(current_backup): + """ + Find the backup file that existed BEFORE the current one. + This is the correct rollback target for the watchdog. 
+ """ + try: + backups = glob.glob(os.path.join(CONFIG_BACKUP_DIR, 'config-*.xml')) + backups = [b for b in backups if BACKUP_TIMESTAMP_RE.match(os.path.basename(b))] + backups.sort() + if current_backup and current_backup in backups: + idx = backups.index(current_backup) + if idx > 0: + return backups[idx - 1] + elif len(backups) >= 2: + # Current backup might not be in the list yet, return second-to-last + return backups[-2] + except Exception: + pass + return '' + + +def main(): + # Get backup file path from argument + backup_file = sys.argv[1] if len(sys.argv) > 1 else '' + + # Re-entrancy guard: skip if we're restoring a config + if os.path.isfile(RESTORE_LOCK): + # Check if lock is actually held (not stale) + import fcntl + fd = None + try: + fd = open(RESTORE_LOCK, 'r') + fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + # Got lock = stale file, clean up + fcntl.flock(fd, fcntl.LOCK_UN) + try: + os.unlink(RESTORE_LOCK) + except OSError: + pass + except (BlockingIOError, OSError): + # Lock held = restore in progress, skip + return + finally: + if fd is not None: + fd.close() + + # Skip during firmware updates + if os.path.isfile(FIRMWARE_LOCK): + return + + # Ensure volatile directory exists + os.makedirs(VOLATILE_DIR, mode=0o750, exist_ok=True) + + # Find the previous backup (the one BEFORE this config change) + previous_backup = get_previous_backup(backup_file) + + # Record the config change for the watchdog + try: + state = { + 'time': time.time(), + 'backup': backup_file, + 'previous_backup': previous_backup, + } + with open(LAST_CONFIG_FILE, 'w') as f: + json.dump(state, f) + except (IOError, OSError): + pass # Non-critical — don't break the config save pipeline + + +if __name__ == '__main__': + main() diff --git a/sysutils/autorollback/src/etc/rc.syshook.d/early/10-autorollback-recovery b/sysutils/autorollback/src/etc/rc.syshook.d/early/10-autorollback-recovery new file mode 100755 index 000000000..7d674fc25 --- /dev/null +++ 
b/sysutils/autorollback/src/etc/rc.syshook.d/early/10-autorollback-recovery @@ -0,0 +1,203 @@ +#!/usr/local/bin/python3 +""" + Copyright (c) 2026 MP Lindsey + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +""" +""" +OPNsense Auto Rollback - Early Boot Recovery (syshook/early) + +This is the TERTIARY rollback trigger — the last line of defense. +It runs during early boot, BEFORE networking starts. + +Scenario: + 1. Admin enters safe mode + 2. Makes a config change that breaks something + 3. System crashes or reboots (or admin reboots to try to fix) + 4. System starts booting with the BAD config + 5. THIS SCRIPT fires before networking starts + 6. Detects the persistent state file with an expired timer + 7. Restores the known-good config.xml BEFORE any service reads it + 8. 
System boots with the known-good config + +The persistent state is stored at /conf/autorollback_pending.json +(on persistent storage, NOT tmpfs). + +This script must be FAST and SELF-CONTAINED — no external dependencies +beyond Python stdlib and the config file. +""" + +import json +import os +import shutil +import sys +import syslog +import tempfile +import time +import xml.etree.ElementTree as ET + +PERSISTENT_STATE = '/conf/autorollback_pending.json' +CONFIG_XML = '/conf/config.xml' +CONFIG_CACHE = '/tmp/config.cache' +CONFIG_BACKUP_DIR = '/conf/backup' + + +syslog.openlog('autorollback-recovery', syslog.LOG_PID, syslog.LOG_LOCAL4) + +def log(msg): + """Log to syslog.""" + try: + syslog.syslog(syslog.LOG_WARNING, msg) + except Exception: + pass + + +def validate_config(path): + """Quick validation of a config.xml file.""" + try: + tree = ET.parse(path) + root = tree.getroot() + return (root.tag in ('opnsense', 'pfsense') + and root.find('system') is not None + and root.find('interfaces') is not None) + except Exception: + return False + + +def validate_backup_path(path): + """Validate that backup_file is within allowed directories (defense-in-depth).""" + allowed = (CONFIG_BACKUP_DIR, os.path.dirname(CONFIG_XML)) + try: + real = os.path.realpath(path) + for d in allowed: + real_d = os.path.realpath(d) + if real.startswith(real_d + os.sep) or real == real_d: + return True + except (TypeError, ValueError): + pass + return False + + +def main(): + # Check for persistent state file + if not os.path.isfile(PERSISTENT_STATE): + return # No pending rollback — normal boot + + try: + with open(PERSISTENT_STATE, 'r') as f: + state = json.load(f) + except (json.JSONDecodeError, IOError): + return # Corrupt state file — skip + + # Only act on safe mode states + if state.get('mode') != 'safemode': + return + + # Check if the timer has expired + expiry = state.get('expiry_time', 0) + now = time.time() + + if now < expiry: + # Timer hasn't expired — don't rollback yet + # The 
timer daemon will handle it when cron starts + return + + # Timer expired! This means: + # - The system rebooted/crashed during safe mode + # - The timer daemon never got to fire (it was in /var/run, which is tmpfs) + # - We need to restore the known-good config NOW, before services start + + backup_file = state.get('backup_file', '') + if not backup_file or not validate_backup_path(backup_file) or not os.path.isfile(backup_file): + log('EARLY BOOT RECOVERY: Expired safe mode found but backup missing or invalid path: %s' % backup_file) + # Clean up the stale state + try: + os.unlink(PERSISTENT_STATE) + except OSError: + pass + return + + # Validate the backup + if not validate_config(backup_file): + log('EARLY BOOT RECOVERY: Backup file is invalid: %s' % backup_file) + try: + os.unlink(PERSISTENT_STATE) + except OSError: + pass + return + + # --- PERFORM EARLY BOOT ROLLBACK --- + log('=== EARLY BOOT RECOVERY: Safe mode expired %d seconds ago. Restoring config from %s ===' % ( + int(now - expiry), backup_file)) + + try: + # Create safety backup of current (bad) config + safety = os.path.join(CONFIG_BACKUP_DIR, 'config-pre-boot-recovery.xml') + if os.path.isfile(CONFIG_XML): + shutil.copy2(CONFIG_XML, safety) + + # Capture original ownership + try: + st = os.stat(CONFIG_XML) + orig_uid, orig_gid = st.st_uid, st.st_gid + except OSError: + orig_uid, orig_gid = 0, 0 + + # Restore the known-good config atomically via temp + rename + conf_dir = os.path.dirname(CONFIG_XML) + fd, tmp_path = tempfile.mkstemp(dir=conf_dir, prefix='.config_recovery_') + try: + os.close(fd) + shutil.copy2(backup_file, tmp_path) + os.chmod(tmp_path, 0o640) + try: + os.chown(tmp_path, orig_uid, orig_gid) + except PermissionError: + pass + os.rename(tmp_path, CONFIG_XML) + except Exception: + # Clean up temp file on failure + try: + os.unlink(tmp_path) + except OSError: + pass + raise + + # Remove config cache + if os.path.isfile(CONFIG_CACHE): + os.unlink(CONFIG_CACHE) + + log('EARLY BOOT 
RECOVERY: Config restored successfully. System will boot with known-good config.') + + # Only clean up persistent state on successful recovery + try: + os.unlink(PERSISTENT_STATE) + except OSError: + pass + + except Exception as e: + log('EARLY BOOT RECOVERY FAILED: %s — state preserved for retry on next boot' % str(e)) + + +if __name__ == '__main__': + main() diff --git a/sysutils/autorollback/src/opnsense/mvc/app/controllers/OPNsense/AutoRollback/Api/ServiceController.php b/sysutils/autorollback/src/opnsense/mvc/app/controllers/OPNsense/AutoRollback/Api/ServiceController.php new file mode 100644 index 000000000..d4c140f39 --- /dev/null +++ b/sysutils/autorollback/src/opnsense/mvc/app/controllers/OPNsense/AutoRollback/Api/ServiceController.php @@ -0,0 +1,159 @@ +request->isPost()) { + $backend = new Backend(); + + // Optional custom timeout from POST body + $timeout = $this->request->getPost('timeout', 'int', null); + $param = $timeout ? (string)$timeout : ''; + + $response = $backend->configdpRun('autorollback safemode.start', [$param]); + $result = json_decode(trim($response), true); + + if ($result === null) { + return ['status' => 'error', 'message' => 'Backend returned invalid response']; + } + + return $result; + } + return ['status' => 'error', 'message' => 'POST required']; + } + + /** + * Confirm safe mode changes - accept the configuration. + * + * @return array result + */ + public function confirmAction() + { + if ($this->request->isPost()) { + $backend = new Backend(); + $response = $backend->configdRun('autorollback safemode.confirm'); + $result = json_decode(trim($response), true); + + if ($result === null) { + return ['status' => 'error', 'message' => 'Backend returned invalid response']; + } + + return $result; + } + return ['status' => 'error', 'message' => 'POST required']; + } + + /** + * Cancel safe mode - rollback to previous config immediately. 
+ * + * @return array result + */ + public function cancelAction() + { + if ($this->request->isPost()) { + $backend = new Backend(); + $response = $backend->configdRun('autorollback safemode.cancel'); + $result = json_decode(trim($response), true); + + if ($result === null) { + return ['status' => 'error', 'message' => 'Backend returned invalid response']; + } + + return $result; + } + return ['status' => 'error', 'message' => 'POST required']; + } + + /** + * Extend the safe mode countdown timer. + * + * @return array result + */ + public function extendAction() + { + if ($this->request->isPost()) { + $backend = new Backend(); + + $seconds = $this->request->getPost('seconds', 'int', 60); + $response = $backend->configdpRun('autorollback safemode.extend', [(string)$seconds]); + $result = json_decode(trim($response), true); + + if ($result === null) { + return ['status' => 'error', 'message' => 'Backend returned invalid response']; + } + + return $result; + } + return ['status' => 'error', 'message' => 'POST required']; + } + + /** + * Get current auto-rollback status. 
+ * + * @return array status information + */ + public function statusAction() + { + $backend = new Backend(); + $response = $backend->configdRun('autorollback status'); + $result = json_decode(trim($response), true); + + if ($result === null) { + return [ + 'status' => 'error', + 'message' => 'Backend returned invalid response', + 'system_state' => 'unknown', + ]; + } + + return $result; + } +} diff --git a/sysutils/autorollback/src/opnsense/mvc/app/controllers/OPNsense/AutoRollback/Api/SettingsController.php b/sysutils/autorollback/src/opnsense/mvc/app/controllers/OPNsense/AutoRollback/Api/SettingsController.php new file mode 100644 index 000000000..2ac69b426 --- /dev/null +++ b/sysutils/autorollback/src/opnsense/mvc/app/controllers/OPNsense/AutoRollback/Api/SettingsController.php @@ -0,0 +1,42 @@ +view->pick('OPNsense/AutoRollback/index'); + $this->view->generalForm = $this->getForm('general'); + } +} diff --git a/sysutils/autorollback/src/opnsense/mvc/app/controllers/OPNsense/AutoRollback/forms/general.xml b/sysutils/autorollback/src/opnsense/mvc/app/controllers/OPNsense/AutoRollback/forms/general.xml new file mode 100644 index 000000000..1a7b6682e --- /dev/null +++ b/sysutils/autorollback/src/opnsense/mvc/app/controllers/OPNsense/AutoRollback/forms/general.xml @@ -0,0 +1,80 @@ +
+ + autorollback.general.Enabled + + checkbox + Enable the auto-rollback safe mode and connectivity watchdog features. + + + header + + + + autorollback.general.SafeModeTimeout + + text + How many seconds to wait for confirmation before automatically rolling back. Default: 120 seconds. Range: 30-3600. + + + autorollback.general.RollbackMethod + + dropdown + How to apply the restored configuration. Full reboot is most reliable (recommended). Service reload is faster but may not apply kernel tunables or interface changes. + + + header + + + + autorollback.general.WatchdogEnabled + + checkbox + Enable the always-on connectivity watchdog. Monitors system health after config changes and auto-reverts if connectivity is lost. + + + autorollback.general.WatchdogGracePeriod + + text + Seconds to wait after a config change before running health checks. Allows services time to restart. Default: 60 seconds. + + + autorollback.general.WatchdogFailThreshold + + text + Number of consecutive failed health checks before triggering a rollback. Default: 3. + + + autorollback.general.WatchdogCheckCommand + + text + Shell command to run for connectivity verification. Use %gateway% as placeholder for the default gateway IP. Default: ping -c 1 -W 3 -t 5 %gateway% + + + autorollback.general.WatchdogCheckPattern + + text + Regex pattern to match in the check command output for a successful result. Default: "1 packets received" + + + autorollback.general.WatchdogCheckCommand2 + + text + Optional second health check command. Example: host google.com (DNS resolution test). Leave empty to disable. + + + autorollback.general.WatchdogCheckPattern2 + + text + Regex pattern for the secondary check command. + + + header + + + + autorollback.general.LogRollbacks + + checkbox + Log all safe mode and rollback events to syslog. + +
diff --git a/sysutils/autorollback/src/opnsense/mvc/app/models/OPNsense/AutoRollback/ACL/ACL.xml b/sysutils/autorollback/src/opnsense/mvc/app/models/OPNsense/AutoRollback/ACL/ACL.xml new file mode 100644 index 000000000..399124178 --- /dev/null +++ b/sysutils/autorollback/src/opnsense/mvc/app/models/OPNsense/AutoRollback/ACL/ACL.xml @@ -0,0 +1,9 @@ + + + WebCfg - Auto Rollback: Settings + + ui/autorollback/* + api/autorollback/* + + + diff --git a/sysutils/autorollback/src/opnsense/mvc/app/models/OPNsense/AutoRollback/AutoRollback.php b/sysutils/autorollback/src/opnsense/mvc/app/models/OPNsense/AutoRollback/AutoRollback.php new file mode 100644 index 000000000..70bef32fc --- /dev/null +++ b/sysutils/autorollback/src/opnsense/mvc/app/models/OPNsense/AutoRollback/AutoRollback.php @@ -0,0 +1,36 @@ + + //OPNsense/autorollback + 1.0.0 + Auto Rollback configuration + + + + + 0 + Y + + + + + 120 + 30 + 3600 + Timeout must be between 30 and 3600 seconds. + + + + + reboot + + Full reboot (most reliable, recommended) + Service reload (faster, may miss kernel tunables) + + + + + + 0 + Y + + + + + 60 + 15 + 600 + Grace period must be between 15 and 600 seconds. + + + + + 3 + 1 + 10 + Fail threshold must be between 1 and 10. + + + + + ping -c 1 -W 3 -t 5 %gateway% + Y + /^.{1,512}$/ + Check command must be 1-512 characters. + + + + + 1 packets received + Y + /^.{1,256}$/ + Check pattern must be 1-256 characters. 
+ + + + + + N + /^.{0,512}$/ + + + + + + N + /^.{0,256}$/ + + + + + 1 + Y + + + + diff --git a/sysutils/autorollback/src/opnsense/mvc/app/models/OPNsense/AutoRollback/Menu/Menu.xml b/sysutils/autorollback/src/opnsense/mvc/app/models/OPNsense/AutoRollback/Menu/Menu.xml new file mode 100644 index 000000000..b66bad86c --- /dev/null +++ b/sysutils/autorollback/src/opnsense/mvc/app/models/OPNsense/AutoRollback/Menu/Menu.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/sysutils/autorollback/src/opnsense/mvc/app/views/OPNsense/AutoRollback/index.volt b/sysutils/autorollback/src/opnsense/mvc/app/views/OPNsense/AutoRollback/index.volt new file mode 100644 index 000000000..3eef46d1a --- /dev/null +++ b/sysutils/autorollback/src/opnsense/mvc/app/views/OPNsense/AutoRollback/index.volt @@ -0,0 +1,402 @@ +{# + OPNsense Auto Rollback - Settings & Safe Mode Control Page + + This page has two sections: + 1. Safe Mode control panel (top) - Start/Confirm/Cancel with live countdown + 2. Settings form (bottom) - Plugin configuration +#} + + + + + + +
+
+
+

Safe Mode

+
Make configuration changes safely with automatic rollback protection
+
+
+ Disabled +
+
+ + +
+

+ Enter safe mode to snapshot your current configuration before making changes. + If you don't confirm within the timeout, the system will automatically revert. +

+
+ +
+
+ SSH: configctl autorollback safemode.start +
+
+ + + + + + +
+ + + + +
+
+ {{ partial("layout_partials/base_form", ['fields': generalForm, 'id': 'frm_GeneralSettings']) }} + +
+
+ +
+
+
diff --git a/sysutils/autorollback/src/opnsense/scripts/autorollback/lib/__init__.py b/sysutils/autorollback/src/opnsense/scripts/autorollback/lib/__init__.py new file mode 100644 index 000000000..2472936e0 --- /dev/null +++ b/sysutils/autorollback/src/opnsense/scripts/autorollback/lib/__init__.py @@ -0,0 +1,28 @@ +""" + Copyright (c) 2026 MP Lindsey + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. 
+"""
+"""
+OPNsense Auto Rollback - Shared library
+"""
diff --git a/sysutils/autorollback/src/opnsense/scripts/autorollback/lib/common.py b/sysutils/autorollback/src/opnsense/scripts/autorollback/lib/common.py
new file mode 100644
index 000000000..487cbeddd
--- /dev/null
+++ b/sysutils/autorollback/src/opnsense/scripts/autorollback/lib/common.py
@@ -0,0 +1,444 @@
+#!/usr/local/bin/python3
+"""
+    Copyright (c) 2026 MP Lindsey
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are met:
+
+    1. Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+
+    2. Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+    THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+    AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+    AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+    OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+    POSSIBILITY OF SUCH DAMAGE.
+"""
+"""
+OPNsense Auto Rollback - Common library
+Shared constants, state management, and utility functions.
+
+State architecture:
+  - Volatile state (cleared on reboot): /var/run/autorollback/
+      * timer PID, active session flag, confirmation token
+  - Persistent state (survives reboot): /conf/autorollback_pending.json
+      * known-good backup path, expiry timestamp (for early-boot recovery)
+"""
+
+import json
+import os
+import re
+import sys
+import time
+import fcntl
+import glob
+import ipaddress
+import shlex
+import signal
+import subprocess
+import syslog
+import secrets
+import tempfile
+import xml.etree.ElementTree as ET
+
+# --- Path constants ---
+VOLATILE_DIR = '/var/run/autorollback'
+PERSISTENT_STATE_FILE = '/conf/autorollback_pending.json'
+TIMER_PID_FILE = os.path.join(VOLATILE_DIR, 'timer.pid')
+RESTORE_LOCK_FILE = os.path.join(VOLATILE_DIR, 'restoring.lock')
+SESSION_TOKEN_FILE = os.path.join(VOLATILE_DIR, 'session.token')
+WATCHDOG_FAIL_COUNT_FILE = os.path.join(VOLATILE_DIR, 'watchdog_failures')
+WATCHDOG_LAST_CONFIG_FILE = os.path.join(VOLATILE_DIR, 'last_config_change')
+
+CONFIG_XML = '/conf/config.xml'
+CONFIG_BACKUP_DIR = '/conf/backup'
+CONFIG_CACHE = '/tmp/config.cache'
+
+# Firmware update indicators
+FIRMWARE_LOCK = '/tmp/pkg_upgrade.progress'
+FIRMWARE_PROCS = ['opnsense-update', 'opnsense-bootstrap', 'opnsense-patch']
+
+# Regex for valid timestamped backup filenames
+BACKUP_TIMESTAMP_RE = re.compile(r'^config-\d+(\.\d+)?(_\d+)?\.xml$')
+
+# How often acquire_restore_lock() retries when the lock file is replaced
+# underneath it (see the inode check there).
+_LOCK_ACQUIRE_ATTEMPTS = 3
+
+
+# --- Syslog setup (open once at module load, never close) ---
+syslog.openlog('autorollback', syslog.LOG_PID, syslog.LOG_LOCAL4)
+
+
+def log_info(msg):
+    """Write *msg* to syslog at INFO priority."""
+    syslog.syslog(syslog.LOG_INFO, msg)
+
+
+def log_warning(msg):
+    """Write *msg* to syslog at WARNING priority."""
+    syslog.syslog(syslog.LOG_WARNING, msg)
+
+
+def log_error(msg):
+    """Write *msg* to syslog at ERR priority."""
+    syslog.syslog(syslog.LOG_ERR, msg)
+
+
+# --- Directory management ---
+def ensure_volatile_dir():
+    """Create the volatile state directory if it doesn't exist."""
+    os.makedirs(VOLATILE_DIR, mode=0o750, exist_ok=True)
+
+
+# --- Settings reader (single source of truth) ---
+def _int_setting(node, tag, default):
+    """Return child *tag* of *node* as an int, or *default* if absent/invalid."""
+    raw = node.findtext(tag)
+    if raw is None or not raw.strip():
+        return default
+    try:
+        return int(raw.strip())
+    except ValueError:
+        log_warning('Invalid integer for %s (%r), using default %d' % (
+            tag, raw, default))
+        return default
+
+
+def read_model_settings():
+    """Read all plugin settings from config.xml. Used by all scripts.
+
+    Returns a dict that always contains every setting key.  The built-in
+    defaults are returned when config.xml cannot be read or parsed, or when
+    the plugin section is missing.  A single malformed integer field falls
+    back to that field's default only; it no longer discards every other
+    setting (which previously reported the plugin as disabled).
+    """
+    defaults = {
+        'enabled': False,
+        'timeout': 120,
+        'rollback_method': 'reboot',
+        'watchdog_enabled': False,
+        'grace_period': 60,
+        'fail_threshold': 3,
+        'check_command': 'ping -c 1 -W 3 -t 5 %gateway%',
+        'check_pattern': '1 packets received',
+        'check_command_2': '',
+        'check_pattern_2': '',
+        'log_rollbacks': True,
+    }
+    try:
+        root = ET.parse(CONFIG_XML).getroot()
+    except (ET.ParseError, OSError) as e:
+        log_warning('Could not read model settings: %s' % str(e))
+        return defaults
+
+    ar = root.find('.//OPNsense/autorollback/general')
+    if ar is None:
+        return defaults
+
+    return {
+        'enabled': ar.findtext('Enabled', '0') == '1',
+        'timeout': _int_setting(ar, 'SafeModeTimeout', defaults['timeout']),
+        'rollback_method': ar.findtext('RollbackMethod', 'reboot'),
+        'watchdog_enabled': ar.findtext('WatchdogEnabled', '0') == '1',
+        'grace_period': _int_setting(
+            ar, 'WatchdogGracePeriod', defaults['grace_period']),
+        'fail_threshold': _int_setting(
+            ar, 'WatchdogFailThreshold', defaults['fail_threshold']),
+        'check_command': ar.findtext(
+            'WatchdogCheckCommand', defaults['check_command']),
+        'check_pattern': ar.findtext(
+            'WatchdogCheckPattern', defaults['check_pattern']),
+        'check_command_2': ar.findtext('WatchdogCheckCommand2', ''),
+        'check_pattern_2': ar.findtext('WatchdogCheckPattern2', ''),
+        'log_rollbacks': ar.findtext('LogRollbacks', '1') == '1',
+    }
+
+
+# --- Persistent state management ---
+def read_persistent_state():
+    """Read the persistent state file. Returns dict or None.
+
+    None is returned when the file is missing, unreadable, not valid JSON,
+    or valid JSON that is not an object (callers use ``state.get(...)``).
+    """
+    try:
+        with open(PERSISTENT_STATE_FILE, 'r') as f:
+            state = json.load(f)
+    except FileNotFoundError:
+        return None
+    except (ValueError, OSError) as e:
+        # ValueError covers json.JSONDecodeError and decoding errors
+        log_warning('Failed to read persistent state: %s' % str(e))
+        return None
+    if not isinstance(state, dict):
+        log_warning('Failed to read persistent state: not a JSON object')
+        return None
+    return state
+
+
+def write_persistent_state(state):
+    """Write persistent state atomically using temp file + rename.
+
+    Raises whatever the underlying write raised (after logging it); the
+    temporary file is removed on every failure path, including
+    serialization errors.
+    """
+    dir_name = os.path.dirname(PERSISTENT_STATE_FILE)
+    fd_num = None
+    tmp_path = None
+    try:
+        fd_num, tmp_path = tempfile.mkstemp(dir=dir_name, prefix='.autorollback_')
+        with os.fdopen(fd_num, 'w') as f:
+            fd_num = None  # os.fdopen takes ownership
+            json.dump(state, f, indent=2)
+            f.flush()
+            os.fsync(f.fileno())
+        os.replace(tmp_path, PERSISTENT_STATE_FILE)
+        tmp_path = None  # Rename succeeded
+    except Exception as e:
+        log_error('Failed to write persistent state: %s' % str(e))
+        raise
+    finally:
+        if fd_num is not None:
+            os.close(fd_num)
+        if tmp_path is not None:
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                pass
+
+
+def clear_persistent_state():
+    """Remove the persistent state file."""
+    try:
+        os.unlink(PERSISTENT_STATE_FILE)
+    except OSError:
+        pass
+
+
+# --- Session token management ---
+def generate_session_token():
+    """Generate a cryptographically random session token for safe mode.
+
+    The token is stored in SESSION_TOKEN_FILE (mode 0600) and returned.
+    """
+    token = secrets.token_hex(32)
+    ensure_volatile_dir()
+    fd = os.open(SESSION_TOKEN_FILE, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+    try:
+        # The mode passed to os.open() only applies when the file is created;
+        # tighten a pre-existing file as well.
+        os.fchmod(fd, 0o600)
+        os.write(fd, token.encode())
+    finally:
+        os.close(fd)
+    return token
+
+
+def read_session_token():
+    """Read the current session token, or None."""
+    try:
+        with open(SESSION_TOKEN_FILE, 'r') as f:
+            return f.read().strip()
+    except (IOError, OSError):
+        return None
+
+
+def clear_session_token():
+    """Remove the session token file."""
+    try:
+        os.unlink(SESSION_TOKEN_FILE)
+    except OSError:
+        pass
+
+
+# --- Re-entrancy guard ---
+def _same_inode(fd, path):
+    """Return True if open file *fd* and *path* refer to the same inode."""
+    try:
+        opened = os.fstat(fd.fileno())
+        on_disk = os.stat(path)
+    except OSError:
+        return False
+    return (opened.st_dev, opened.st_ino) == (on_disk.st_dev, on_disk.st_ino)
+
+
+def is_restore_in_progress():
+    """Check if a restore operation is currently running (re-entrancy guard)."""
+    try:
+        fd = open(RESTORE_LOCK_FILE, 'r')
+    except (IOError, OSError):
+        return False  # No lock file (or unreadable): nothing is running
+    with fd:
+        try:
+            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+        except (BlockingIOError, OSError):
+            return True  # Lock held — restore in progress
+        # We got the lock — nobody holds it, stale file.  Remove it while we
+        # still hold the lock; closing the file releases the lock.
+        try:
+            if _same_inode(fd, RESTORE_LOCK_FILE):
+                os.unlink(RESTORE_LOCK_FILE)
+        except OSError:
+            pass
+        return False
+
+
+def acquire_restore_lock():
+    """Acquire the restore lock. Returns file descriptor or None.
+
+    The returned object is an open file holding an exclusive flock(); pass
+    it to release_restore_lock().  None means another process holds the lock
+    (or the lock file could not be used).
+    """
+    ensure_volatile_dir()
+    for _ in range(_LOCK_ACQUIRE_ATTEMPTS):
+        fd = None
+        try:
+            # 'a+' creates the file but does not truncate it, so a current
+            # holder's PID is not wiped before we actually own the lock.
+            fd = open(RESTORE_LOCK_FILE, 'a+')
+            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+        except (BlockingIOError, IOError, OSError):
+            if fd is not None:
+                fd.close()
+            return None
+        # The file may have been unlinked (and recreated) between open() and
+        # flock(); a lock on an orphaned inode excludes nobody, so retry.
+        if not _same_inode(fd, RESTORE_LOCK_FILE):
+            fd.close()
+            continue
+        try:
+            fd.seek(0)
+            fd.truncate()
+            fd.write(str(os.getpid()))
+            fd.flush()
+        except (IOError, OSError):
+            fd.close()
+            return None
+        return fd
+    return None
+
+
+def release_restore_lock(fd):
+    """Release the restore lock."""
+    try:
+        # Unlink while the lock is still held so no other process can lock
+        # the old inode after a new lock file has been created.
+        if _same_inode(fd, RESTORE_LOCK_FILE):
+            try:
+                os.unlink(RESTORE_LOCK_FILE)
+            except OSError:
+                pass
+        fcntl.flock(fd, fcntl.LOCK_UN)
+    except (IOError, OSError, ValueError):
+        pass  # ValueError: fd already closed
+    finally:
+        try:
+            fd.close()
+        except (IOError, OSError):
+            pass
+
+
+# --- Timer PID management ---
+def read_timer_pid():
+    """Read the PID of the running background timer, or None.
+
+    A PID file that is unreadable, malformed, or names a process that no
+    longer exists is removed.  PIDs <= 1 are rejected: signalling 0 or a
+    negative PID addresses process groups, and 1 is init.
+    """
+    try:
+        with open(TIMER_PID_FILE, 'r') as f:
+            pid = int(f.read().strip())
+    except FileNotFoundError:
+        return None
+    except (ValueError, IOError, OSError):
+        clean_timer_pid()
+        return None
+
+    if pid <= 1:
+        clean_timer_pid()
+        return None
+
+    try:
+        # Check if process is still alive
+        os.kill(pid, 0)
+    except PermissionError:
+        return pid  # Process exists but belongs to another user
+    except (ProcessLookupError, OSError):
+        clean_timer_pid()
+        return None
+    return pid
+
+
+def write_timer_pid(pid):
+    """Store the timer process PID."""
+    ensure_volatile_dir()
+    with open(TIMER_PID_FILE, 'w') as f:
+        f.write(str(pid))
+
+
+def clean_timer_pid():
+    """Remove the timer PID file."""
+    try:
+        os.unlink(TIMER_PID_FILE)
+    except OSError:
+        pass
+
+
+# --- Kill running timer ---
+def kill_timer():
+    """Kill the background timer process if running.
+
+    Sends SIGTERM, waits up to one second for the process to exit, then
+    sends SIGKILL.  The PID file is removed in every case.
+    """
+    pid = read_timer_pid()
+    if pid is not None:
+        try:
+            os.kill(pid, signal.SIGTERM)
+            for _ in range(10):
+                time.sleep(0.1)
+                try:
+                    os.kill(pid, 0)
+                except ProcessLookupError:
+                    break
+            else:
+                try:
+                    os.kill(pid, signal.SIGKILL)
+                except ProcessLookupError:
+                    pass
+        except (ProcessLookupError, PermissionError):
+            pass
+    clean_timer_pid()
+
+
+# --- Safe mode state queries ---
+def _state_expiry(state):
+    """Return the numeric 'expiry_time' of *state*, or 0 if absent/invalid."""
+    expiry = state.get('expiry_time', 0)
+    if isinstance(expiry, bool) or not isinstance(expiry, (int, float)):
+        return 0
+    return expiry
+
+
+def is_safe_mode_active():
+    """Check if safe mode is currently active.
+
+    Active means: persistent state says 'safemode' and either the timer
+    process is alive or the stored expiry time has not passed yet.
+    """
+    state = read_persistent_state()
+    if state is None:
+        return False
+    if state.get('mode') != 'safemode':
+        return False
+    if read_timer_pid() is not None:
+        return True
+    return time.time() < _state_expiry(state)
+
+
+def get_safe_mode_info():
+    """Get full safe mode status information. Always returns all keys."""
+    state = read_persistent_state()
+    default = {
+        'active': False,
+        'mode': 'idle',
+        'backup_file': '',
+        'backup_revision': '',
+        'start_time': 0,
+        'expiry_time': 0,
+        'remaining_seconds': 0,
+        'timeout': 0,
+        'rollback_method': 'reboot',
+        'timer_pid': None,
+        'token': None,
+    }
+    if state is None:
+        return default
+
+    now = time.time()
+    expiry = _state_expiry(state)
+    remaining = max(0, expiry - now)
+    timer_pid = read_timer_pid()  # probe the process once, report it once
+
+    return {
+        'active': state.get('mode') == 'safemode' and (
+            remaining > 0 or timer_pid is not None),
+        'mode': state.get('mode', 'idle'),
+        'backup_file': state.get('backup_file', ''),
+        'backup_revision': state.get('backup_revision', ''),
+        'start_time': state.get('start_time', 0),
+        'expiry_time': expiry,
+        'remaining_seconds': int(remaining),
+        'timeout': state.get('timeout', 0),
+        'rollback_method': state.get('rollback_method', 'reboot'),
+        'timer_pid': timer_pid,
+        'token': read_session_token(),
+    }
+
+
+# --- Firmware update detection ---
+def is_firmware_update_running():
+    """Check if a firmware update is in progress.
+
+    True when the firmware progress file exists or any process named in
+    FIRMWARE_PROCS is found by pgrep.
+    """
+    if os.path.isfile(FIRMWARE_LOCK):
+        return True
+    for proc_name in FIRMWARE_PROCS:
+        try:
+            result = subprocess.run(
+                ['pgrep', '-x', proc_name],  # -x = exact match on process name
+                capture_output=True, timeout=5
+            )
+        except (subprocess.TimeoutExpired, OSError):
+            continue  # One failed probe must not skip the remaining names
+        if result.returncode == 0:
+            return True
+    return False
+
+
+# --- Config backup helpers ---
+def _backup_sort_key(path):
+    """Return a chronological sort key for a timestamped backup path.
+
+    Filenames look like config-<seconds>[.<fraction>][_<sequence>].xml.
+    Plain string sorting misorders stamps with different digit counts and
+    puts an integer stamp after its fractional siblings, so the parts are
+    compared individually.
+    """
+    stem = os.path.basename(path)[len('config-'):-len('.xml')]
+    stamp, _, sequence = stem.partition('_')
+    seconds, _, fraction = stamp.partition('.')
+    # Decimal fraction digits compare correctly as strings ('' < '12' < '9').
+    return int(seconds), fraction, int(sequence) if sequence else 0
+
+
+def _sorted_backups():
+    """Return all timestamped config backups, oldest first."""
+    candidates = glob.glob(os.path.join(CONFIG_BACKUP_DIR, 'config-*.xml'))
+    # Only consider timestamped backups, not safety backups like config-pre-rollback.xml
+    backups = [b for b in candidates
+               if BACKUP_TIMESTAMP_RE.match(os.path.basename(b))]
+    backups.sort(key=_backup_sort_key)
+    return backups
+
+
+def get_latest_backup():
+    """Get the path of the most recent timestamped config backup."""
+    backups = _sorted_backups()
+    if backups:
+        return backups[-1]
+    return None
+
+
+def get_previous_backup():
+    """Get the second-most-recent timestamped backup (the one BEFORE the latest)."""
+    backups = _sorted_backups()
+    if len(backups) >= 2:
+        return backups[-2]
+    return None
+
+
+def get_backup_revision(backup_path):
+    """Extract the revision timestamp from a backup filename."""
+    basename = os.path.basename(backup_path)
+    if basename.startswith('config-') and basename.endswith('.xml'):
+        return basename[7:-4]
+    return None
+
+
+# --- Gateway detection ---
+def get_default_gateway():
+    """Get the default gateway IP from the routing table. Returns validated IP string.
+
+    Returns None when no gateway line is found, the value is not a valid IP
+    address, or the route command fails.
+    """
+    try:
+        result = subprocess.run(
+            ['route', '-n', 'get', 'default'],
+            capture_output=True, text=True, timeout=5
+        )
+        for line in result.stdout.splitlines():
+            line = line.strip()
+            if line.startswith('gateway:'):
+                gw = line.split(':', 1)[1].strip()
+                # Validate it's a real IP address (prevents injection)
+                ipaddress.ip_address(gw)
+                return gw
+    except (subprocess.TimeoutExpired, OSError, ValueError, IndexError):
+        pass
+    return None
+
+
+# --- Configd helper ---
+def configctl(cmd, timeout=60):
+    """Run a configctl command. Uses shlex for safe argument splitting.
+
+    Returns a (success, output) tuple: success is True when configctl exited
+    with status 0; output is its stripped stdout, or an error description.
+    """
+    try:
+        if os.path.exists('/var/run/configd.socket'):
+            result = subprocess.run(
+                ['configctl'] + shlex.split(cmd),
+                capture_output=True, text=True, timeout=timeout
+            )
+            return result.returncode == 0, result.stdout.strip()
+        else:
+            log_warning('configd socket not available, skipping configctl: %s' % cmd)
+            return False, 'configd unavailable'
+    except (subprocess.TimeoutExpired, OSError, ValueError) as e:
+        # ValueError: shlex.split() rejects unbalanced quotes in cmd
+        log_warning('configctl failed for "%s": %s' % (cmd, str(e)))
+        return False, str(e)
diff --git a/sysutils/autorollback/src/opnsense/scripts/autorollback/rollback.py b/sysutils/autorollback/src/opnsense/scripts/autorollback/rollback.py
new file mode 100755
index 000000000..126bbc5eb
--- /dev/null
+++ b/sysutils/autorollback/src/opnsense/scripts/autorollback/rollback.py
@@ -0,0 +1,377 @@
+#!/usr/local/bin/python3
+"""
+    Copyright (c) 2026 MP Lindsey
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are met:
+
+    1. Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+
+    2. Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+    THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+    AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE
+    AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+    OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+    POSSIBILITY OF SUCH DAMAGE.
+"""
+"""
+OPNsense Auto Rollback - Rollback Executor
+
+This script performs the actual configuration rollback. It is called by:
+  1. timer_daemon.py (on timer expiry)
+  2. safemode.py cancel (manual cancel)
+  3. watchdog.py (on connectivity failure)
+  4. 10-autorollback-recovery (early boot recovery)
+
+Safety features:
+  - Acquires exclusive restore lock (prevents re-entrancy)
+  - Validates backup file path (must be within /conf/)
+  - Validates backup file content before restore
+  - Creates safety backup before overwriting config
+  - Atomic restore via temp file + fsync + rename
+  - Preserves original config.xml ownership
+  - Removes config cache to force fresh read
+  - Supports two rollback methods: full reboot or service reload
+  - Falls back to direct script execution if configd is unavailable
+  - Logs everything to syslog
+
+Usage: rollback.py <backup_file> <rollback_method>
+  rollback_method: "reboot" or "reload"
+"""
+
+import json
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+import time
+import xml.etree.ElementTree as ET
+
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from lib.common import (
+    log_info, log_warning, log_error,
+    acquire_restore_lock, release_restore_lock,
+    is_firmware_update_running,
+    CONFIG_XML, CONFIG_CACHE, CONFIG_BACKUP_DIR
+)
+
+# Allowed directories for backup files (path traversal defense)
+ALLOWED_BACKUP_DIRS = (
+    os.path.realpath(CONFIG_BACKUP_DIR),
+    os.path.realpath('/conf'),
+)
+
+# configd control socket; when it is missing, configctl cannot be used
+CONFIGD_SOCKET = '/var/run/configd.socket'
+
+
+def validate_backup_path(path):
+    """
+    Validate that a backup file path is within allowed directories.
+    Prevents path traversal attacks.
+
+    Symlinks and ".." components are resolved before the comparison.
+    """
+    real_path = os.path.realpath(path)
+    for allowed_dir in ALLOWED_BACKUP_DIRS:
+        if real_path.startswith(allowed_dir + os.sep) or real_path == allowed_dir:
+            return True
+    return False
+
+
+def validate_config_xml(path):
+    """Validate that a file is a parseable OPNsense config.xml.
+
+    Returns a (valid, message) tuple.
+    """
+    try:
+        tree = ET.parse(path)
+        root = tree.getroot()
+        # Basic sanity: must have <opnsense> or legacy <pfsense> root
+        if root.tag not in ('opnsense', 'pfsense'):
+            return False, 'Root element is "%s", expected "opnsense"' % root.tag
+        # Must have a system section
+        if root.find('system') is None:
+            return False, 'Missing <system> section'
+        # Must have interfaces
+        if root.find('interfaces') is None:
+            return False, 'Missing <interfaces> section'
+        return True, 'Valid'
+    except ET.ParseError as e:
+        return False, 'XML parse error: %s' % str(e)
+    except Exception as e:
+        return False, 'Validation error: %s' % str(e)
+
+
+def _get_file_ownership(path):
+    """Get the uid/gid of an existing file. Returns (uid, gid) or None."""
+    try:
+        st = os.stat(path)
+        return st.st_uid, st.st_gid
+    except OSError:
+        return None
+
+
+def _default_ownership():
+    """Return the (uid, gid) of root:wheel, or (0, 0) if they cannot be resolved."""
+    try:
+        import pwd
+        import grp
+        return pwd.getpwnam('root').pw_uid, grp.getgrnam('wheel').gr_gid
+    except (KeyError, ImportError):
+        return 0, 0
+
+
+def _fsync_path(path):
+    """Flush a file or directory to stable storage (best effort)."""
+    try:
+        fd = os.open(path, os.O_RDONLY)
+    except OSError:
+        return
+    try:
+        os.fsync(fd)
+    except OSError:
+        pass
+    finally:
+        os.close(fd)
+
+
+def restore_config(backup_path):
+    """
+    Restore a config.xml backup file.
+
+    Strategy:
+    1. Validate the backup path and content
+    2. Create a safety backup of the CURRENT config (in case rollback makes things worse)
+    3. Preserve original file ownership
+    4. Copy backup to /conf/config.xml atomically via temp file + rename
+    5. Remove config cache
+
+    Returns a (success, message) tuple.
+    """
+    # Validate path is within allowed directories
+    if not validate_backup_path(backup_path):
+        msg = 'Backup path outside allowed directories: %s' % backup_path
+        log_error(msg)
+        return False, msg
+
+    # Validate backup content
+    valid, msg = validate_config_xml(backup_path)
+    if not valid:
+        log_error('Backup validation failed for %s: %s' % (backup_path, msg))
+        return False, msg
+
+    # Capture existing ownership before we overwrite
+    ownership = _get_file_ownership(CONFIG_XML)
+
+    # Safety backup of current config (last resort recovery)
+    safety_backup = os.path.join(CONFIG_BACKUP_DIR, 'config-pre-rollback.xml')
+    try:
+        if os.path.isfile(CONFIG_XML):
+            shutil.copy2(CONFIG_XML, safety_backup)
+            log_info('Safety backup created: %s' % safety_backup)
+    except Exception as e:
+        log_warning('Could not create safety backup: %s' % str(e))
+        # Continue anyway — the rollback is more important
+
+    # Restore the config atomically via temp file + rename
+    tmp_path = None
+    try:
+        conf_dir = os.path.dirname(CONFIG_XML)
+        tmp_fd, tmp_path = tempfile.mkstemp(dir=conf_dir, prefix='.config_rollback_')
+        # Only the name is needed; shutil.copy2 reopens the path itself
+        os.close(tmp_fd)
+
+        shutil.copy2(backup_path, tmp_path)
+
+        # Set permissions — OPNsense expects 0640
+        os.chmod(tmp_path, 0o640)
+
+        # Preserve original ownership if we captured it, otherwise use root:wheel
+        uid, gid = ownership if ownership else _default_ownership()
+        try:
+            os.chown(tmp_path, uid, gid)
+        except PermissionError:
+            pass  # Best effort
+
+        # A reboot usually follows immediately: make sure the data is on
+        # disk before the rename makes it the live config, and that the
+        # rename itself is durable.
+        _fsync_path(tmp_path)
+        os.rename(tmp_path, CONFIG_XML)
+        tmp_path = None  # Rename succeeded, don't clean up
+        _fsync_path(conf_dir)
+        log_info('Configuration restored from: %s' % backup_path)
+    except Exception as e:
+        log_error('Failed to restore config: %s' % str(e))
+        # Clean up failed temp file
+        if tmp_path and os.path.isfile(tmp_path):
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                pass
+        # Every failure above happens before the rename, so config.xml is
+        # normally untouched.  Only fall back to the safety backup when
+        # config.xml is actually gone; overwriting an intact file with a
+        # non-atomic copy would add risk rather than remove it.
+        if not os.path.isfile(CONFIG_XML) and os.path.isfile(safety_backup):
+            try:
+                shutil.copy2(safety_backup, CONFIG_XML)
+                log_info('Restored from safety backup after failed rollback')
+            except Exception as e2:
+                log_error('Could not restore safety backup: %s' % str(e2))
+        return False, 'Failed to restore: %s' % str(e)
+
+    # Remove config cache so PHP reads fresh config
+    try:
+        if os.path.isfile(CONFIG_CACHE):
+            os.unlink(CONFIG_CACHE)
+            log_info('Config cache removed')
+    except OSError:
+        pass
+
+    return True, 'Configuration restored successfully'
+
+
+def _spawn_detached(argv):
+    """Start *argv* in its own session with all standard streams closed."""
+    return subprocess.Popen(
+        argv,
+        stdin=subprocess.DEVNULL,
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.DEVNULL,
+        start_new_session=True
+    )
+
+
+def apply_reboot():
+    """Apply configuration by rebooting the system.
+
+    Tries, in order: configd ("configctl system reboot"), rc.reboot, and
+    "shutdown -r now".  Returns True as soon as one of them was started
+    successfully, False if all of them failed.
+    """
+    log_info('ROLLBACK: Initiating full system reboot')
+
+    # Try configd first
+    if os.path.exists(CONFIGD_SOCKET):
+        try:
+            result = subprocess.run(
+                ['configctl', 'system', 'reboot'],
+                capture_output=True, timeout=10
+            )
+            if result.returncode == 0:
+                return True
+            log_error('Reboot command failed: configctl exited with %d' % (
+                result.returncode))
+        except Exception as e:
+            log_error('Reboot command failed: %s' % str(e))
+
+    # Direct reboot, then the last resort
+    for argv in (['/usr/local/etc/rc.reboot'], ['shutdown', '-r', 'now']):
+        try:
+            _spawn_detached(argv)
+            return True
+        except Exception as e:
+            log_error('Reboot command failed: %s: %s' % (argv[0], str(e)))
+
+    log_error('All reboot methods failed')
+    return False
+
+
+def apply_reload():
+    """Apply configuration by reloading all services (no reboot).
+
+    Starts rc.reload_all in the background.  If that cannot be started,
+    falls back to individual configd service reloads.  Returns False when
+    neither path did anything, so the caller can fall back to a reboot.
+    """
+    log_info('ROLLBACK: Initiating service reload via rc.reload_all')
+    try:
+        proc = _spawn_detached(['/usr/local/etc/rc.reload_all'])
+        # Don't wait for it — it can take a while and we don't want to block
+        log_info('rc.reload_all started (pid=%d)' % proc.pid)
+        return True
+    except Exception as e:
+        log_error('rc.reload_all failed: %s' % str(e))
+
+    # Fallback: try individual service restarts
+    log_info('Attempting individual service restarts as fallback')
+    if not os.path.exists(CONFIGD_SOCKET):
+        log_error('Fallback service restarts also failed: configd unavailable')
+        return False
+
+    restarted = 0
+    for cmd in ['filter reload', 'interface reconfigure',
+                'dns reload', 'dhcpd restart']:
+        try:
+            result = subprocess.run(
+                ['configctl'] + cmd.split(),
+                capture_output=True, timeout=30
+            )
+        except Exception as e:
+            log_warning('configctl %s failed: %s' % (cmd, str(e)))
+            continue
+        if result.returncode == 0:
+            restarted += 1
+    if restarted == 0:
+        log_error('Fallback service restarts also failed: no command succeeded')
+        return False
+    return True
+
+
+def main():
+    """Command-line entry point: validate arguments, restore, then apply.
+
+    Prints exactly one JSON status object on stdout.  Exits with status 1
+    when the rollback was refused or the restore itself failed.
+    """
+    if len(sys.argv) < 3:
+        print(json.dumps({
+            'status': 'error',
+            'message': 'Usage: rollback.py <backup_file> <rollback_method>'
+        }))
+        sys.exit(1)
+
+    backup_file = sys.argv[1]
+    rollback_method = sys.argv[2]
+
+    # Validate inputs
+    if not os.path.isfile(backup_file):
+        msg = 'Backup file does not exist: %s' % backup_file
+        log_error(msg)
+        print(json.dumps({'status': 'error', 'message': msg}))
+        sys.exit(1)
+
+    if not validate_backup_path(backup_file):
+        msg = 'Backup file outside allowed directories: %s' % backup_file
+        log_error(msg)
+        print(json.dumps({'status': 'error', 'message': msg}))
+        sys.exit(1)
+
+    if rollback_method not in ('reboot', 'reload'):
+        rollback_method = 'reboot'  # Default to safest option
+        log_warning('Unknown rollback method, defaulting to reboot')
+
+    # Prevent rollback during firmware updates
+    if is_firmware_update_running():
+        msg = 'Rollback blocked: firmware update in progress'
+        log_warning(msg)
+        print(json.dumps({'status': 'blocked', 'message': msg}))
+        sys.exit(1)
+
+    # Acquire exclusive lock
+    lock_fd = acquire_restore_lock()
+    if lock_fd is None:
+        msg = 'Another rollback is already in progress'
+        log_warning(msg)
+        print(json.dumps({'status': 'locked', 'message': msg}))
+        sys.exit(1)
+
+    try:
+        # Step 1: Restore config.xml
+        log_info('=== ROLLBACK STARTING === backup=%s method=%s' % (
+            backup_file, rollback_method))
+
+        success, msg = restore_config(backup_file)
+        if not success:
+            print(json.dumps({'status': 'error', 'message': msg}))
+            sys.exit(1)
+
+        # Step 2: Apply the restored config
+        if rollback_method == 'reboot':
+            apply_success = apply_reboot()
+        else:
+            apply_success = apply_reload()
+
+        if apply_success:
+            log_info('=== ROLLBACK COMPLETE === method=%s' % rollback_method)
+            print(json.dumps({
+                'status': 'ok',
+                'message': 'Rollback completed (method: %s)' % rollback_method,
+                'backup_restored': backup_file,
+                'method': rollback_method,
+            }))
+        else:
+            log_error('=== ROLLBACK APPLY FAILED === method=%s' % rollback_method)
+            # If reload failed, try reboot as last resort
+            rebooting = False
+            if rollback_method == 'reload':
+                log_info('Reload failed, falling back to reboot')
+                rebooting = apply_reboot()
+            if rebooting:
+                partial_msg = 'Config restored but service apply failed. Rebooting.'
+            else:
+                # Do not claim a reboot that could not be started
+                partial_msg = ('Config restored but it could not be applied. '
+                               'A manual reboot is required.')
+            print(json.dumps({
+                'status': 'partial',
+                'message': partial_msg,
+                'backup_restored': backup_file,
+            }))
+
+    finally:
+        release_restore_lock(lock_fd)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/sysutils/autorollback/src/opnsense/scripts/autorollback/safemode.py b/sysutils/autorollback/src/opnsense/scripts/autorollback/safemode.py
new file mode 100755
index 000000000..6feccd1e8
--- /dev/null
+++ b/sysutils/autorollback/src/opnsense/scripts/autorollback/safemode.py
@@ -0,0 +1,368 @@
+#!/usr/local/bin/python3
+"""
+    Copyright (c) 2026 MP Lindsey
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are met:
+
+    1. Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+
+    2. Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+    THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+    AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+    AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+    OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+    POSSIBILITY OF SUCH DAMAGE.
+"""
+"""
+OPNsense Auto Rollback - Safe Mode Controller
+
+Usage:
+    safemode.py start [timeout_seconds]
+    safemode.py confirm
+    safemode.py cancel
+    safemode.py extend [additional_seconds]
+
+Start:   Snapshots current config, launches background timer.
+Confirm: Accepts changes, kills timer, clears state.
+Cancel:  Manually triggers rollback immediately.
+Extend:  Adds time to the countdown.
+"""
+
+import json
+import os
+import sys
+import subprocess
+import time
+
+# Add parent directory to path for lib imports
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+
+from lib.common import (
+    ensure_volatile_dir, log_info, log_warning, log_error,
+    read_model_settings,
+    read_persistent_state, write_persistent_state, clear_persistent_state,
+    generate_session_token, clear_session_token,
+    is_safe_mode_active, get_safe_mode_info,
+    is_firmware_update_running, is_restore_in_progress,
+    get_latest_backup, get_backup_revision,
+    write_timer_pid, kill_timer, read_timer_pid,
+    VOLATILE_DIR, CONFIG_XML, CONFIG_BACKUP_DIR
+)
+
+
+def _snapshot_running_config():
+    """Copy the live config.xml into the backup directory.
+
+    The copy is named like a regular timestamped backup so that it is found
+    by get_latest_backup().  Returns the new path, or None on failure.
+    """
+    import shutil  # only needed on this rarely-taken fallback path
+    target = os.path.join(CONFIG_BACKUP_DIR, 'config-%.4f.xml' % time.time())
+    try:
+        os.makedirs(CONFIG_BACKUP_DIR, exist_ok=True)
+        shutil.copy2(CONFIG_XML, target)
+    except OSError as e:
+        log_error('Could not snapshot %s: %s' % (CONFIG_XML, str(e)))
+        return None
+    return target
+
+
+def force_config_save():
+    """
+    Force OPNsense to save the current config, creating a backup.
+    We do this to ensure we have a backup of the exact running state.
+    Returns the backup path or None.
+
+    If no timestamped backup exists afterwards, the live config.xml is
+    copied into the backup directory directly.
+    """
+    try:
+        # Use configctl to trigger a config save
+        # NOTE(review): confirm that "firmware configure" really writes a
+        # config backup; the snapshot fallback below does not depend on it.
+        subprocess.run(
+            ['configctl', 'firmware', 'configure'],
+            capture_output=True, text=True, timeout=30
+        )
+    except Exception as e:
+        log_error('Failed to force config save: %s' % str(e))
+
+    # Now find the most recent backup
+    backup = get_latest_backup() or _snapshot_running_config()
+    if backup:
+        log_info('Config backup created: %s' % backup)
+        return backup
+    log_error('No backup found after config save')
+    return None
+
+
+def _launch_timer_daemon(timeout, rollback_method):
+    """Launch the background timer daemon process. Returns (pid, error_msg)."""
+    timer_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'timer_daemon.py')
+    try:
+        proc = subprocess.Popen(
+            [sys.executable, timer_script, str(int(timeout)), rollback_method],
+            stdin=subprocess.DEVNULL,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+            start_new_session=True  # Detach from parent
+        )
+        write_timer_pid(proc.pid)
+        return proc.pid, None
+    except Exception as e:
+        return None, str(e)
+
+
+def start_safe_mode(timeout_override=None):
+    """Enter safe mode. Snapshot config and start countdown timer.
+
+    Prints one JSON status object on stdout.  *timeout_override* replaces
+    the configured timeout; the effective value is clamped to 30..3600 s.
+    """
+    result = {'status': 'error', 'message': ''}
+
+    # Pre-flight checks
+    settings = read_model_settings()
+    if not settings['enabled']:
+        result['message'] = 'Auto-rollback plugin is disabled. Enable it in System > Auto Rollback.'
+        print(json.dumps(result))
+        return
+
+    if is_firmware_update_running():
+        result['message'] = 'Cannot enter safe mode during a firmware update.'
+        print(json.dumps(result))
+        return
+
+    if is_restore_in_progress():
+        result['message'] = 'A restore operation is already in progress.'
+        print(json.dumps(result))
+        return
+
+    if is_safe_mode_active():
+        info = get_safe_mode_info()
+        result['message'] = 'Safe mode is already active (%d seconds remaining).' % info['remaining_seconds']
+        result['status'] = 'already_active'
+        result.update(info)
+        print(json.dumps(result))
+        return
+
+    # Determine timeout — use is not None to allow timeout_override=0 edge case
+    if timeout_override is not None:
+        timeout = timeout_override
+    else:
+        timeout = settings['timeout']
+    timeout = max(30, min(3600, int(timeout)))
+
+    # Step 1: Get the current config as our "known good" backup
+    # The most recent backup IS the current running config (saved moments ago)
+    backup = get_latest_backup()
+    if not backup:
+        # Force a save to create one
+        backup = force_config_save()
+    if not backup:
+        result['message'] = 'Failed to create configuration backup.'
+        print(json.dumps(result))
+        return
+
+    backup_revision = get_backup_revision(backup)
+    now = time.time()
+    expiry = now + timeout
+
+    # Step 2: Generate session token for the confirmation UI
+    token = generate_session_token()
+
+    # Step 3: Write persistent state (survives reboot for early-boot recovery)
+    state = {
+        'mode': 'safemode',
+        'backup_file': backup,
+        'backup_revision': backup_revision,
+        'start_time': now,
+        'expiry_time': expiry,
+        'timeout': timeout,
+        'rollback_method': settings['rollback_method'],
+    }
+    try:
+        write_persistent_state(state)
+    except Exception as e:
+        # Report the failure as JSON instead of dying with a traceback, and
+        # do not leave a token behind for a session that never started.
+        clear_session_token()
+        result['message'] = 'Failed to save safe mode state: %s' % str(e)
+        print(json.dumps(result))
+        return
+
+    # Step 4: Launch background timer process
+    pid, err = _launch_timer_daemon(timeout, settings['rollback_method'])
+    if pid is None:
+        log_error('Failed to start timer daemon: %s' % err)
+        clear_persistent_state()
+        clear_session_token()
+        result['message'] = 'Failed to start countdown timer: %s' % err
+        print(json.dumps(result))
+        return
+
+    log_info('Safe mode started: timeout=%ds, backup=%s, timer_pid=%d' % (
+        timeout, backup, pid))
+
+    # Step 5: Trigger git backup if available
+    # NOTE(review): confirm this configd action is the intended hook for the
+    # git backup; it is best effort and its result is ignored.
+    try:
+        subprocess.run(
+            ['configctl', 'firmware', 'configure'],
+            capture_output=True, timeout=10
+        )
+    except Exception:
+        pass  # Non-critical
+
+    result = {
+        'status': 'ok',
+        'message': 'Safe mode activated. You have %d seconds to confirm changes.' % timeout,
+        'timeout': timeout,
+        'remaining_seconds': timeout,
+        'expiry_time': expiry,
+        'backup_file': backup,
+        'backup_revision': backup_revision,
+        'token': token,
+        'rollback_method': settings['rollback_method'],
+    }
+    print(json.dumps(result))
+
+
+def confirm_safe_mode():
+    """Confirm changes and exit safe mode gracefully."""
+    result = {'status': 'error', 'message': ''}
+
+    if not is_safe_mode_active():
+        result['message'] = 'Safe mode is not active.'
+        result['status'] = 'not_active'
+        print(json.dumps(result))
+        return
+
+    # Kill the background timer
+    kill_timer()
+
+    # Clear all state
+    state = read_persistent_state()
+    clear_persistent_state()
+    clear_session_token()
+
+    log_info('Safe mode confirmed. Changes accepted. Previous backup: %s' % (
+        state.get('backup_file', 'unknown') if state else 'unknown'))
+
+    result = {
+        'status': 'ok',
+        'message': 'Changes confirmed. Safe mode deactivated.',
+    }
+    print(json.dumps(result))
+
+
+def _rollback_failure_detail(proc_result):
+    """Extract a readable reason from a failed rollback.py run.
+
+    rollback.py reports errors as a JSON object on stdout, so that is tried
+    first; raw stderr/stdout and finally the exit code are the fallbacks.
+    """
+    try:
+        payload = json.loads(proc_result.stdout)
+        if isinstance(payload, dict) and payload.get('message'):
+            return str(payload['message'])
+    except (ValueError, TypeError):
+        pass
+    raw = (proc_result.stderr or proc_result.stdout or '').strip()
+    return raw or 'exit code %d' % proc_result.returncode
+
+
+def cancel_safe_mode():
+    """Cancel changes and rollback immediately."""
+    result = {'status': 'error', 'message': ''}
+
+    state = read_persistent_state()
+    if state is None or state.get('mode') != 'safemode':
+        result['message'] = 'Safe mode is not active.'
+        result['status'] = 'not_active'
+        print(json.dumps(result))
+        return
+
+    # rollback.py refuses to run in these two situations.  Check them before
+    # the timer is killed and the state is cleared, so that a refused cancel
+    # leaves safe mode armed instead of silently keeping the changes.
+    if is_firmware_update_running():
+        result['message'] = 'Rollback blocked: firmware update in progress. Safe mode is still active.'
+        print(json.dumps(result))
+        return
+
+    if is_restore_in_progress():
+        result['message'] = 'A restore operation is already in progress.'
+        print(json.dumps(result))
+        return
+
+    # Kill the background timer first
+    kill_timer()
+
+    backup_file = state.get('backup_file', '')
+    rollback_method = state.get('rollback_method', 'reboot')
+
+    if not backup_file or not os.path.isfile(backup_file):
+        clear_persistent_state()
+        clear_session_token()
+        result['message'] = 'Backup file not found: %s' % backup_file
+        print(json.dumps(result))
+        return
+
+    log_info('Safe mode cancelled. Rolling back to: %s (method: %s)' % (
+        backup_file, rollback_method))
+
+    # Clear state before rollback (important: prevents re-entrancy)
+    clear_persistent_state()
+    clear_session_token()
+
+    # Execute rollback
+    rollback_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'rollback.py')
+    try:
+        proc_result = subprocess.run(
+            [sys.executable, rollback_script, backup_file, rollback_method],
+            capture_output=True, text=True, timeout=300
+        )
+        if proc_result.returncode == 0:
+            result = {
+                'status': 'ok',
+                'message': 'Rollback initiated (method: %s). System is reverting.' % rollback_method,
+                'rollback_method': rollback_method,
+            }
+        else:
+            detail = _rollback_failure_detail(proc_result)
+            log_error('Rollback script failed: %s' % detail)
+            result['message'] = 'Rollback script failed: %s' % detail
+    except Exception as e:
+        log_error('Rollback execution failed: %s' % str(e))
+        result['message'] = 'Rollback execution failed: %s' % str(e)
+
+    print(json.dumps(result))
+
+
+def extend_safe_mode(additional_seconds=None):
+    """Extend the safe mode countdown timer.
+
+    *additional_seconds* defaults to 60 and is clamped to 10..3600.  The
+    extension is added to the stored expiry time, or to the current time if
+    that expiry has already passed.
+    """
+    result = {'status': 'error', 'message': ''}
+
+    state = read_persistent_state()
+    if state is None or state.get('mode') != 'safemode':
+        result['message'] = 'Safe mode is not active.'
+        result['status'] = 'not_active'
+        print(json.dumps(result))
+        return
+
+    # Restarting the timer would kill a timer process that may be in the
+    # middle of executing the rollback.
+    if is_restore_in_progress():
+        result['message'] = 'A restore operation is already in progress.'
+        print(json.dumps(result))
+        return
+
+    if additional_seconds is None:
+        additional_seconds = 60  # Default extension
+
+    additional_seconds = max(10, min(3600, int(additional_seconds)))
+
+    # Update expiry in persistent state
+    now = time.time()
+    old_expiry = state.get('expiry_time', now)
+    if isinstance(old_expiry, bool) or not isinstance(old_expiry, (int, float)):
+        old_expiry = now
+    # Never extend from a moment in the past: that could yield zero remaining
+    # time and therefore no timer at all.
+    new_expiry = max(old_expiry, now) + additional_seconds
+    state['expiry_time'] = new_expiry
+    try:
+        write_persistent_state(state)
+    except Exception as e:
+        result['message'] = 'Failed to save safe mode state: %s' % str(e)
+        print(json.dumps(result))
+        return
+
+    # Kill old timer and start a new one with remaining time
+    kill_timer()
+    remaining = max(1, int(new_expiry - time.time()))
+    rollback_method = state.get('rollback_method', 'reboot')
+    pid, err = _launch_timer_daemon(remaining, rollback_method)
+    if pid is None:
+        # Without a timer nothing will trigger the rollback on expiry, so
+        # this must not be reported as success.
+        log_error('Failed to restart timer: %s' % err)
+        result['message'] = 'Failed to restart countdown timer: %s' % err
+        result['remaining_seconds'] = remaining
+        result['expiry_time'] = new_expiry
+        print(json.dumps(result))
+        return
+
+    log_info('Safe mode extended by %d seconds. New remaining: %d seconds.' % (
+        additional_seconds, remaining))
+
+    result = {
+        'status': 'ok',
+        'message': 'Timer extended by %d seconds. %d seconds remaining.' % (
+            additional_seconds, remaining),
+        'remaining_seconds': remaining,
+        'expiry_time': new_expiry,
+    }
+    print(json.dumps(result))
+
+
+if __name__ == '__main__':
+    ensure_volatile_dir()
+
+    if len(sys.argv) < 2:
+        print(json.dumps({'status': 'error', 'message': 'Usage: safemode.py start|confirm|cancel|extend [args]'}))
+        sys.exit(1)
+
+    action = sys.argv[1].lower()
+
+    if action == 'start':
+        timeout = None
+        if len(sys.argv) > 2:
+            try:
+                timeout = int(sys.argv[2])
+            except ValueError:
+                print(json.dumps({'status': 'error', 'message': 'Invalid timeout value: %s' % sys.argv[2]}))
+                sys.exit(1)
+        start_safe_mode(timeout)
+    elif action == 'confirm':
+        confirm_safe_mode()
+    elif action == 'cancel':
+        cancel_safe_mode()
+    elif action == 'extend':
+        extra = None
+        if len(sys.argv) > 2:
+            try:
+                extra = int(sys.argv[2])
+            except ValueError:
+                print(json.dumps({'status': 'error', 'message': 'Invalid seconds value: %s' % sys.argv[2]}))
+                sys.exit(1)
+        extend_safe_mode(extra)
+    else:
+        print(json.dumps({'status': 'error', 'message': 'Unknown action: %s' % action}))
+        sys.exit(1)
diff --git a/sysutils/autorollback/src/opnsense/scripts/autorollback/status.py b/sysutils/autorollback/src/opnsense/scripts/autorollback/status.py
new file mode 100755
index 000000000..017201806
--- /dev/null
+++ b/sysutils/autorollback/src/opnsense/scripts/autorollback/status.py
@@ -0,0 +1,145 @@
+#!/usr/local/bin/python3
+"""
+    Copyright (c) 2026 MP Lindsey
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are met:
+
+    1. Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+
+    2.
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +""" +""" +OPNsense Auto Rollback - Status Reporter + +Returns the current state of the auto-rollback system as JSON. +Used by the dashboard widget, API, and CLI. 
# Usage: status.py (no arguments)

import json
import os
import sys
import time

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from lib.common import (
    ensure_volatile_dir,
    read_model_settings,
    read_persistent_state, read_session_token,
    read_timer_pid, is_restore_in_progress,
    WATCHDOG_FAIL_COUNT_FILE, WATCHDOG_LAST_CONFIG_FILE,
)


def get_watchdog_status():
    """
    Collect the watchdog counters kept in the volatile state files.

    Returns a dict with 'fail_count', 'last_config_change' and
    'last_config_backup'. A missing or unparsable file leaves the
    corresponding default (0 / 0 / '') in place.
    """
    status = {
        'fail_count': 0,
        'last_config_change': 0,
        'last_config_backup': '',
    }

    # Consecutive failure counter: a bare integer in a text file.
    try:
        if os.path.isfile(WATCHDOG_FAIL_COUNT_FILE):
            with open(WATCHDOG_FAIL_COUNT_FILE, 'r') as handle:
                status['fail_count'] = int(handle.read().strip())
    except (ValueError, IOError):
        pass

    # Last config change record: JSON carrying 'time' and 'backup'.
    try:
        if os.path.isfile(WATCHDOG_LAST_CONFIG_FILE):
            with open(WATCHDOG_LAST_CONFIG_FILE, 'r') as handle:
                record = json.load(handle)
            status['last_config_change'] = record.get('time', 0)
            status['last_config_backup'] = record.get('backup', '')
    except (json.JSONDecodeError, IOError):
        pass

    return status


def main():
    """Print the current auto-rollback state as indented JSON on stdout."""
    ensure_volatile_dir()

    now = time.time()
    settings = read_model_settings()
    state = read_persistent_state()
    watchdog = get_watchdog_status()

    # Safe mode section; stays at these defaults unless a session is recorded.
    safe_mode = {'active': False, 'remaining_seconds': 0}

    if state and state.get('mode') == 'safemode':
        expiry = state.get('expiry_time', 0)
        seconds_left = max(0, expiry - now)
        timer_pid = read_timer_pid()

        # Active while time is left or while a timer PID is still reported.
        safe_mode['active'] = seconds_left > 0 or timer_pid is not None
        safe_mode['remaining_seconds'] = int(seconds_left)
        safe_mode.update({
            'backup_file': state.get('backup_file', ''),
            'backup_revision': state.get('backup_revision', ''),
            'start_time': state.get('start_time', 0),
            'expiry_time': expiry,
            'remaining_seconds': int(seconds_left),
            'timeout': state.get('timeout', 0),
            'rollback_method': state.get('rollback_method', 'reboot'),
            'timer_pid': timer_pid,
        })

    # Overall state, highest priority first: restore > safe mode > armed.
    if is_restore_in_progress():
        system_state = 'restoring'
    elif safe_mode['active']:
        system_state = 'safe_mode'
    else:
        system_state = 'armed' if settings['enabled'] else 'disabled'

    report = {
        'status': 'ok',
        'timestamp': now,
        'system_state': system_state,
        'settings': settings,
        'safe_mode': safe_mode,
        'watchdog': {
            'enabled': settings['watchdog_enabled'],
            **watchdog,
        },
        'token': read_session_token(),
    }

    print(json.dumps(report, indent=2))


if __name__ == '__main__':
    # Any failure is reported as a JSON error object instead of a traceback.
    try:
        main()
    except Exception as exc:
        print(json.dumps({'status': 'error', 'message': str(exc)}))
"""
OPNsense Auto Rollback - Timer Daemon

This is a background process that counts down and triggers rollback
if not killed before expiry. It is the PRIMARY rollback trigger.

Design:
  - Launched by safemode.py start
  - Double-forks to fully detach from configd parent process
  - Sleeps in slices of at most 1 second against a monotonic deadline
    (allows responsive cancellation via SIGTERM, no cumulative drift)
  - On expiry: reads the backup path from persistent state and executes rollback
  - On SIGTERM: exits cleanly (safe mode was confirmed or cancelled)
  - PID is stored in /var/run/autorollback/timer.pid

Usage: timer_daemon.py TIMEOUT_SECONDS {reboot|reload}
"""

import os
import sys
import signal
import time
import subprocess

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from lib.common import (
    log_info, log_warning, log_error,
    read_persistent_state, clear_persistent_state, clear_session_token,
    clean_timer_pid, write_timer_pid, VOLATILE_DIR
)

# Global flag for clean shutdown; set from the signal handler, polled by run_timer().
_shutdown = False


def handle_sigterm(signum, frame):
    """Handle SIGTERM/SIGINT by requesting a clean shutdown (safe mode confirmed/cancelled)."""
    global _shutdown
    _shutdown = True


def daemonize():
    """
    Double-fork to fully detach from the parent process (configd).

    This ensures the timer daemon survives even if configd restarts,
    and that configd doesn't block waiting for our exit. Only the
    grandchild returns from this function; both intermediate processes
    leave through os._exit(0).
    """
    # First fork: exit parent (returns control to configd)
    pid = os.fork()
    if pid > 0:
        # Parent: exit immediately so configd doesn't block
        os._exit(0)

    # First child: create new session
    os.setsid()

    # Second fork: prevent reacquiring a controlling terminal
    pid = os.fork()
    if pid > 0:
        # First child exits
        os._exit(0)

    # Second child: the actual daemon process
    # Redirect standard file descriptors to /dev/null
    devnull = os.open(os.devnull, os.O_RDWR)
    try:
        os.dup2(devnull, 0)  # stdin
        os.dup2(devnull, 1)  # stdout
        os.dup2(devnull, 2)  # stderr
    finally:
        # Keep the descriptor if it already is one of 0-2.
        if devnull > 2:
            os.close(devnull)

    # Update PID file with our actual daemon PID
    write_timer_pid(os.getpid())


def _exit_if_cancelled():
    """Leave with status 0 when a shutdown signal has been received."""
    if _shutdown:
        log_info('Timer daemon received shutdown signal. Exiting cleanly.')
        clean_timer_pid()
        sys.exit(0)


def run_timer(timeout, rollback_method):
    """
    Main timer loop. Counts down and triggers rollback on expiry.

    timeout is the number of seconds to wait; rollback_method is passed
    through to rollback.py. The function does not return normally on the
    cancel or failure paths: it exits 0 when cancelled or when the
    persistent state is already gone, and exits 1 when the backup file is
    missing or the rollback script fails or times out.
    """
    # Register signal handlers
    signal.signal(signal.SIGTERM, handle_sigterm)
    signal.signal(signal.SIGINT, handle_sigterm)

    log_info('Timer daemon started: timeout=%ds, method=%s, pid=%d' % (
        timeout, rollback_method, os.getpid()))

    # Wait against a monotonic deadline rather than counting iterations, so
    # loop overhead and scheduling delays cannot push the expiry later.
    deadline = time.monotonic() + timeout
    while True:
        # Checked at the top of every pass, including the one that ends the
        # loop, so a signal delivered during the final sleep is honoured.
        _exit_if_cancelled()
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(1.0, remaining))

    # Timer expired! Time to rollback.
    log_warning('SAFE MODE TIMER EXPIRED after %d seconds. Initiating rollback.' % timeout)

    # Read the backup file from persistent state
    state = read_persistent_state()
    if state is None:
        log_error('Timer expired but no persistent state found. Someone else handled it.')
        clean_timer_pid()
        sys.exit(0)

    backup_file = state.get('backup_file', '')
    if not backup_file or not os.path.isfile(backup_file):
        log_error('Timer expired but backup file missing: %s' % backup_file)
        clear_persistent_state()
        clear_session_token()
        clean_timer_pid()
        sys.exit(1)

    # Last chance to honour a confirm/cancel that arrived while state was read.
    # After this point the rollback is committed and signals are not consulted.
    _exit_if_cancelled()

    # Clear state BEFORE rollback to prevent re-entrancy
    clear_persistent_state()
    clear_session_token()
    clean_timer_pid()

    # Execute rollback
    rollback_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'rollback.py')
    try:
        log_info('Executing rollback: backup=%s, method=%s' % (backup_file, rollback_method))
        result = subprocess.run(
            [sys.executable, rollback_script, backup_file, rollback_method],
            stdin=subprocess.DEVNULL,
            capture_output=True, text=True, timeout=300
        )
        if result.returncode != 0:
            log_error('Rollback script failed: %s' % result.stderr)
            sys.exit(1)
        else:
            log_info('Rollback script completed successfully.')
    except subprocess.TimeoutExpired:
        log_error('Rollback script timed out after 300 seconds.')
        sys.exit(1)
    except Exception as e:
        log_error('Rollback execution failed: %s' % str(e))
        sys.exit(1)


def main():
    """Validate the command line, detach from the caller and run the countdown."""
    if len(sys.argv) < 3:
        print('Usage: timer_daemon.py TIMEOUT_SECONDS {reboot|reload}', file=sys.stderr)
        sys.exit(1)

    try:
        timeout = int(sys.argv[1])
    except ValueError:
        print('Invalid timeout value: %s' % sys.argv[1], file=sys.stderr)
        sys.exit(1)

    if timeout <= 0:
        print('Timeout must be positive, got: %d' % timeout, file=sys.stderr)
        sys.exit(1)

    # Anything other than the two known methods falls back to 'reboot'.
    rollback_method = sys.argv[2]
    if rollback_method not in ('reboot', 'reload'):
        rollback_method = 'reboot'

    # Double-fork to fully detach from configd
    daemonize()

    # Now running as a proper daemon
    run_timer(timeout, rollback_method)


if __name__ == '__main__':
    main()
file mode 100755 index 000000000..2ec888404 --- /dev/null +++ b/sysutils/autorollback/src/opnsense/scripts/autorollback/watchdog.py @@ -0,0 +1,354 @@ +#!/usr/local/bin/python3 +""" + Copyright (c) 2026 MP Lindsey + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +""" +""" +OPNsense Auto Rollback - Connectivity Watchdog + +Called by cron every minute. This is Layer 2 of the safety system: + Layer 1: Timer daemon (primary, second-precise) + Layer 2: This watchdog (secondary, minute-precise) + Layer 3: Early boot recovery (tertiary, crash recovery) + +This script has TWO functions: + +1. CRON SAFETY NET for Safe Mode: + If the timer daemon died but safe mode state is still pending and expired, + trigger rollback. This catches the case where the timer process crashed. + +2. 
# Second duty of this script, the always-on connectivity watchdog: after a
# recorded config change and once the configured grace period has passed,
# health checks run on every cron invocation for a limited window. When they
# fail on fail_threshold consecutive runs, the configuration from before the
# change is restored.
#
# Usage: watchdog.py (no arguments, called by cron)

import json
import os
import re
import shlex
import subprocess
import sys
import time

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from lib.common import (
    log_info, log_warning, log_error,
    read_model_settings, ensure_volatile_dir,
    read_persistent_state, clear_persistent_state, clear_session_token,
    is_restore_in_progress, is_firmware_update_running,
    get_default_gateway, get_previous_backup,
    read_timer_pid, kill_timer, clean_timer_pid,
    write_timer_pid,
    VOLATILE_DIR, WATCHDOG_FAIL_COUNT_FILE, WATCHDOG_LAST_CONFIG_FILE,
    CONFIG_XML
)

# Seconds after the grace period during which health checks keep running.
WATCH_WINDOW_SECONDS = 300

# Seconds after expiry during which a timer daemon that still reports a PID
# is left to perform the rollback itself.
TIMER_HANDOVER_GRACE = 30

# Upper bound in seconds for a single rollback.py run.
ROLLBACK_TIMEOUT_SECONDS = 300


def get_fail_count():
    """Read the consecutive failure count; 0 when the file is absent or unreadable."""
    try:
        if os.path.isfile(WATCHDOG_FAIL_COUNT_FILE):
            with open(WATCHDOG_FAIL_COUNT_FILE, 'r') as f:
                return int(f.read().strip())
    except (ValueError, IOError):
        pass
    return 0


def set_fail_count(count):
    """Write the consecutive failure count (best effort)."""
    try:
        with open(WATCHDOG_FAIL_COUNT_FILE, 'w') as f:
            f.write(str(count))
    except IOError as e:
        # Without a persisted counter the threshold can never be reached,
        # so make the problem visible instead of dropping it.
        log_warning('Watchdog: could not persist failure count: %s' % str(e))


def clear_fail_count():
    """Reset the failure counter by removing its file."""
    try:
        if os.path.isfile(WATCHDOG_FAIL_COUNT_FILE):
            os.unlink(WATCHDOG_FAIL_COUNT_FILE)
    except OSError:
        pass


def get_last_config_change():
    """
    Read the last config change record.

    Returns a (time, backup, previous_backup) tuple, or (0, '', '') when
    the record is missing, unreadable or not a JSON object.
    """
    try:
        if os.path.isfile(WATCHDOG_LAST_CONFIG_FILE):
            with open(WATCHDOG_LAST_CONFIG_FILE, 'r') as f:
                data = json.load(f)
            # Valid JSON that is not an object (null, list, ...) has no .get().
            if isinstance(data, dict):
                return (
                    data.get('time', 0),
                    data.get('backup', ''),
                    data.get('previous_backup', ''),
                )
    except (json.JSONDecodeError, IOError):
        pass
    return 0, '', ''


def run_health_check(command, pattern, gateway=None):
    """
    Run a health check command and match its output against a pattern.
    Returns (passed, message).

    An empty command passes. A command containing %gateway% passes without
    running when no gateway is given. With a pattern, the check passes when
    the regex is found in stdout+stderr; an invalid regex is logged and
    treated as a pass. Without a pattern, the exit code decides.

    Security: the command runs with shell=True, so the substituted gateway
    value is always passed through shlex.quote().
    """
    if not command:
        return True, 'No command configured'

    # Substitute %gateway% placeholder with safely quoted value
    if '%gateway%' in command:
        if gateway:
            command = command.replace('%gateway%', shlex.quote(gateway))
        else:
            # No gateway available, skip this check
            return True, 'No gateway available, skipping check'

    try:
        result = subprocess.run(
            command, shell=True,
            capture_output=True, text=True, timeout=15
        )
        output = result.stdout + result.stderr

        if pattern:
            try:
                if re.search(pattern, output):
                    return True, output.strip()[:200]
                else:
                    return False, 'Pattern "%s" not found in output' % pattern
            except re.error as e:
                log_warning('Watchdog: invalid regex pattern "%s": %s — treating as pass' % (pattern, e))
                return True, 'Invalid pattern (skipped)'
        else:
            # No pattern: just check exit code
            return result.returncode == 0, output.strip()[:200]

    except subprocess.TimeoutExpired:
        return False, 'Command timed out after 15 seconds'
    except Exception as e:
        return False, 'Command error: %s' % str(e)


def _execute_rollback(backup_file, rollback_method, context):
    """
    Run rollback.py for backup_file and log the outcome.

    context prefixes the log messages ('Cron safety net' or 'Watchdog').
    Returns True when the script exits with status 0, False otherwise.
    """
    rollback_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'rollback.py')
    try:
        proc = subprocess.run(
            [sys.executable, rollback_script, backup_file, rollback_method],
            stdin=subprocess.DEVNULL,
            capture_output=True, text=True, timeout=ROLLBACK_TIMEOUT_SECONDS
        )
    except subprocess.TimeoutExpired:
        log_error('%s rollback timed out after %d seconds.' % (context, ROLLBACK_TIMEOUT_SECONDS))
        return False
    except Exception as e:
        log_error('%s rollback failed: %s' % (context, str(e)))
        return False

    # A non-zero exit is a failed rollback and must not pass unnoticed.
    if proc.returncode != 0:
        log_error('%s rollback script failed: %s' % (context, proc.stderr.strip()))
        return False

    log_info('%s rollback script completed successfully.' % context)
    return True


def check_safe_mode_expired():
    """
    CRON SAFETY NET: Check if safe mode timer expired but daemon died.
    This is the secondary trigger; it catches crashed timer daemons.

    Returns True when a rollback was triggered (or abandoned because the
    backup file is missing), False when nothing had to be done.
    """
    state = read_persistent_state()
    if state is None or state.get('mode') != 'safemode':
        return False

    expiry = state.get('expiry_time', 0)
    now = time.time()

    if now < expiry:
        # Not expired yet: check if timer daemon is still alive
        if read_timer_pid() is None:
            # timer_daemon.py rejects a non-positive timeout, so never pass 0.
            remaining = max(1, int(expiry - now))
            log_warning('Safe mode timer daemon died! %d seconds remaining. Restarting timer.' % remaining)
            # Restart the timer daemon
            rollback_method = state.get('rollback_method', 'reboot')
            timer_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'timer_daemon.py')
            try:
                proc = subprocess.Popen(
                    [sys.executable, timer_script, str(remaining), rollback_method],
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                    start_new_session=True
                )
                # Don't write PID here: the daemon writes its own after double-fork.
                # The Popen PID is the pre-fork process which exits immediately.
                log_info('Timer daemon restarted with %d seconds remaining' % remaining)
            except Exception as e:
                log_error('Failed to restart timer daemon: %s' % str(e))
        return False

    overdue = int(now - expiry)
    timer_pid = read_timer_pid()

    # The timer daemon clears the persistent state right before it rolls
    # back. If it still reports a PID shortly after expiry it is most likely
    # about to do so; acting now would start a second, concurrent rollback.
    if timer_pid is not None and overdue < TIMER_HANDOVER_GRACE:
        log_info('Safe mode expired %d seconds ago; timer daemon still running, '
                 'leaving rollback to it.' % overdue)
        return False

    # Timer expired and daemon is missing or unresponsive: we need to rollback!
    log_warning('CRON SAFETY NET: Safe mode expired %d seconds ago. Timer daemon missing. Triggering rollback.' % (
        overdue))

    if timer_pid is not None:
        # Stop the stale daemon so it cannot act after we take over.
        kill_timer()

    backup_file = state.get('backup_file', '')
    rollback_method = state.get('rollback_method', 'reboot')

    if not backup_file or not os.path.isfile(backup_file):
        log_error('Cannot rollback: backup file missing: %s' % backup_file)
        clear_persistent_state()
        clear_session_token()
        clean_timer_pid()
        return True

    # Clear state before rollback
    clear_persistent_state()
    clear_session_token()
    clean_timer_pid()

    # Execute rollback
    _execute_rollback(backup_file, rollback_method, 'Cron safety net')

    return True


def run_watchdog(settings):
    """
    CONNECTIVITY WATCHDOG: Run health checks after config changes.

    Checks run only while grace_period <= age of the last change <=
    grace_period + WATCH_WINDOW_SECONDS. Each failing run increments a
    persisted counter; a passing run clears it. When the counter reaches
    fail_threshold, the pre-change backup is restored.
    """
    last_change_time, _last_backup, previous_backup = get_last_config_change()

    if last_change_time == 0:
        # No recent config change recorded: nothing to watch
        clear_fail_count()
        return

    now = time.time()
    age = now - last_change_time
    grace = settings['grace_period']

    # The observation window after the grace period is over: stop watching
    if age > grace + WATCH_WINDOW_SECONDS:
        clear_fail_count()
        return

    # Still within grace period: skip checks until it elapses
    if age < grace:
        clear_fail_count()  # Reset stale count from previous config change
        return

    # NOTE(review): with one cron run per minute the window allows about
    # five checks, so a fail_threshold above that can never be reached.

    # Run health checks
    gateway = get_default_gateway()

    check1_ok, check1_msg = run_health_check(
        settings['check_command'], settings['check_pattern'], gateway)

    check2_ok = True
    check2_msg = ''
    if settings.get('check_command_2'):
        check2_ok, check2_msg = run_health_check(
            settings['check_command_2'], settings.get('check_pattern_2', ''), gateway)

    if check1_ok and check2_ok:
        fails = get_fail_count()
        if fails > 0:
            log_info('Watchdog: health check recovered after %d failures' % fails)
        clear_fail_count()
        return

    # Check failed
    fails = get_fail_count() + 1
    set_fail_count(fails)

    log_warning('Watchdog: health check failed (%d/%d). Check1: %s. Check2: %s' % (
        fails, settings['fail_threshold'],
        check1_msg if not check1_ok else 'OK',
        check2_msg if not check2_ok else 'OK'))

    if fails < settings['fail_threshold']:
        return

    log_warning('WATCHDOG: Failure threshold reached (%d/%d). Triggering rollback!' % (
        fails, settings['fail_threshold']))

    # Find the correct backup to restore: the one BEFORE the config change
    # that broke connectivity (previous_backup), NOT the new one.
    if previous_backup and os.path.isfile(previous_backup):
        backup_file = previous_backup
        log_info('Watchdog: rolling back to pre-change backup: %s' % backup_file)
    else:
        # Fallback: try to find the second-most-recent backup
        backup_file = get_previous_backup()
        if backup_file:
            log_info('Watchdog: rolling back to previous backup: %s' % backup_file)
        else:
            log_error('Watchdog: No suitable backup file available for rollback')
            clear_fail_count()
            return

    rollback_method = settings['rollback_method']
    clear_fail_count()

    # Execute rollback
    _execute_rollback(backup_file, rollback_method, 'Watchdog')


def main():
    """Run one watchdog pass and print a JSON summary on stdout."""
    result = {'status': 'ok', 'checks': []}

    # Skip if restore is in progress (re-entrancy guard)
    if is_restore_in_progress():
        result['message'] = 'Restore in progress, skipping watchdog'
        print(json.dumps(result))
        return

    # Skip during firmware updates
    if is_firmware_update_running():
        result['message'] = 'Firmware update in progress, skipping watchdog'
        print(json.dumps(result))
        return

    # Check 1: Safe mode cron safety net
    if check_safe_mode_expired():
        result['message'] = 'Safe mode expired — rollback triggered by cron safety net'
        print(json.dumps(result))
        return

    # Check 2: Connectivity watchdog
    settings = read_model_settings()
    if settings['enabled'] and settings['watchdog_enabled']:
        run_watchdog(settings)
        result['message'] = 'Watchdog check completed'
    else:
        result['message'] = 'Watchdog disabled'

    print(json.dumps(result))


if __name__ == '__main__':
    ensure_volatile_dir()
    main()
b/sysutils/autorollback/src/opnsense/service/conf/actions.d/actions_autorollback.conf new file mode 100644 index 000000000..8550cad96 --- /dev/null +++ b/sysutils/autorollback/src/opnsense/service/conf/actions.d/actions_autorollback.conf @@ -0,0 +1,48 @@ +[safemode.start] +command:/usr/local/opnsense/scripts/autorollback/safemode.py start +parameters:%s +type:script_output +message:Starting auto-rollback safe mode +description:Start safe mode with configuration snapshot + +[safemode.confirm] +command:/usr/local/opnsense/scripts/autorollback/safemode.py confirm +parameters: +type:script_output +message:Confirming safe mode changes +description:Confirm configuration changes and exit safe mode + +[safemode.cancel] +command:/usr/local/opnsense/scripts/autorollback/safemode.py cancel +parameters: +type:script_output +message:Cancelling safe mode - reverting changes +description:Cancel safe mode and revert to previous configuration + +[safemode.extend] +command:/usr/local/opnsense/scripts/autorollback/safemode.py extend +parameters:%s +type:script_output +message:Extending safe mode timer +description:Extend the safe mode countdown timer + +[rollback.execute] +command:/usr/local/opnsense/scripts/autorollback/rollback.py +parameters:%s +type:script_output +message:Executing configuration rollback +description:Roll back to a previous configuration + +[watchdog.check] +command:/usr/local/opnsense/scripts/autorollback/watchdog.py +parameters: +type:script_output +message:Running watchdog health check +description:Connectivity watchdog health check + +[status] +command:/usr/local/opnsense/scripts/autorollback/status.py +parameters: +type:script_output +message:Getting auto-rollback status +description:Report current auto-rollback state diff --git a/sysutils/autorollback/src/opnsense/service/templates/OPNsense/AutoRollback/+TARGETS b/sysutils/autorollback/src/opnsense/service/templates/OPNsense/AutoRollback/+TARGETS new file mode 100644 index 000000000..e69de29bb diff --git 
a/sysutils/autorollback/src/opnsense/www/js/autorollback_banner.js b/sysutils/autorollback/src/opnsense/www/js/autorollback_banner.js new file mode 100644 index 000000000..14d32c933 --- /dev/null +++ b/sysutils/autorollback/src/opnsense/www/js/autorollback_banner.js @@ -0,0 +1,305 @@ +/* + * Copyright (C) 2026 MP Lindsey + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * OPNsense Auto Rollback - Persistent Global Banner + * + * This script injects a countdown banner at the top of EVERY page when + * safe mode is active. It polls the status API and shows/hides the banner + * dynamically. Includes confirm/revert buttons for immediate action + * without navigating to the plugin settings page. 
+ * + * This file should be included in the base layout template or via a + * system hook that adds JavaScript to every page. + * + * Design: Non-intrusive but unmissable. Fixed position below the navbar, + * full-width, with a pulsing amber background during safe mode. + */ +(function() { + 'use strict'; + + // Don't double-initialize + if (window._autorollbackBannerInit) return; + window._autorollbackBannerInit = true; + + const POLL_INTERVAL_IDLE = 10000; // 10s when not in safe mode + const POLL_INTERVAL_ACTIVE = 1000; // 1s during safe mode + const BANNER_ID = 'autorollback-global-banner'; + + let pollTimer = null; + let currentPollInterval = POLL_INTERVAL_IDLE; + let bannerElement = null; + + function createBanner() { + if (document.getElementById(BANNER_ID)) return; + + const banner = document.createElement('div'); + banner.id = BANNER_ID; + banner.innerHTML = ` + +
+ + Safe Mode Active + -- + + + +
+
+ `; + + document.body.appendChild(banner); + bannerElement = banner; + + // Button event listeners + document.getElementById('arb-confirm').addEventListener('click', function() { + this.disabled = true; + this.textContent = 'Confirming...'; + apiPost('confirm', function() { + pollStatus(); + }); + }); + + document.getElementById('arb-revert').addEventListener('click', function() { + if (confirm('Revert to the previous configuration?\nThe system may reboot.')) { + this.disabled = true; + this.textContent = 'Reverting...'; + apiPost('cancel', function() { + pollStatus(); + }); + } + }); + + document.getElementById('arb-extend').addEventListener('click', function() { + apiPost('extend', function() { + pollStatus(); + }, {seconds: 60}); + }); + } + + function showBanner(remaining, total) { + if (!bannerElement) createBanner(); + + bannerElement.classList.add('visible'); + + // Danger mode when under 20% time remaining + let pct = total > 0 ? remaining / total : 0; + if (pct <= 0.2) { + bannerElement.classList.add('danger'); + } else { + bannerElement.classList.remove('danger'); + } + + // Update countdown + let mins = Math.floor(remaining / 60); + let secs = remaining % 60; + let display = mins > 0 + ? 
mins + 'm ' + String(secs).padStart(2, '0') + 's' + : secs + 's'; + document.getElementById('arb-countdown').textContent = display; + + // Progress bar + document.getElementById('arb-progress').style.width = (pct * 100) + '%'; + + // Re-enable buttons + let confirmBtn = document.getElementById('arb-confirm'); + let revertBtn = document.getElementById('arb-revert'); + confirmBtn.disabled = false; + confirmBtn.innerHTML = '✓ CONFIRM'; + revertBtn.disabled = false; + revertBtn.innerHTML = '↺ REVERT'; + + // Push body content down to avoid overlap + document.body.style.paddingTop = bannerElement.offsetHeight + 'px'; + } + + function hideBanner() { + if (bannerElement) { + bannerElement.classList.remove('visible'); + document.body.style.paddingTop = ''; + } + } + + function apiPost(action, callback, data) { + let xhr = new XMLHttpRequest(); + xhr.open('POST', '/api/autorollback/service/' + action, true); + xhr.setRequestHeader('Content-Type', 'application/x-www-form-urlencoded'); + + // Include CSRF token if available (OPNsense uses jQuery for this) + let csrfToken = ''; + if (typeof $ !== 'undefined' && $.ajaxSettings && $.ajaxSettings.headers) { + csrfToken = $.ajaxSettings.headers['X-CSRFToken'] || ''; + } + if (csrfToken) { + xhr.setRequestHeader('X-CSRFToken', csrfToken); + } + + xhr.onload = function() { + if (callback) callback(); + }; + + let body = ''; + if (data) { + body = Object.keys(data).map(function(k) { + return encodeURIComponent(k) + '=' + encodeURIComponent(data[k]); + }).join('&'); + } + xhr.send(body); + } + + function pollStatus() { + let xhr = new XMLHttpRequest(); + xhr.open('GET', '/api/autorollback/service/status', true); + xhr.onload = function() { + try { + let data = JSON.parse(xhr.responseText); + let state = data.system_state || 'disabled'; + let safeMode = data.safe_mode || {}; + + if (state === 'safe_mode' && safeMode.remaining_seconds > 0) { + showBanner(safeMode.remaining_seconds, safeMode.timeout || 120); + 
setPolling(POLL_INTERVAL_ACTIVE); + } else { + hideBanner(); + setPolling(POLL_INTERVAL_IDLE); + } + } catch (e) { + // Silently ignore parse errors — API might be temporarily unavailable + } + }; + xhr.onerror = function() { + // API unreachable — could be mid-rollback, keep polling + }; + xhr.send(); + } + + function setPolling(interval) { + if (interval === currentPollInterval && pollTimer) return; + currentPollInterval = interval; + if (pollTimer) clearInterval(pollTimer); + pollTimer = setInterval(pollStatus, interval); + } + + // Initialize + if (document.readyState === 'loading') { + document.addEventListener('DOMContentLoaded', function() { + pollStatus(); + setPolling(POLL_INTERVAL_IDLE); + }); + } else { + pollStatus(); + setPolling(POLL_INTERVAL_IDLE); + } +})(); diff --git a/sysutils/autorollback/src/opnsense/www/js/widgets/AutoRollback.js b/sysutils/autorollback/src/opnsense/www/js/widgets/AutoRollback.js new file mode 100644 index 000000000..1096d935e --- /dev/null +++ b/sysutils/autorollback/src/opnsense/www/js/widgets/AutoRollback.js @@ -0,0 +1,234 @@ +/* + * Copyright (C) 2026 MP Lindsey + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * OPNsense Auto Rollback - Dashboard Widget + * + * Shows real-time safe mode status with countdown, one-click + * start/confirm/cancel controls directly from the dashboard. + */ +export default class AutoRollback extends BaseWidget { + constructor() { + super(); + this.tickTimeout = 2; + } + + getMarkup() { + return $(` +
+ + +
+ + + Loading + + +
+ + + + + +
+ + + + +
+ + + +
+ `); + } + + async onMarkupRendered() { + const self = this; + + $('#arw-btn-start').on('click', async function() { + $(this).prop('disabled', true); + try { + await self.ajaxCall('/api/autorollback/service/start', {}, 'POST'); + self.tickTimeout = 1; + } catch(e) { /* ignore */ } + await self.onWidgetTick(); + }); + + $('#arw-btn-confirm').on('click', async function() { + $(this).prop('disabled', true); + try { + await self.ajaxCall('/api/autorollback/service/confirm', {}, 'POST'); + self.tickTimeout = 2; + } catch(e) { /* ignore */ } + await self.onWidgetTick(); + }); + + $('#arw-btn-revert').on('click', async function() { + if (confirm('Revert to previous configuration? The system may reboot.')) { + $(this).prop('disabled', true); + try { + await self.ajaxCall('/api/autorollback/service/cancel', {}, 'POST'); + } catch(e) { /* ignore */ } + await self.onWidgetTick(); + } + }); + + $('#arw-btn-extend').on('click', async function() { + try { + await self.ajaxCall('/api/autorollback/service/extend', JSON.stringify({seconds: 60}), 'POST'); + } catch(e) { /* ignore */ } + await self.onWidgetTick(); + }); + } + + async onWidgetTick() { + try { + const data = await this.ajaxCall('/api/autorollback/service/status'); + if (!data || data.status === 'error') { + this._renderError(); + return; + } + this._renderStatus(data); + } catch(e) { + this._renderError(); + } + } + + _renderStatus(data) { + const state = data.system_state || 'disabled'; + const safeMode = data.safe_mode || {}; + const watchdog = data.watchdog || {}; + + const badge = $('#arw-badge'); + const dot = $('#arw-dot'); + const badgeText = $('#arw-badge-text'); + + badge.css({'background': '#e9ecef', 'color': '#495057'}); + dot.css({'background': '#6c757d', 'animation': 'none'}); + + if (state === 'safe_mode') { + badge.css({'background': '#fff3cd', 'color': '#856404'}); + dot.css({'background': '#f0ad4e', 'animation': 'arw-blink 1s infinite'}); + badgeText.text('Safe Mode'); + this.tickTimeout = 1; + } else 
if (state === 'restoring') { + badge.css({'background': '#f8d7da', 'color': '#721c24'}); + dot.css({'background': '#d9534f', 'animation': 'arw-blink 0.5s infinite'}); + badgeText.text('Restoring'); + } else if (state === 'armed') { + badge.css({'background': '#d4edda', 'color': '#155724'}); + dot.css({'background': '#28a745'}); + badgeText.text('Armed'); + this.tickTimeout = 5; + } else { + badgeText.text('Disabled'); + this.tickTimeout = 10; + } + + const method = data.settings?.rollback_method || ''; + $('#arw-method').text(method === 'reboot' ? 'reboot' : method === 'reload' ? 'reload' : ''); + + if (state === 'safe_mode' && safeMode.remaining_seconds > 0) { + const remaining = Math.round(safeMode.remaining_seconds); + const total = safeMode.timeout || 120; + const pct = total > 0 ? (remaining / total) * 100 : 0; + + let mins = Math.floor(remaining / 60); + let secs = remaining % 60; + let display = mins > 0 + ? `${mins}m ${String(secs).padStart(2,'0')}s` + : `${secs}s`; + $('#arw-countdown').html(display); + + let barColor = pct > 50 ? '#5cb85c' : (pct > 20 ? 
'#f0ad4e' : '#d9534f'); + $('#arw-bar').css({'width': pct + '%', 'background': barColor}); + + $('#arw-countdown-section').show(); + } else { + $('#arw-countdown-section').hide(); + } + + $('#arw-btn-start').toggle(state === 'armed').prop('disabled', false); + $('#arw-btn-confirm').toggle(state === 'safe_mode').prop('disabled', false); + $('#arw-btn-revert').toggle(state === 'safe_mode').prop('disabled', false); + $('#arw-btn-extend').toggle(state === 'safe_mode').prop('disabled', false); + + if (watchdog.enabled) { + let wdText = 'Watchdog: monitoring'; + if (watchdog.fail_count > 0) { + wdText = `Watchdog: ${watchdog.fail_count} failure(s)`; + } + $('#arw-watchdog-text').text(wdText); + $('#arw-watchdog').show(); + } else { + $('#arw-watchdog').hide(); + } + } + + _renderError() { + $('#arw-badge').css({'background': '#f8d7da', 'color': '#721c24'}); + $('#arw-badge-text').text('Error'); + $('#arw-countdown-section').hide(); + $('#arw-btn-start, #arw-btn-confirm, #arw-btn-revert, #arw-btn-extend').hide(); + } +} diff --git a/sysutils/autorollback/src/opnsense/www/js/widgets/Metadata/AutoRollback.xml b/sysutils/autorollback/src/opnsense/www/js/widgets/Metadata/AutoRollback.xml new file mode 100644 index 000000000..b2518271c --- /dev/null +++ b/sysutils/autorollback/src/opnsense/www/js/widgets/Metadata/AutoRollback.xml @@ -0,0 +1,11 @@ + + + AutoRollback.js + + /api/autorollback/service/status + + + Auto Rollback + + + From 5631c07e3d52e6f8990660b6211a98737a879e41 Mon Sep 17 00:00:00 2001 From: MP Lindsey Date: Wed, 18 Feb 2026 09:44:33 +0000 Subject: [PATCH 2/2] sysutils/autorollback: fix extend button sending JSON instead of form data The dashboard widget's extend button was using JSON.stringify() to send the seconds parameter, but the PHP controller reads it via getPost() which expects form-encoded data. Changed to pass a plain object so jQuery serializes it correctly as form data. 
Co-Authored-By: Claude Opus 4.6 --- .../autorollback/src/opnsense/www/js/widgets/AutoRollback.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sysutils/autorollback/src/opnsense/www/js/widgets/AutoRollback.js b/sysutils/autorollback/src/opnsense/www/js/widgets/AutoRollback.js index 1096d935e..2dddf459c 100644 --- a/sysutils/autorollback/src/opnsense/www/js/widgets/AutoRollback.js +++ b/sysutils/autorollback/src/opnsense/www/js/widgets/AutoRollback.js @@ -135,7 +135,7 @@ export default class AutoRollback extends BaseWidget { $('#arw-btn-extend').on('click', async function() { try { - await self.ajaxCall('/api/autorollback/service/extend', JSON.stringify({seconds: 60}), 'POST'); + await self.ajaxCall('/api/autorollback/service/extend', {seconds: 60}, 'POST'); } catch(e) { /* ignore */ } await self.onWidgetTick(); });