diff --git a/man/kernel-command-line.xml b/man/kernel-command-line.xml index d6123491f3..9b04aa1706 100644 --- a/man/kernel-command-line.xml +++ b/man/kernel-command-line.xml @@ -428,6 +428,18 @@ + + systemd.watchdog_pre_sec= + + + Overrides the watchdog pre-timeout settings otherwise configured with + RuntimeWatchdogPreSec=. Takes a time value (if no unit is specified, seconds is the + implicitly assumed time unit) or the special strings off or + default. For details, see + systemd-system.conf5. + + + systemd.cpu_affinity= diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml index 8171241e4e..8976521589 100644 --- a/man/org.freedesktop.systemd1.xml +++ b/man/org.freedesktop.systemd1.xml @@ -402,6 +402,9 @@ node /org/freedesktop/systemd1 { readwrite t RuntimeWatchdogUSec = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") @org.freedesktop.systemd1.Privileged("true") + readwrite t RuntimeWatchdogPreUSec = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + @org.freedesktop.systemd1.Privileged("true") readwrite t RebootWatchdogUSec = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") @org.freedesktop.systemd1.Privileged("true") @@ -650,6 +653,8 @@ node /org/freedesktop/systemd1 { + + @@ -1052,6 +1057,8 @@ node /org/freedesktop/systemd1 { + + diff --git a/man/systemd-system.conf.xml b/man/systemd-system.conf.xml index 3805a010e2..01eff0839e 100644 --- a/man/systemd-system.conf.xml +++ b/man/systemd-system.conf.xml @@ -177,6 +177,40 @@ These settings have no effect if a hardware watchdog is not available. + + RuntimeWatchdogPreSec= + + Configure the hardware watchdog device pre-timeout value. + Takes a timeout value in seconds (or in other time units similar to + RuntimeWatchdogSec=). A watchdog pre-timeout is a + notification generated by the watchdog before the watchdog reset might + occur in the event the watchdog has not been serviced. 
This notification + is handled by the kernel and can be configured to take an action (e.g. + generate a kernel panic) using the + /sys/class/watchdog/watchdog0/pretimeout_governor + sysfs file for your watchdog device. The available actions (or + governors) are listed in the + /sys/class/watchdog/watchdog0/pretimeout_available_governors + sysfs file for your watchdog device. The default action for the + pre-timeout event is to log a kernel message but that can be changed in + the kernel's configuration. Not all watchdog hardware or drivers support + generating a pre-timeout and depending on the state of the system, the + kernel may be unable to take the configured action before the watchdog + reboot. The watchdog will be configured to generate the pre-timeout event + at the amount of time specified by RuntimeWatchdogPreSec= + before the runtime watchdog timeout (set by + RuntimeWatchdogSec=). For example, if we have + RuntimeWatchdogSec=30 and + RuntimeWatchdogPreSec=10, then the pre-timeout event + will occur if the watchdog has not been pinged for 20s (10s before the + watchdog would fire). RuntimeWatchdogPreSec= + defaults to 0 (off). The value set for RuntimeWatchdogPreSec= + must be smaller than the timeout value for RuntimeWatchdogSec=. + This setting has no effect if a hardware watchdog is not available or the + hardware watchdog does not support a pre-timeout and will be ignored by the + kernel if the setting is greater than the actual watchdog timeout. 
+ + WatchdogDevice= diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c index 8dbb61eb05..9d0ce35bac 100644 --- a/src/core/dbus-manager.c +++ b/src/core/dbus-manager.c @@ -265,6 +265,24 @@ static int property_get_runtime_watchdog( return sd_bus_message_append(reply, "t", manager_get_watchdog(m, WATCHDOG_RUNTIME)); } +static int property_get_pretimeout_watchdog( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + Manager *m = userdata; + + assert(m); + assert(bus); + assert(reply); + + return sd_bus_message_append(reply, "t", manager_get_watchdog(m, WATCHDOG_PRETIMEOUT)); +} + static int property_get_reboot_watchdog( sd_bus *bus, const char *path, @@ -330,6 +348,18 @@ static int property_set_runtime_watchdog( return property_set_watchdog(userdata, WATCHDOG_RUNTIME, value); } +static int property_set_pretimeout_watchdog( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *value, + void *userdata, + sd_bus_error *error) { + + return property_set_watchdog(userdata, WATCHDOG_PRETIMEOUT, value); +} + static int property_set_reboot_watchdog( sd_bus *bus, const char *path, @@ -2696,6 +2726,7 @@ const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_PROPERTY("DefaultStandardOutput", "s", bus_property_get_exec_output, offsetof(Manager, default_std_output), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("DefaultStandardError", "s", bus_property_get_exec_output, offsetof(Manager, default_std_error), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_WRITABLE_PROPERTY("RuntimeWatchdogUSec", "t", property_get_runtime_watchdog, property_set_runtime_watchdog, 0, 0), + SD_BUS_WRITABLE_PROPERTY("RuntimeWatchdogPreUSec", "t", property_get_pretimeout_watchdog, property_set_pretimeout_watchdog, 0, 0), SD_BUS_WRITABLE_PROPERTY("RebootWatchdogUSec", "t", property_get_reboot_watchdog, property_set_reboot_watchdog, 0, 0), /* The following item 
is an obsolete alias */ SD_BUS_WRITABLE_PROPERTY("ShutdownWatchdogUSec", "t", property_get_reboot_watchdog, property_set_reboot_watchdog, 0, SD_BUS_VTABLE_HIDDEN), diff --git a/src/core/main.c b/src/core/main.c index fabca41189..dd6a51dfe9 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -138,6 +138,7 @@ static unsigned arg_default_start_limit_burst; static usec_t arg_runtime_watchdog; static usec_t arg_reboot_watchdog; static usec_t arg_kexec_watchdog; +static usec_t arg_pretimeout_watchdog; static char *arg_early_core_pattern; static char *arg_watchdog_device; static char **arg_default_environment; @@ -557,6 +558,23 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat arg_kexec_watchdog = arg_reboot_watchdog = arg_runtime_watchdog; + } else if (proc_cmdline_key_streq(key, "systemd.watchdog_pre_sec")) { + + if (proc_cmdline_value_missing(key, value)) + return 0; + + if (streq(value, "default")) + arg_pretimeout_watchdog = USEC_INFINITY; + else if (streq(value, "off")) + arg_pretimeout_watchdog = 0; + else { + r = parse_sec(value, &arg_pretimeout_watchdog); + if (r < 0) { + log_warning_errno(r, "Failed to parse systemd.watchdog_pre_sec= argument '%s', ignoring: %m", value); + return 0; + } + } + } else if (proc_cmdline_key_streq(key, "systemd.clock_usec")) { if (proc_cmdline_value_missing(key, value)) @@ -709,6 +727,7 @@ static int parse_config_file(void) { { "Manager", "NUMAMask", config_parse_numa_mask, 0, &arg_numa_policy }, { "Manager", "JoinControllers", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL }, { "Manager", "RuntimeWatchdogSec", config_parse_watchdog_sec, 0, &arg_runtime_watchdog }, + { "Manager", "RuntimeWatchdogPreSec", config_parse_watchdog_sec, 0, &arg_pretimeout_watchdog }, { "Manager", "RebootWatchdogSec", config_parse_watchdog_sec, 0, &arg_reboot_watchdog }, { "Manager", "ShutdownWatchdogSec", config_parse_watchdog_sec, 0, &arg_reboot_watchdog }, /* obsolete alias */ { "Manager", "KExecWatchdogSec", 
config_parse_watchdog_sec, 0, &arg_kexec_watchdog }, @@ -851,6 +870,7 @@ static void set_manager_settings(Manager *m) { manager_set_watchdog(m, WATCHDOG_RUNTIME, arg_runtime_watchdog); manager_set_watchdog(m, WATCHDOG_REBOOT, arg_reboot_watchdog); manager_set_watchdog(m, WATCHDOG_KEXEC, arg_kexec_watchdog); + manager_set_watchdog(m, WATCHDOG_PRETIMEOUT, arg_pretimeout_watchdog); manager_set_show_status(m, arg_show_status, "commandline"); m->status_unit_format = arg_status_unit_format; @@ -1595,7 +1615,9 @@ static int become_shutdown( watchdog_timer = arg_kexec_watchdog; /* If we reboot or kexec let's set the shutdown watchdog and tell the - * shutdown binary to repeatedly ping it */ + * shutdown binary to repeatedly ping it. + * Disable the pretimeout watchdog, as we do not support it from the shutdown binary. */ + (void) watchdog_setup_pretimeout(0); r = watchdog_setup(watchdog_timer); watchdog_close(r < 0); @@ -2448,6 +2470,7 @@ static void reset_arguments(void) { arg_runtime_watchdog = 0; arg_reboot_watchdog = 10 * USEC_PER_MINUTE; arg_kexec_watchdog = 0; + arg_pretimeout_watchdog = 0; arg_early_core_pattern = NULL; arg_watchdog_device = NULL; diff --git a/src/core/manager-serialize.c b/src/core/manager-serialize.c index 60a35f48f3..58063f0193 100644 --- a/src/core/manager-serialize.c +++ b/src/core/manager-serialize.c @@ -118,6 +118,7 @@ int manager_serialize( (void) serialize_usec(f, "runtime-watchdog-overridden", m->watchdog_overridden[WATCHDOG_RUNTIME]); (void) serialize_usec(f, "reboot-watchdog-overridden", m->watchdog_overridden[WATCHDOG_REBOOT]); (void) serialize_usec(f, "kexec-watchdog-overridden", m->watchdog_overridden[WATCHDOG_KEXEC]); + (void) serialize_usec(f, "pretimeout-watchdog-overridden", m->watchdog_overridden[WATCHDOG_PRETIMEOUT]); for (ManagerTimestamp q = 0; q < _MANAGER_TIMESTAMP_MAX; q++) { _cleanup_free_ char *joined = NULL; @@ -455,6 +456,14 @@ int manager_deserialize(Manager *m, FILE *f, FDSet *fds) { else manager_override_watchdog(m, 
WATCHDOG_KEXEC, t); + } else if ((val = startswith(l, "pretimeout-watchdog-overridden="))) { + usec_t t; + + if (deserialize_usec(val, &t) < 0) + log_notice("Failed to parse pretimeout-watchdog-overridden value '%s', ignoring.", val); + else + manager_override_watchdog(m, WATCHDOG_PRETIMEOUT, t); + } else if (startswith(l, "env=")) { r = deserialize_environment(l + 4, &m->client_environment); if (r < 0) diff --git a/src/core/manager.c b/src/core/manager.c index 8841be4a72..117df23e3d 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -813,6 +813,7 @@ int manager_new(UnitFileScope scope, ManagerTestRunFlags test_run_flags, Manager .watchdog_overridden[WATCHDOG_RUNTIME] = USEC_INFINITY, .watchdog_overridden[WATCHDOG_REBOOT] = USEC_INFINITY, .watchdog_overridden[WATCHDOG_KEXEC] = USEC_INFINITY, + .watchdog_overridden[WATCHDOG_PRETIMEOUT] = USEC_INFINITY, .show_status_overridden = _SHOW_STATUS_INVALID, @@ -3232,9 +3233,12 @@ void manager_set_watchdog(Manager *m, WatchdogType t, usec_t timeout) { if (m->watchdog[t] == timeout) return; - if (t == WATCHDOG_RUNTIME) + if (t == WATCHDOG_RUNTIME) { if (!timestamp_is_set(m->watchdog_overridden[WATCHDOG_RUNTIME])) (void) watchdog_setup(timeout); + } else if (t == WATCHDOG_PRETIMEOUT) + if (m->watchdog_overridden[WATCHDOG_PRETIMEOUT] == USEC_INFINITY) + (void) watchdog_setup_pretimeout(timeout); m->watchdog[t] = timeout; } @@ -3253,7 +3257,8 @@ void manager_override_watchdog(Manager *m, WatchdogType t, usec_t timeout) { usec_t usec = timestamp_is_set(timeout) ? 
timeout : m->watchdog[t]; (void) watchdog_setup(usec); - } + } else if (t == WATCHDOG_PRETIMEOUT) + (void) watchdog_setup_pretimeout(timeout == USEC_INFINITY ? m->watchdog[t] : timeout); /* USEC_INFINITY drops the override, so re-apply the configured pretimeout instead of issuing a no-op */ m->watchdog_overridden[t] = timeout; } diff --git a/src/core/manager.h b/src/core/manager.h index 8f9984aa51..285da9451e 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -118,6 +118,7 @@ typedef enum WatchdogType { WATCHDOG_RUNTIME, WATCHDOG_REBOOT, WATCHDOG_KEXEC, + WATCHDOG_PRETIMEOUT, _WATCHDOG_TYPE_MAX, } WatchdogType; diff --git a/src/core/system.conf.in b/src/core/system.conf.in index 96fb64d2c1..7fd42452eb 100644 --- a/src/core/system.conf.in +++ b/src/core/system.conf.in @@ -30,6 +30,7 @@ #NUMAPolicy=default #NUMAMask= #RuntimeWatchdogSec=off +#RuntimeWatchdogPreSec=off #RebootWatchdogSec=10min #KExecWatchdogSec=off #WatchdogDevice= diff --git a/src/shared/watchdog.c b/src/shared/watchdog.c index 98ef979dbe..f6e1496330 100644 --- a/src/shared/watchdog.c +++ b/src/shared/watchdog.c @@ -18,6 +18,7 @@ static int watchdog_fd = -1; static char *watchdog_device; static usec_t watchdog_timeout; /* 0 → close device and USEC_INFINITY → don't change timeout */ +static usec_t watchdog_pretimeout; /* 0 → disable pretimeout and USEC_INFINITY → don't change pretimeout */ static usec_t watchdog_last_ping = USEC_INFINITY; /* Starting from kernel version 4.5, the maximum allowable watchdog timeout is @@ -84,6 +85,46 @@ static int watchdog_set_timeout(void) { return 0; } +static int watchdog_get_pretimeout(void) { + int sec = 0; + + assert(watchdog_fd >= 0); + + if (ioctl(watchdog_fd, WDIOC_GETPRETIMEOUT, &sec) < 0) { + watchdog_pretimeout = 0; + return log_full_errno(ERRNO_IS_NOT_SUPPORTED(errno) ? 
LOG_DEBUG : LOG_WARNING, errno, "Failed to get pretimeout value, ignoring: %m"); + } + + watchdog_pretimeout = sec * USEC_PER_SEC; + + return 0; +} + +static int watchdog_set_pretimeout(void) { + int sec; + + assert(watchdog_fd >= 0); + assert(watchdog_pretimeout != USEC_INFINITY); + + sec = saturated_usec_to_sec(watchdog_pretimeout); /* the ioctl below is handed the value in whole seconds */ + + if (ioctl(watchdog_fd, WDIOC_SETPRETIMEOUT, &sec) < 0) { + watchdog_pretimeout = 0; + + if (ERRNO_IS_NOT_SUPPORTED(errno)) { + log_info("Watchdog does not support pretimeouts."); + return 0; + } + + return log_error_errno(errno, "Failed to set pretimeout to %s: %m", FORMAT_TIMESPAN(sec * USEC_PER_SEC, USEC_PER_SEC)); /* FORMAT_TIMESPAN() is fed microseconds, so convert the seconds value back */ + } + + /* The set ioctl does not return the actual value set so get it now. */ + (void) watchdog_get_pretimeout(); + + return 0; +} + static int watchdog_ping_now(void) { assert(watchdog_fd >= 0); @@ -95,6 +136,34 @@ static int watchdog_ping_now(void) { return 0; } +static int update_pretimeout(void) { + int r, t_sec, pt_sec; + + if (watchdog_fd < 0) + return 0; + + if (watchdog_timeout == USEC_INFINITY || watchdog_pretimeout == USEC_INFINITY) + return 0; + + /* Determine if the pretimeout is valid for the current watchdog timeout. */ + t_sec = saturated_usec_to_sec(watchdog_timeout); + pt_sec = saturated_usec_to_sec(watchdog_pretimeout); + if (pt_sec >= t_sec) { + r = log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Cannot set watchdog pretimeout to %is (%s watchdog timeout of %is)", + pt_sec, pt_sec == t_sec ? 
"same as" : "longer than", t_sec); + (void) watchdog_get_pretimeout(); + } else + r = watchdog_set_pretimeout(); + + if (watchdog_pretimeout == 0) + log_info("Watchdog pretimeout is disabled."); + else + log_info("Watchdog running with a pretimeout of %s.", FORMAT_TIMESPAN(watchdog_pretimeout, 0)); + + return r; +} + static int update_timeout(void) { int r; @@ -121,6 +190,12 @@ static int update_timeout(void) { return log_error_errno(r, "Failed to query watchdog HW timeout: %m"); } + /* If the watchdog timeout was changed, the pretimeout could have been + * changed as well by the driver or the kernel so we need to update the + * pretimeout now. Or if the watchdog is being configured for the first + * time, we want to configure the pretimeout before it is enabled. */ + (void) update_pretimeout(); + r = watchdog_set_enable(true); if (r < 0) return r; @@ -210,9 +285,31 @@ int watchdog_setup(usec_t timeout) { return r; } -usec_t watchdog_runtime_wait(void) { +int watchdog_setup_pretimeout(usec_t timeout) { + /* timeout=0 disables the pretimeout whereas timeout=USEC_INFINITY is a nop. */ + if ((watchdog_fd >= 0 && timeout == watchdog_pretimeout) || timeout == USEC_INFINITY) + return 0; - if (!timestamp_is_set(watchdog_timeout)) + /* Initialize the watchdog pretimeout with the caller value. This value is + * going to be updated by update_pretimeout() with the running value, + * even if it fails to update the pretimeout. */ + watchdog_pretimeout = timeout; + + return update_pretimeout(); +} + +static usec_t calc_timeout(void) { + /* Calculate the effective timeout which accounts for the watchdog + * pretimeout if configured and supported. 
*/ + if (timestamp_is_set(watchdog_pretimeout) && watchdog_timeout >= watchdog_pretimeout) + return watchdog_timeout - watchdog_pretimeout; + else + return watchdog_timeout; +} + +usec_t watchdog_runtime_wait(void) { + usec_t timeout = calc_timeout(); + if (!timestamp_is_set(timeout)) return USEC_INFINITY; /* Sleep half the watchdog timeout since the last successful ping at most */ @@ -220,14 +317,14 @@ usec_t watchdog_runtime_wait(void) { usec_t ntime = now(clock_boottime_or_monotonic()); assert(ntime >= watchdog_last_ping); - return usec_sub_unsigned(watchdog_last_ping + (watchdog_timeout / 2), ntime); + return usec_sub_unsigned(watchdog_last_ping + (timeout / 2), ntime); } - return watchdog_timeout / 2; + return timeout / 2; } int watchdog_ping(void) { - usec_t ntime; + usec_t ntime, timeout; if (watchdog_timeout == 0) return 0; @@ -237,12 +334,13 @@ int watchdog_ping(void) { return open_watchdog(); ntime = now(clock_boottime_or_monotonic()); + timeout = calc_timeout(); /* Never ping earlier than watchdog_timeout/4 and try to ping - * by watchdog_timeout/2 plus scheduling latencies the latest */ + * by watchdog_timeout/2 plus scheduling latencies at the latest */ if (timestamp_is_set(watchdog_last_ping)) { assert(ntime >= watchdog_last_ping); - if ((ntime - watchdog_last_ping) < (watchdog_timeout / 4)) + if ((ntime - watchdog_last_ping) < (timeout / 4)) return 0; } diff --git a/src/shared/watchdog.h b/src/shared/watchdog.h index 6e99bbdf56..dc259f0a32 100644 --- a/src/shared/watchdog.h +++ b/src/shared/watchdog.h @@ -8,6 +8,7 @@ int watchdog_set_device(const char *path); int watchdog_setup(usec_t timeout); +int watchdog_setup_pretimeout(usec_t usec); int watchdog_ping(void); void watchdog_close(bool disarm); usec_t watchdog_runtime_wait(void); diff --git a/test/fuzz/fuzz-unit-file/directives-all.service b/test/fuzz/fuzz-unit-file/directives-all.service index 3b5ea3f9e3..b1890b91fa 100644 --- a/test/fuzz/fuzz-unit-file/directives-all.service +++ 
b/test/fuzz/fuzz-unit-file/directives-all.service @@ -737,6 +737,7 @@ LogLevel= LogLocation= LogTarget= RuntimeWatchdogSec= +RuntimeWatchdogPreSec= ShowStatus= RebootWatchdogSec= ShutdownWatchdogSec=