diff --git a/man/systemd-system.conf.xml b/man/systemd-system.conf.xml
index ca36a64deb..7288c3b712 100644
--- a/man/systemd-system.conf.xml
+++ b/man/systemd-system.conf.xml
@@ -133,33 +133,46 @@
RebootWatchdogSec=
KExecWatchdogSec=
- Configure the hardware watchdog at runtime and at reboot. Takes a timeout value in seconds (or
- in other time units if suffixed with ms, min, h,
- d, w). If RuntimeWatchdogSec= is set to a non-zero
- value, the watchdog hardware (/dev/watchdog or the path specified with
- WatchdogDevice= or the kernel option systemd.watchdog-device=) will be
- programmed to automatically reboot the system if it is not contacted within the specified timeout interval. The
- system manager will ensure to contact it at least once in half the specified timeout interval. This feature
- requires a hardware watchdog device to be present, as it is commonly the case in embedded and server
- systems. Not all hardware watchdogs allow configuration of all possible reboot timeout values, in which case
- the closest available timeout is picked. RebootWatchdogSec= may be used to configure the
- hardware watchdog when the system is asked to reboot. It works as a safety net to ensure that the reboot takes
- place even if a clean reboot attempt times out. Note that the RebootWatchdogSec= timeout
- applies only to the second phase of the reboot, i.e. after all regular services are already terminated, and
- after the system and service manager process (PID 1) got replaced by the systemd-shutdown
- binary, see system bootup7
- for details. During the first phase of the shutdown operation the system and service manager remains running
- and hence RuntimeWatchdogSec= is still honoured. In order to define a timeout on this first
- phase of system shutdown, configure JobTimeoutSec= and JobTimeoutAction=
- in the [Unit] section of the shutdown.target unit. By default
- RuntimeWatchdogSec= defaults to 0 (off), and RebootWatchdogSec= to
- 10min. KExecWatchdogSec= may be used to additionally enable the watchdog when kexec
- is being executed rather than when rebooting. Note that if the kernel does not reset the watchdog on kexec (depending
- on the specific hardware and/or driver), in this case the watchdog might not get disabled after kexec succeeds
- and thus the system might get rebooted, unless RuntimeWatchdogSec= is also enabled at the same time.
- For this reason it is recommended to enable KExecWatchdogSec= only if
- RuntimeWatchdogSec= is also enabled.
- These settings have no effect if a hardware watchdog is not available.
+ Configure the hardware watchdog at runtime and at reboot. Takes a timeout value in
+ seconds (or in other time units if suffixed with ms, min,
+ h, d, w). If set to zero the watchdog logic
+ is disabled: no watchdog device is opened, configured, or pinged. If set to the special string
+ default the watchdog is opened and pinged in regular intervals, but the timeout
+ is not changed from the default. If set to any other time value the watchdog timeout is configured to
+ the specified value (or a value close to it, depending on hardware capabilities).
+
+ If RuntimeWatchdogSec= is set to a non-zero value, the watchdog hardware
+ (/dev/watchdog or the path specified with WatchdogDevice= or
+ the kernel option systemd.watchdog-device=) will be programmed to automatically
+ reboot the system if it is not contacted within the specified timeout interval. The system manager
+ will ensure to contact it at least once in half the specified timeout interval. This feature requires
+ a hardware watchdog device to be present, as it is commonly the case in embedded and server
+ systems. Not all hardware watchdogs allow configuration of all possible reboot timeout values, in
+ which case the closest available timeout is picked.
+
+ RebootWatchdogSec= may be used to configure the hardware watchdog when the
+ system is asked to reboot. It works as a safety net to ensure that the reboot takes place even if a
+ clean reboot attempt times out. Note that the RebootWatchdogSec= timeout applies
+ only to the second phase of the reboot, i.e. after all regular services are already terminated, and
+ after the system and service manager process (PID 1) got replaced by the
+ systemd-shutdown binary, see system
+ bootup7 for
+ details. During the first phase of the shutdown operation the system and service manager remains
+ running and hence RuntimeWatchdogSec= is still honoured. In order to define a
+ timeout on this first phase of system shutdown, configure JobTimeoutSec= and
+ JobTimeoutAction= in the [Unit] section of the
+ shutdown.target unit. By default RuntimeWatchdogSec= defaults
+ to 0 (off), and RebootWatchdogSec= to 10min.
+
+ KExecWatchdogSec= may be used to additionally enable the watchdog when kexec
+ is being executed rather than when rebooting. Note that if the kernel does not reset the watchdog on
+ kexec (depending on the specific hardware and/or driver), in this case the watchdog might not get
+ disabled after kexec succeeds and thus the system might get rebooted, unless
+ RuntimeWatchdogSec= is also enabled at the same time. For this reason it is
+ recommended to enable KExecWatchdogSec= only if
+ RuntimeWatchdogSec= is also enabled.
+
+ These settings have no effect if a hardware watchdog is not available.
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 10fe579aeb..7c844a29df 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -6334,3 +6334,32 @@ int config_parse_swap_priority(
s->parameters_fragment.priority_set = true;
return 0;
}
+
+int config_parse_watchdog_sec(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+
+ /* This is called for {Runtime,Reboot,KExec}WatchdogSec= where "default" maps to
+ * USEC_INFINITY internally. */
+
+ if (streq(rvalue, "default")) {
+ usec_t *usec = data;
+
+ *usec = USEC_INFINITY;
+ return 0;
+ }
+
+ return config_parse_sec(unit, filename, line, section, section_line, lvalue, ltype, rvalue, data, userdata);
+}
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index 9173f30add..96ccb426e6 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -143,6 +143,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_extension_images);
CONFIG_PARSER_PROTOTYPE(config_parse_bpf_foreign_program);
CONFIG_PARSER_PROTOTYPE(config_parse_cgroup_socket_bind);
CONFIG_PARSER_PROTOTYPE(config_parse_restrict_network_interfaces);
+CONFIG_PARSER_PROTOTYPE(config_parse_watchdog_sec);
/* gperf prototypes */
const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
diff --git a/src/core/main.c b/src/core/main.c
index 62f39c7378..4b8e923a1e 100644
--- a/src/core/main.c
+++ b/src/core/main.c
@@ -537,6 +537,23 @@ static int parse_proc_cmdline_item(const char *key, const char *value, void *dat
(void) parse_path_argument(value, false, &arg_watchdog_device);
+ } else if (proc_cmdline_key_streq(key, "systemd.watchdog_sec")) {
+
+ if (proc_cmdline_value_missing(key, value))
+ return 0;
+
+ if (streq(value, "default"))
+ arg_runtime_watchdog = USEC_INFINITY;
+ else {
+ r = parse_sec(value, &arg_runtime_watchdog);
+ if (r < 0) {
+ log_warning_errno(r, "Failed to parse systemd.watchdog_sec= argument '%s', ignoring: %m", value);
+ return 0;
+ }
+ }
+
+ arg_kexec_watchdog = arg_reboot_watchdog = arg_runtime_watchdog;
+
} else if (proc_cmdline_key_streq(key, "systemd.clock_usec")) {
if (proc_cmdline_value_missing(key, value))
@@ -688,10 +705,10 @@ static int parse_config_file(void) {
{ "Manager", "NUMAPolicy", config_parse_numa_policy, 0, &arg_numa_policy.type },
{ "Manager", "NUMAMask", config_parse_numa_mask, 0, &arg_numa_policy },
{ "Manager", "JoinControllers", config_parse_warn_compat, DISABLED_CONFIGURATION, NULL },
- { "Manager", "RuntimeWatchdogSec", config_parse_sec, 0, &arg_runtime_watchdog },
- { "Manager", "RebootWatchdogSec", config_parse_sec, 0, &arg_reboot_watchdog },
- { "Manager", "ShutdownWatchdogSec", config_parse_sec, 0, &arg_reboot_watchdog }, /* obsolete alias */
- { "Manager", "KExecWatchdogSec", config_parse_sec, 0, &arg_kexec_watchdog },
+ { "Manager", "RuntimeWatchdogSec", config_parse_watchdog_sec, 0, &arg_runtime_watchdog },
+ { "Manager", "RebootWatchdogSec", config_parse_watchdog_sec, 0, &arg_reboot_watchdog },
+ { "Manager", "ShutdownWatchdogSec", config_parse_watchdog_sec, 0, &arg_reboot_watchdog }, /* obsolete alias */
+ { "Manager", "KExecWatchdogSec", config_parse_watchdog_sec, 0, &arg_kexec_watchdog },
{ "Manager", "WatchdogDevice", config_parse_path, 0, &arg_watchdog_device },
{ "Manager", "CapabilityBoundingSet", config_parse_capability_set, 0, &arg_capability_bounding_set },
{ "Manager", "NoNewPrivileges", config_parse_bool, 0, &arg_no_new_privs },
@@ -1523,9 +1540,9 @@ static int become_shutdown(
};
_cleanup_strv_free_ char **env_block = NULL;
+ usec_t watchdog_timer = 0;
size_t pos = 7;
int r;
- usec_t watchdog_timer = 0;
assert(shutdown_verb);
assert(!command_line[pos]);
@@ -1574,19 +1591,16 @@ static int become_shutdown(
else if (streq(shutdown_verb, "kexec"))
watchdog_timer = arg_kexec_watchdog;
- if (timestamp_is_set(watchdog_timer)) {
- /* If we reboot or kexec let's set the shutdown watchdog and tell the shutdown binary to
- * repeatedly ping it */
- r = watchdog_setup(watchdog_timer);
- watchdog_close(r < 0);
+ /* If we reboot or kexec let's set the shutdown watchdog and tell the
+ * shutdown binary to repeatedly ping it */
+ r = watchdog_setup(watchdog_timer);
+ watchdog_close(r < 0);
- /* Tell the binary how often to ping, ignore failure */
- (void) strv_extendf(&env_block, "WATCHDOG_USEC="USEC_FMT, watchdog_timer);
+ /* Tell the binary how often to ping, ignore failure */
+ (void) strv_extendf(&env_block, "WATCHDOG_USEC="USEC_FMT, watchdog_timer);
- if (arg_watchdog_device)
- (void) strv_extendf(&env_block, "WATCHDOG_DEVICE=%s", arg_watchdog_device);
- } else
- watchdog_close(true);
+ if (arg_watchdog_device)
+ (void) strv_extendf(&env_block, "WATCHDOG_DEVICE=%s", arg_watchdog_device);
/* Avoid the creation of new processes forked by the kernel; at this
* point, we will not listen to the signals anyway */
diff --git a/src/core/manager.c b/src/core/manager.c
index 6bcb6bd153..167fa1a34a 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -3215,12 +3215,8 @@ void manager_set_watchdog(Manager *m, WatchdogType t, usec_t timeout) {
return;
if (t == WATCHDOG_RUNTIME)
- if (!timestamp_is_set(m->watchdog_overridden[WATCHDOG_RUNTIME])) {
- if (timestamp_is_set(timeout))
- (void) watchdog_setup(timeout);
- else
- watchdog_close(true);
- }
+ if (!timestamp_is_set(m->watchdog_overridden[WATCHDOG_RUNTIME]))
+ (void) watchdog_setup(timeout);
m->watchdog[t] = timeout;
}
@@ -3238,10 +3234,7 @@ int manager_override_watchdog(Manager *m, WatchdogType t, usec_t timeout) {
if (t == WATCHDOG_RUNTIME) {
usec_t usec = timestamp_is_set(timeout) ? timeout : m->watchdog[t];
- if (timestamp_is_set(usec))
- (void) watchdog_setup(usec);
- else
- watchdog_close(true);
+ (void) watchdog_setup(usec);
}
m->watchdog_overridden[t] = timeout;
diff --git a/src/shared/watchdog.c b/src/shared/watchdog.c
index 3f6a2f0228..4de6003039 100644
--- a/src/shared/watchdog.c
+++ b/src/shared/watchdog.c
@@ -15,59 +15,114 @@
#include "watchdog.h"
static int watchdog_fd = -1;
-static char *watchdog_device = NULL;
-static usec_t watchdog_timeout = USEC_INFINITY;
+static char *watchdog_device;
+static usec_t watchdog_timeout; /* 0 → close device and USEC_INFINITY → don't change timeout */
static usec_t watchdog_last_ping = USEC_INFINITY;
-static int update_timeout(void) {
- if (watchdog_fd < 0)
- return 0;
- if (watchdog_timeout == USEC_INFINITY)
- return 0;
+static int watchdog_set_enable(bool enable) {
+ int flags = enable ? WDIOS_ENABLECARD : WDIOS_DISABLECARD;
+ int r;
- if (watchdog_timeout == 0) {
- int flags;
+ assert(watchdog_fd >= 0);
- flags = WDIOS_DISABLECARD;
- if (ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags) < 0)
+ r = ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags);
+ if (r < 0) {
+ if (!enable)
return log_warning_errno(errno, "Failed to disable hardware watchdog, ignoring: %m");
- } else {
- int sec, flags;
- usec_t t;
- t = DIV_ROUND_UP(watchdog_timeout, USEC_PER_SEC);
- sec = MIN(t, (usec_t) INT_MAX); /* Saturate */
- if (ioctl(watchdog_fd, WDIOC_SETTIMEOUT, &sec) < 0)
- return log_warning_errno(errno, "Failed to set timeout to %is, ignoring: %m", sec);
-
- /* Just in case the driver is buggy */
- assert(sec > 0);
-
- /* watchdog_timeout stores the actual timeout used by the HW */
- watchdog_timeout = sec * USEC_PER_SEC;
- log_info("Set hardware watchdog to %s.", FORMAT_TIMESPAN(watchdog_timeout, 0));
-
- flags = WDIOS_ENABLECARD;
- if (ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags) < 0) {
- /* ENOTTY means the watchdog is always enabled so we're fine */
- log_full_errno(ERRNO_IS_NOT_SUPPORTED(errno) ? LOG_DEBUG : LOG_WARNING, errno,
- "Failed to enable hardware watchdog, ignoring: %m");
- if (!ERRNO_IS_NOT_SUPPORTED(errno))
- return -errno;
- }
-
- if (ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0) < 0)
- return log_warning_errno(errno, "Failed to ping hardware watchdog, ignoring: %m");
-
- watchdog_last_ping = now(clock_boottime_or_monotonic());
+ /* ENOTTY means the watchdog is always enabled so we're fine */
+ log_full_errno(ERRNO_IS_NOT_SUPPORTED(errno) ? LOG_DEBUG : LOG_WARNING, errno,
+ "Failed to enable hardware watchdog, ignoring: %m");
+ if (!ERRNO_IS_NOT_SUPPORTED(errno))
+ return -errno;
}
return 0;
}
+static int watchdog_get_timeout(void) {
+ int sec = 0;
+
+ assert(watchdog_fd > 0);
+
+ if (ioctl(watchdog_fd, WDIOC_GETTIMEOUT, &sec) < 0)
+ return -errno;
+
+ assert(sec > 0);
+ watchdog_timeout = sec * USEC_PER_SEC;
+
+ return 0;
+}
+
+static int watchdog_set_timeout(void) {
+ usec_t t;
+ int sec;
+
+ assert(watchdog_fd >= 0);
+ assert(timestamp_is_set(watchdog_timeout));
+
+ t = DIV_ROUND_UP(watchdog_timeout, USEC_PER_SEC);
+ sec = MIN(t, (usec_t) INT_MAX); /* Saturate */
+
+ if (ioctl(watchdog_fd, WDIOC_SETTIMEOUT, &sec) < 0)
+ return -errno;
+
+ assert(sec > 0);/* buggy driver ? */
+ watchdog_timeout = sec * USEC_PER_SEC;
+
+ return 0;
+}
+
+static int watchdog_ping_now(void) {
+ assert(watchdog_fd >= 0);
+
+ if (ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0) < 0)
+ return log_warning_errno(errno, "Failed to ping hardware watchdog, ignoring: %m");
+
+ watchdog_last_ping = now(clock_boottime_or_monotonic());
+
+ return 0;
+}
+
+static int update_timeout(void) {
+ int r;
+
+ assert(watchdog_timeout > 0);
+
+ if (watchdog_fd < 0)
+ return 0;
+
+ if (watchdog_timeout != USEC_INFINITY) {
+ r = watchdog_set_timeout();
+ if (r < 0) {
+ if (!ERRNO_IS_NOT_SUPPORTED(r))
+ return log_error_errno(r, "Failed to set timeout to %s: %m",
+ FORMAT_TIMESPAN(watchdog_timeout, 0));
+
+ log_info("Modifying watchdog timeout is not supported, reusing the programmed timeout.");
+ watchdog_timeout = USEC_INFINITY;
+ }
+ }
+
+ if (watchdog_timeout == USEC_INFINITY) {
+ r = watchdog_get_timeout();
+ if (r < 0)
+ return log_error_errno(errno, "Failed to query watchdog HW timeout: %m");
+ }
+
+ r = watchdog_set_enable(true);
+ if (r < 0)
+ return r;
+
+ log_info("Watchdog running with a timeout of %s.", FORMAT_TIMESPAN(watchdog_timeout, 0));
+
+ return watchdog_ping_now();
+}
+
static int open_watchdog(void) {
struct watchdog_info ident;
const char *fn;
+ int r;
if (watchdog_fd >= 0)
return 0;
@@ -85,7 +140,11 @@ static int open_watchdog(void) {
ident.firmware_version,
fn);
- return update_timeout();
+ r = update_timeout();
+ if (r < 0)
+ watchdog_close(true);
+
+ return r;
}
int watchdog_set_device(const char *path) {
@@ -102,21 +161,38 @@ int watchdog_set_device(const char *path) {
}
int watchdog_setup(usec_t timeout) {
+ usec_t previous_timeout;
+ int r;
+
+ /* timeout=0 closes the device whereas passing timeout=USEC_INFINITY
+ * opens it (if needed) without configuring any particular timeout and
+ * thus reuses the programmed value (therefore it's a nop if the device
+ * is already opened).
+ */
+
+ if (timeout == 0) {
+ watchdog_close(true);
+ return 0;
+ }
+
+ /* Let's shortcut duplicated requests */
+ if (watchdog_fd >= 0 && (timeout == watchdog_timeout || timeout == USEC_INFINITY))
+ return 0;
/* Initialize the watchdog timeout with the caller value. This value is
* going to be updated by update_timeout() with the closest value
* supported by the driver */
+ previous_timeout = watchdog_timeout;
watchdog_timeout = timeout;
- /* If we didn't open the watchdog yet and didn't get any explicit
- * timeout value set, don't do anything */
- if (watchdog_fd < 0 && watchdog_timeout == USEC_INFINITY)
- return 0;
-
if (watchdog_fd < 0)
return open_watchdog();
- return update_timeout();
+ r = update_timeout();
+ if (r < 0)
+ watchdog_timeout = previous_timeout;
+
+ return r;
}
usec_t watchdog_runtime_wait(void) {
@@ -138,7 +214,7 @@ usec_t watchdog_runtime_wait(void) {
int watchdog_ping(void) {
usec_t ntime;
- if (!timestamp_is_set(watchdog_timeout))
+ if (watchdog_timeout == 0)
return 0;
if (watchdog_fd < 0)
@@ -155,24 +231,20 @@ int watchdog_ping(void) {
return 0;
}
- if (ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0) < 0)
- return log_warning_errno(errno, "Failed to ping hardware watchdog, ignoring: %m");
-
- watchdog_last_ping = ntime;
- return 0;
+ return watchdog_ping_now();
}
void watchdog_close(bool disarm) {
+
+ /* Once closed, pinging the device becomes a NOP and we request a new
+ * call to watchdog_setup() to open the device again. */
+ watchdog_timeout = 0;
+
if (watchdog_fd < 0)
return;
if (disarm) {
- int flags;
-
- /* Explicitly disarm it */
- flags = WDIOS_DISABLECARD;
- if (ioctl(watchdog_fd, WDIOC_SETOPTIONS, &flags) < 0)
- log_warning_errno(errno, "Failed to disable hardware watchdog, ignoring: %m");
+ (void) watchdog_set_enable(false);
/* To be sure, use magic close logic, too */
for (;;) {
@@ -189,8 +261,4 @@ void watchdog_close(bool disarm) {
}
watchdog_fd = safe_close(watchdog_fd);
-
- /* Once closed, pinging the device becomes a NOP and we request a new
- * call to watchdog_setup() to open the device again. */
- watchdog_timeout = USEC_INFINITY;
}