diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml index b9120cc222..d445c138fa 100644 --- a/man/org.freedesktop.systemd1.xml +++ b/man/org.freedesktop.systemd1.xml @@ -2115,6 +2115,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly as Refs = ['...', ...]; readonly a(ss) ActivationDetails = [...]; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly b DebugInvocation = ...; }; interface org.freedesktop.DBus.Peer { ... }; interface org.freedesktop.DBus.Introspectable { ... }; @@ -2458,6 +2460,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -2644,6 +2648,13 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { systemd.exec5. Note that new key value pair may be added at any time in future versions. Existing entries will not be removed. + + DebugInvocation contains a boolean that will change to + true when the unit is restarted with log level set to debug due to an earlier + failure, and will change back to false when either the unit successfully starts + or the restart rate limit is reached. See the RestartMode= section in + systemd.service5 + for more details. @@ -12079,6 +12090,7 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ QueueSignal() was added in version 254. SurviveFinalKillSignal was added in version 255. WantsMountsFor was added in version 256. + DebugInvocation was added in version 257. Service Unit Objects diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index fe19c8a657..c9ae38bf1f 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -4184,6 +4184,18 @@ StandardInputData=V2XigLJyZSBubyBzdHJhbmdlcnMgdG8gbG92ZQpZb3Uga25vdyB0aGUgcnVsZX + + $DEBUG_INVOCATION + + If RestartMode=debug is set, and a previous attempt at starting + the unit failed, this variable will be passed to the service to indicate that additional logging + should be enabled at startup. See + systemd.service5 + for more details. + + + + For system services, when PAMName= is enabled and pam_systemd is part diff --git a/man/systemd.service.xml b/man/systemd.service.xml index d3ed79effb..047fb912e9 100644 --- a/man/systemd.service.xml +++ b/man/systemd.service.xml @@ -936,20 +936,40 @@ Takes a string value that specifies how a service should restart: - If set to (the default), the service restarts by - going through a failed/inactive state. + + If set to (the default), the service restarts by going through + a failed/inactive state. - If set to , the service transitions to the activating - state directly during auto-restart, skipping failed/inactive state. - ExecStopPost= is invoked. - OnSuccess= and OnFailure= are skipped. + + + + + If set to , the service transitions to the activating + state directly during auto-restart, skipping failed/inactive state. + ExecStopPost= is invoked. + OnSuccess= and OnFailure= are skipped. + + This option is useful in cases where a dependency can fail temporarily but we don't + want these temporary failures to make the dependent units fail. Dependent units are not + notified of these temporary failures. + + + + + + If set to , the service manager will log messages that are + related to this unit at debug level while automated restarts are attempted, until either the + service hits the rate limit or it succeeds, and the $DEBUG_INVOCATION=1 + environment variable will be set for the unit. This is useful to be able to get additional + information when a service fails to start, without needing to proactively or permanently + enable debug level logging in systemd, which is very verbose. This is otherwise equivalent + to mode. + + + - This option is useful in cases where a dependency can fail temporarily - but we don't want these temporary failures to make the dependent units fail. - When this option is set to , dependent units are not notified of these temporary failures. - diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index a51d1da3df..f1136a95b7 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -920,6 +920,7 @@ const sd_bus_vtable bus_unit_vtable[] = { SD_BUS_PROPERTY("CollectMode", "s", property_get_collect_mode, offsetof(Unit, collect_mode), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Refs", "as", property_get_refs, 0, 0), SD_BUS_PROPERTY("ActivationDetails", "a(ss)", bus_property_get_activation_details, offsetof(Unit, activation_details), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("DebugInvocation", "b", bus_property_get_bool, offsetof(Unit, debug_invocation), 0), SD_BUS_METHOD_WITH_ARGS("Start", SD_BUS_ARGS("s", mode), diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c index 98d8b0c7d1..a9ed0ca11f 100644 --- a/src/core/exec-invoke.c +++ b/src/core/exec-invoke.c @@ -4107,7 +4107,9 @@ int exec_invoke( /* This should be mostly redundant, as the log level is also passed as an argument of the executor, * and is already applied earlier. Just for safety. */ - if (context->log_level_max >= 0) + if (params->debug_invocation) + log_set_max_level(LOG_PRI(LOG_DEBUG)); + else if (context->log_level_max >= 0) log_set_max_level(context->log_level_max); /* Explicitly test for CVE-2021-4034 inspired invocations */ diff --git a/src/core/execute-serialize.c b/src/core/execute-serialize.c index 38bd969560..84628f91fb 100644 --- a/src/core/execute-serialize.c +++ b/src/core/execute-serialize.c @@ -1419,6 +1419,10 @@ static int exec_parameters_serialize(const ExecParameters *p, const ExecContext if (r < 0) return r; + r = serialize_bool_elide(f, "exec-parameters-debug-invocation", p->debug_invocation); + if (r < 0) + return r; + fputc('\n', f); /* End marker */ return 0; @@ -1681,6 +1685,12 @@ static int exec_parameters_deserialize(ExecParameters *p, FILE *f, FDSet *fds) { return r; sd_id128_to_string(p->invocation_id, p->invocation_id_string); + } else if ((val = startswith(l, "exec-parameters-debug-invocation="))) { + r = parse_boolean(val); + if (r < 0) + return r; + + p->debug_invocation = r; } else log_warning("Failed to parse serialized line, ignoring: %s", l); } diff --git a/src/core/execute.c b/src/core/execute.c index 55cf9ef379..5bbd1d835c 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -927,6 +927,7 @@ void exec_params_dump(const ExecParameters *p, FILE* f, const char *prefix) { "%sShallConfirmSpawn: %s\n" "%sWatchdogUSec: " USEC_FMT "\n" "%sNotifySocket: %s\n" + "%sDebugInvocation: %s\n" "%sFallbackSmackProcessLabel: %s\n", prefix, runtime_scope_to_string(p->runtime_scope), prefix, p->flags, @@ -939,6 +940,7 @@ void exec_params_dump(const ExecParameters *p, FILE* f, const char *prefix) { prefix, yes_no(p->shall_confirm_spawn), prefix, p->watchdog_usec, prefix, strempty(p->notify_socket), + prefix, yes_no(p->debug_invocation), prefix, strempty(p->fallback_smack_process_label)); strv_dump(f, prefix, "FdNames", p->fd_names); diff --git a/src/core/execute.h b/src/core/execute.h index 15c7473a7c..a3fc52bfd0 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -462,6 +462,8 @@ struct ExecParameters { char *unit_id; sd_id128_t invocation_id; char invocation_id_string[SD_ID128_STRING_MAX]; + + bool debug_invocation; }; #define EXEC_PARAMETERS_INIT(_flags) \ @@ -629,7 +631,8 @@ bool exec_needs_ipc_namespace(const ExecContext *context); const ExecContext *_c = (ec); \ const ExecParameters *_p = (ep); \ const int _l = (level); \ - bool _do_log = _c->log_level_max < 0 || \ + bool _do_log = _p->debug_invocation || \ + _c->log_level_max < 0 || \ _c->log_level_max >= LOG_PRI(_l); \ LOG_CONTEXT_PUSH_IOV(_c->log_extra_fields, \ _c->n_log_extra_fields); \ @@ -674,7 +677,8 @@ bool exec_needs_ipc_namespace(const ExecContext *context); const ExecContext *_c = (ec); \ const ExecParameters *_p = (ep); \ const int _l = (level); \ - bool _do_log = _c->log_level_max < 0 || \ + bool _do_log = _p->debug_invocation || \ + _c->log_level_max < 0 || \ _c->log_level_max >= LOG_PRI(_l); \ LOG_CONTEXT_PUSH_IOV(_c->log_extra_fields, \ _c->n_log_extra_fields); \ diff --git a/src/core/service.c b/src/core/service.c index 6d3118b963..ed3a46ff8c 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -1726,7 +1726,7 @@ static int service_spawn_internal( if (r < 0) return r; - our_env = new0(char*, 13); + our_env = new0(char*, 14); if (!our_env) return -ENOMEM; @@ -1830,6 +1830,13 @@ static int service_spawn_internal( } } + if (s->restart_mode == SERVICE_RESTART_MODE_DEBUG && UNIT(s)->debug_invocation) { + char *t = strdup("DEBUG_INVOCATION=1"); + if (!t) + return -ENOMEM; + our_env[n_env++] = t; + } + if (UNIT(s)->activation_details) { r = activation_details_append_env(UNIT(s)->activation_details, &our_env); if (r < 0) @@ -2006,6 +2013,22 @@ static ServiceState service_determine_dead_state(Service *s) { return s->fd_store && s->fd_store_preserve_mode == EXEC_PRESERVE_YES ? SERVICE_DEAD_RESOURCES_PINNED : SERVICE_DEAD; } +static void service_set_debug_invocation(Service *s, bool enable) { + assert(s); + + if (s->restart_mode != SERVICE_RESTART_MODE_DEBUG) + return; + + if (enable == UNIT(s)->debug_invocation) + return; /* Nothing to do */ + + UNIT(s)->debug_invocation = enable; + unit_overwrite_log_level_max(UNIT(s), enable ? LOG_PRI(LOG_DEBUG) : s->exec_context.log_level_max); + + if (enable) + log_unit_notice(UNIT(s), "Service failed, subsequent restarts will be executed with debug level logging."); +} + static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) { ServiceState end_state, restart_state; int r; @@ -2071,13 +2094,22 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) log_unit_debug(UNIT(s), "Next restart interval calculated as: %s", FORMAT_TIMESPAN(restart_usec_next, 0)); + /* If the relevant option is set, and the unit doesn't already have logging level set to + * debug, enable it now. Make sure to overwrite the state in /run/systemd/units/ too, to + * ensure journald doesn't prune the messages. The previous state is saved and restored + * once the auto-restart flow ends. */ + service_set_debug_invocation(s, /* enable= */ true); + service_set_state(s, SERVICE_AUTO_RESTART); - } else + } else { /* If we shan't restart, the restart counter would be flushed out. But rather than doing that * immediately here, this is delegated to service_start(), i.e. next start, so that the user * can still introspect the counter. */ service_set_state(s, end_state); + service_set_debug_invocation(s, /* enable= */ false); + } + /* The new state is in effect, let's decrease the fd store ref counter again. Let's also re-add us to the GC * queue, so that the fd store is possibly gc'ed again */ unit_add_to_gc_queue(UNIT(s)); @@ -4861,6 +4893,8 @@ static void service_reset_failed(Unit *u) { s->reload_result = SERVICE_SUCCESS; s->clean_result = SERVICE_SUCCESS; s->n_restarts = 0; + + service_set_debug_invocation(s, /* enable= */ false); } static PidRef* service_main_pid(Unit *u, bool *ret_is_alien) { @@ -5112,6 +5146,7 @@ DEFINE_STRING_TABLE_LOOKUP(service_restart, ServiceRestart); static const char* const service_restart_mode_table[_SERVICE_RESTART_MODE_MAX] = { [SERVICE_RESTART_MODE_NORMAL] = "normal", [SERVICE_RESTART_MODE_DIRECT] = "direct", + [SERVICE_RESTART_MODE_DEBUG] = "debug", }; DEFINE_STRING_TABLE_LOOKUP(service_restart_mode, ServiceRestartMode); diff --git a/src/core/service.h b/src/core/service.h index 8984be37b4..4d67174756 100644 --- a/src/core/service.h +++ b/src/core/service.h @@ -95,6 +95,7 @@ typedef enum ServiceTimeoutFailureMode { typedef enum ServiceRestartMode { SERVICE_RESTART_MODE_NORMAL, SERVICE_RESTART_MODE_DIRECT, + SERVICE_RESTART_MODE_DEBUG, _SERVICE_RESTART_MODE_MAX, _SERVICE_RESTART_MODE_INVALID = -EINVAL, } ServiceRestartMode; diff --git a/src/core/unit.c b/src/core/unit.c index b52ee90936..66203f2794 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -3830,6 +3830,7 @@ void unit_reset_failed(Unit *u) { ratelimit_reset(&u->start_ratelimit); u->start_limit_hit = false; + u->debug_invocation = false; } Unit *unit_following(Unit *u) { @@ -5406,6 +5407,8 @@ int unit_set_exec_params(Unit *u, ExecParameters *p) { if (!p->unit_id) return -ENOMEM; + p->debug_invocation = u->debug_invocation; + return 0; } @@ -5606,23 +5609,25 @@ static int unit_export_invocation_id(Unit *u) { return 0; } -static int unit_export_log_level_max(Unit *u, const ExecContext *c) { +static int unit_export_log_level_max(Unit *u, int log_level_max, bool overwrite) { const char *p; char buf[2]; int r; assert(u); - assert(c); - if (u->exported_log_level_max) + /* When the debug_invocation logic runs, overwrite will be true as we always want to switch the max + * log level that the journal applies, and we want to always restore the previous level once done */ + + if (!overwrite && u->exported_log_level_max) return 0; - if (c->log_level_max < 0) + if (log_level_max < 0) return 0; - assert(c->log_level_max <= 7); + assert(log_level_max <= 7); - buf[0] = '0' + c->log_level_max; + buf[0] = '0' + log_level_max; buf[1] = 0; p = strjoina("/run/systemd/units/log-level-max:", u->id); @@ -5772,7 +5777,7 @@ void unit_export_state_files(Unit *u) { c = unit_get_exec_context(u); if (c) { - (void) unit_export_log_level_max(u, c); + (void) unit_export_log_level_max(u, c->log_level_max, /* overwrite= */ false); (void) unit_export_log_extra_fields(u, c); (void) unit_export_log_ratelimit_interval(u, c); (void) unit_export_log_ratelimit_burst(u, c); @@ -5830,6 +5835,16 @@ void unit_unlink_state_files(Unit *u) { } } +int unit_overwrite_log_level_max(Unit *u, int log_level_max) { + assert(u); + + if (!u->exported_log_level_max) + return 0; /* Nothing to do */ + + /* Ensure that the new log level is exported for the journal, in place of the previous one */ + return unit_export_log_level_max(u, log_level_max, /* overwrite= */ true); +} + int unit_prepare_exec(Unit *u) { int r; diff --git a/src/core/unit.h b/src/core/unit.h index 04a4189de3..ce71335438 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -421,6 +421,9 @@ typedef struct Unit { /* Is this a unit that is always running and cannot be stopped? */ bool perpetual; + /* When true logs about this unit will be at debug level regardless of other log level settings */ + bool debug_invocation; + /* Booleans indicating membership of this unit in the various queues */ bool in_load_queue:1; bool in_dbus_queue:1; @@ -984,6 +987,7 @@ void unit_remove_dependencies(Unit *u, UnitDependencyMask mask); void unit_export_state_files(Unit *u); void unit_unlink_state_files(Unit *u); +int unit_overwrite_log_level_max(Unit *u, int log_level_max); int unit_prepare_exec(Unit *u); @@ -998,8 +1002,9 @@ static inline bool unit_has_job_type(Unit *u, JobType type) { } static inline bool unit_log_level_test(const Unit *u, int level) { + assert(u); ExecContext *ec = unit_get_exec_context(u); - return !ec || ec->log_level_max < 0 || ec->log_level_max >= LOG_PRI(level); + return !ec || ec->log_level_max < 0 || ec->log_level_max >= LOG_PRI(level) || u->debug_invocation; } /* unit_log_skip is for cases like ExecCondition= where a unit is considered "done" diff --git a/test/units/TEST-59-RELOADING-RESTART.sh b/test/units/TEST-59-RELOADING-RESTART.sh index 0e044037b8..7f79052cbb 100755 --- a/test/units/TEST-59-RELOADING-RESTART.sh +++ b/test/units/TEST-59-RELOADING-RESTART.sh @@ -165,4 +165,25 @@ rm /run/notify-reload-test.sh systemd-analyze log-level info +# Ensure that, with system log level info, we get debug level messages when a unit fails to start and is +# restarted with RestartMode=debug +cat >/run/systemd/system/testservice-fail-restart-debug-59.service <