commit bb9a68add4feef7c53ba3994256745ea99cffef8
parent b4518d79cc80e7ea0f4e53345ca29afa21e4f0f3
Author: Laurent Bercot <ska-skaware@skarnet.org>
Date: Mon, 20 Mar 2017 20:34:12 +0000
Implement permanent failure for services via ./finish exiting 125
Diffstat:
7 files changed, 51 insertions(+), 16 deletions(-)
diff --git a/doc/s6-supervise.html b/doc/s6-supervise.html
@@ -56,6 +56,10 @@ s6-supervise kills it with a SIGKILL. This can be configured via the
<li> When <tt>./finish</tt> dies (or is killed),
s6-supervise sends a <tt>'D'</tt> event to <tt>./event</tt>. Then
it restarts <tt>./run</tt> unless it has been told not to. </li>
+ <li> If <tt>./finish</tt> exits 125, then s6-supervise sends an <tt>'O'</tt> event
+to <tt>./event</tt> <em>before</em> the <tt>'D'</tt>, and it
+<strong>does not restart the service</strong>, as if <tt>s6-svc -O</tt> had
+been called. This can be used to signify permanent failure to start the service. </li>
<li> There is a minimum 1-second delay between two <tt>./run</tt> spawns, to avoid busylooping
if <tt>./run</tt> exits too quickly. </li>
<li> When killed or asked to exit, it waits for the service to go down one last time, then
diff --git a/doc/servicedir.html b/doc/servicedir.html
@@ -87,15 +87,21 @@ is executed everytime the <tt>run</tt> script dies. Generally, its main
purpose is to clean up non-volatile data such as the filesystem after the supervised
process has been killed. If the <em>foo</em> service is supposed to be up,
<em>foo</em><tt>/run</tt> is restarted
-after <em>foo</em><tt>/finish</tt> dies. By default, a finish script must do
-its work and exit in less than
+after <em>foo</em><tt>/finish</tt> dies.
+ <ul>
+ <li> By default, a finish script must do its work and exit in less than
5 seconds; if it takes more than that, it is killed. (The point is that the run
script, not the finish script, should be running; the finish script should really
be short-lived.) The maximum duration of a <tt>finish</tt> execution can be
-configured via the <tt>timeout-finish</tt> file, see below. The finish script is
+configured via the <tt>timeout-finish</tt> file, see below. </li>
+ <li> The finish script is
executed with two arguments: the exit code from the run script (resp. 256 if the
run script was killed by a signal), and an undefined number (resp. the number of
the signal that killed the run script). </li>
+ <li> If the finish script exits 125, then <a href="s6-supervise.html">s6-supervise</a>
+interprets this as a permanent failure for the service, and does not restart it,
+as if an <a href="s6-svc.html">s6-svc -O</a> command had been sent. </li>
+ </ul> </li>
<li> A directory named <tt>supervise</tt>. It is automatically created by
<a href="s6-supervise.html">s6-supervise</a> if it does not exist. This is where
<a href="s6-supervise.html">s6-supervise</a> stores its information. The directory
diff --git a/src/supervision/s6-supervise.c b/src/supervision/s6-supervise.c
@@ -436,7 +436,13 @@ static void finishtimeout (void)
static void finish_z (void)
{
- set_down_and_ready("D", 1) ;
+ int wstat = (int)status.pid ; /* NOTE(review): status.pid is read back as a wait status here -- confirm that ./finish's wstat is stored there before this runs */
+ if (WIFEXITED(wstat) && WEXITSTATUS(wstat) == 125) /* ./finish exited normally with code 125: documented as permanent failure */
+ {
+ status.flagwant = 0 ; /* presumably what prevents the restart (docs: "as if s6-svc -O had been called") -- verify */
+ set_down_and_ready("OD", 2) ; /* 'O' is placed before 'D', matching the documented event order */
+ }
+ else set_down_and_ready("D", 1) ; /* any other outcome: plain "D", same call as before this change */
}
static void finish_u (void)
diff --git a/src/supervision/s6-svlisten.c b/src/supervision/s6-svlisten.c
@@ -63,6 +63,7 @@ int main (int argc, char const **argv, char const *const *envp)
{
s6_svlisten_t foo = S6_SVLISTEN_ZERO ;
pid_t pid ;
+ int e ;
uint16_t ids[argc1] ;
unsigned char upstate[bitarray_div8(argc1)] ;
unsigned char readystate[bitarray_div8(argc1)] ;
@@ -71,10 +72,12 @@ int main (int argc, char const **argv, char const *const *envp)
if (!pid) strerr_diefu2sys(111, "spawn ", argv[argc1 + 1]) ;
if (wantrestart)
{
- argc1 = s6_svlisten_loop(&foo, 0, 1, or, &deadline, spfd, &s6_svlisten_signal_handler) ;
- if (argc1) return argc1 ;
+ e = s6_svlisten_loop(&foo, 0, 1, or, &deadline, spfd, &s6_svlisten_signal_handler) ;
+ if (e) strerr_dief1x(e, "some services reported permanent failure") ;
wantup = 1 ;
}
- return s6_svlisten_loop(&foo, wantup, wantready, or, &deadline, spfd, &s6_svlisten_signal_handler) ;
+ e = s6_svlisten_loop(&foo, wantup, wantready, or, &deadline, spfd, &s6_svlisten_signal_handler) ;
+ if (e) strerr_dief1x(e, "some services reported permanent failure") ;
}
+ return 0 ;
}
diff --git a/src/supervision/s6-svlisten1.c b/src/supervision/s6-svlisten1.c
@@ -17,6 +17,7 @@ int main (int argc, char const *const *argv, char const *const *envp)
tain_t deadline, tto ;
pid_t pid ;
int spfd ;
+ int e ;
int wantup = 1, wantready = 0, wantrestart = 0 ;
uint16_t id ;
unsigned char upstate, readystate ;
@@ -52,9 +53,11 @@ int main (int argc, char const *const *argv, char const *const *envp)
if (!pid) strerr_diefu2sys(111, "spawn ", argv[1]) ;
if (wantrestart)
{
- int r = s6_svlisten_loop(&foo, 0, 1, 1, &deadline, spfd, &s6_svlisten_signal_handler) ;
- if (r) return r ;
+ int e = s6_svlisten_loop(&foo, 0, 1, 1, &deadline, spfd, &s6_svlisten_signal_handler) ;
+ if (e) strerr_dief2x(1, argv[0], " failed permanently: the finish script exited 125") ;
wantup = 1 ;
}
- return s6_svlisten_loop(&foo, wantup, wantready, 1, &deadline, spfd, &s6_svlisten_signal_handler) ;
+ e = s6_svlisten_loop(&foo, wantup, wantready, 1, &deadline, spfd, &s6_svlisten_signal_handler) ;
+ if (e) strerr_dief2x(1, argv[0], " failed permanently: the finish script exited 125") ;
+ return 0 ;
}
diff --git a/src/supervision/s6-svwait.c b/src/supervision/s6-svwait.c
@@ -46,10 +46,13 @@ int main (int argc, char const *const *argv)
{
s6_svlisten_t foo = S6_SVLISTEN_ZERO ;
+ int e ;
uint16_t ids[argc] ;
unsigned char upstate[bitarray_div8(argc)] ;
unsigned char readystate[bitarray_div8(argc)] ;
s6_svlisten_init(argc, argv, &foo, ids, upstate, readystate, &deadline) ;
- return s6_svlisten_loop(&foo, wantup, wantready, or, &deadline, -1, 0) ;
+ e = s6_svlisten_loop(&foo, wantup, wantready, or, &deadline, -1, 0) ;
+ if (e) strerr_dief1x(e, "some services reported permanent failure") ;
}
+ return 0 ;
}
diff --git a/src/supervision/s6_svlisten_loop.c b/src/supervision/s6_svlisten_loop.c
@@ -26,7 +26,7 @@ void s6_svlisten_init (int argc, char const *const *argv, s6_svlisten_t *foo, ui
memcpy(s, argv[i], len) ;
s[len] = '/' ;
memcpy(s + len + 1, S6_SUPERVISE_EVENTDIR, sizeof(S6_SUPERVISE_EVENTDIR)) ;
- foo->ids[i] = ftrigr_subscribe_g(&foo->a, s, "[DuUd]", FTRIGR_REPEAT, deadline) ;
+ foo->ids[i] = ftrigr_subscribe_g(&foo->a, s, "[DuUdO]", FTRIGR_REPEAT, deadline) ;
if (!foo->ids[i]) strerr_diefu2sys(111, "subscribe to events for ", argv[i]) ;
if (!s6_svstatus_read(argv[i], &status)) strerr_diefu1sys(111, "s6_svstatus_read") ;
bitarray_poke(foo->upstate, i, status.pid && !status.flagfinishing) ;
@@ -47,6 +47,7 @@ static inline int got (s6_svlisten_t const *foo, int wantup, int wantready, int
int s6_svlisten_loop (s6_svlisten_t *foo, int wantup, int wantready, int or, tain_t const *deadline, int spfd, action_func_t_ref handler)
{
iopause_fd x[2] = { { .fd = ftrigr_fd(&foo->a), .events = IOPAUSE_READ }, { .fd = spfd, .events = IOPAUSE_READ, .revents = 0 } } ;
+ unsigned int e = 0 ;
while (!got(foo, wantup, wantready, or))
{
int r = iopause_g(x, 1 + (spfd >= 0), deadline) ;
@@ -64,12 +65,21 @@ int s6_svlisten_loop (s6_svlisten_t *foo, int wantup, int wantready, int or, tai
if (r < 0) strerr_diefu1sys(111, "ftrigr_check") ;
if (r)
{
- unsigned int d = byte_chr("dDuU", 4, what) ;
- bitarray_poke(foo->upstate, i, d & 2) ;
- bitarray_poke(foo->readystate, i, d & 1) ;
+ if (what == 'O')
+ {
+ bitarray_poke(foo->upstate, i, wantup) ;
+ bitarray_poke(foo->readystate, i, wantready) ;
+ e++ ;
+ }
+ else
+ {
+ unsigned int d = byte_chr("dDuU", 4, what) ;
+ bitarray_poke(foo->upstate, i, d & 2) ;
+ bitarray_poke(foo->readystate, i, d & 1) ;
+ }
}
}
}
}
- return 0 ;
+ return e ;
}