commit 0a39023fd7229371dd3e505208a1c0e073414ac2
parent 9cc5f3b25284ddca50ad1a19a83b1d1153dc9f5d
Author: Laurent Bercot <ska-skaware@skarnet.org>
Date: Mon, 15 Jun 2015 18:27:25 +0000
- Readiness notification support in s6-supervise
- s6-notifywhenup deprecated
- Change abundantly documented
- rc for 2.1.4.0
Diffstat:
13 files changed, 217 insertions(+), 52 deletions(-)
diff --git a/INSTALL b/INSTALL
@@ -6,8 +6,8 @@ Build Instructions
- A POSIX-compliant C development environment
- GNU make version 4.0 or later
- - skalibs version 2.3.2.0 or later: http://skarnet.org/software/skalibs/
- - execline version 2.1.1.0 or later: http://skarnet.org/software/execline/
+ - skalibs version 2.3.5.1 or later: http://skarnet.org/software/skalibs/
+ - execline version 2.1.2.2 or later: http://skarnet.org/software/execline/
This software will run on any operating system that implements
POSIX.1-2008, available at:
diff --git a/doc/index.html b/doc/index.html
@@ -84,11 +84,11 @@ with s6</a> </li>
<li> GNU make, version 4.0 or later. Please be aware that s6 will not build
with an earlier version. </li>
<li> <a href="http://skarnet.org/software/skalibs/">skalibs</a> version
-2.3.2.0 or later. It's a build-time requirement. It's also a run-time
+2.3.5.1 or later. It's a build-time requirement. It's also a run-time
requirement if you link against the shared version of the skalibs
library. </li>
<li> <a href="http://skarnet.org/software/execline/">execline</a> version
-2.1.1.0 or later. It's a build-time and run-time requirement. </li>
+2.1.2.2 or later. It's a build-time and run-time requirement. </li>
</ul>
<h3> Licensing </h3>
@@ -101,7 +101,7 @@ library. </li>
<h3> Download </h3>
<ul>
- <li> The current released version of s6 is <a href="s6-2.1.3.0.tar.gz">2.1.3.0</a>. </li>
+ <li> The current released version of s6 is <a href="s6-2.1.4.0.tar.gz">2.1.4.0</a>. </li>
<li> Alternatively, you can checkout a copy of the s6 git repository:
<pre> git clone git://git.skarnet.org/s6 </pre> </li>
<li> There's also a
@@ -153,7 +153,8 @@ a user interface to control those processes and monitor service states.
<li><a href="s6-svwait.html">The <tt>s6-svwait</tt> program</a></li>
<li><a href="s6-svlisten1.html">The <tt>s6-svlisten1</tt> program</a></li>
<li><a href="s6-svlisten.html">The <tt>s6-svlisten</tt> program</a></li>
-<li><a href="s6-notifywhenup.html">The <tt>s6-notifywhenup</tt> program</a> </li>
+<li><a href="s6-notifywhenup.html">The <tt>s6-notifywhenup</tt> program</a>
+<strong>(deprecated)</strong></li>
</ul>
<h4> Daemontools-like utilities </h4>
diff --git a/doc/notifywhenup.html b/doc/notifywhenup.html
@@ -60,19 +60,16 @@ against the s6 library or use any s6-specific construct:
daemons can simply write a line to a file descriptor of their choice,
then close that file descriptor, when they're ready to serve. This is
a generic mechanism that some daemons already implement.
-The administrator can
-then run the daemon under <a href="s6-notifywhenup.html">s6-notifywhenup</a>,
-which will properly catch the daemon's message and update a state file
-itself, then notify all the subscribers
-with a 'U' event, meaning that the service is now up. <br />
- Note that there is <em>still</em> a small race condition remaining:
-if the daemon writes a line then instantly dies, and the supervisor
-picks up the death before the <a href="s6-notifywhenup.html">s6-notifywhenup</a>
-program picks up the line, it is possible for the event sequence written
-to the fifodir to be wrong - 'd' before 'U'. This should be extremely
-rare, but unfortunately the race condition is unavoidable. The only
-way to be absolutely race-free is to have the daemon perform its
-readiness notification itself, which requires specific support.
+</p>
+
+<p>
+ s6 supports that mechanism natively: when the
+<a href="servicedir.html">service directory</a> for the daemon contains
+a valid <tt>notification-fd</tt> file, the daemon's supervisor, i.e. the
+<a href="s6-supervise.html">s6-supervise</a> program, will properly catch
+the daemon's message, update a state file (<tt>supervise/ready</tt>), then
+notify all the subscribers
+with a 'U' event, meaning that the service is now up and ready.
</p>
<p>
diff --git a/doc/s6-notifywhenup.html b/doc/s6-notifywhenup.html
@@ -18,6 +18,27 @@
<h1> The s6-notifywhenup program </h1>
+<em>
+<p>
+Starting with s6-2.1.4.0, the s6-notifywhenup program has been deprecated,
+because there was still a case (albeit extremely rare) where a race
+condition would occur and readiness would be improperly advertised.
+Readiness notification for a service can now be achieved via a
+<tt>notification-fd</tt> file in the
+<a href="servicedir.html">service directory</a>, containing the
+number of the descriptor the service will write its readiness
+notification newline to. The notification will directly be picked up by
+<a href="s6-supervise.html">s6-supervise</a>. <br />
+</p>
+
+<p>
+ Quick upgrade recipe: for every service using s6-notifywhenup,
+replace the s6-notifywhenup invocation in your run script with
+<tt>fdmove 1 3</tt>, then perform <tt>echo 3 > notification-fd</tt>.
+Done!
+</p>
+</em>
+
<p>
s6-notifywhenup launches a daemon while listening to a file descriptor,
and sends a 'U' event to a <a href="fifodir.html">fifodir</a> when it
diff --git a/doc/s6-supervise.html b/doc/s6-supervise.html
@@ -59,6 +59,47 @@ if <tt>./run</tt> exits too quickly. </li>
sends a <tt>'x'</tt> event to <tt>./event</tt> before exiting 0. </li>
</ul>
+<h2> Options </h2>
+
+<p>
+ s6-supervise does not support options, because it is normally not run
+manually via a command line; it is usually launched by its own
+supervisor, <a href="s6-svscan.html">s6-svscan</a>.
+ However, the behaviour of an instance of s6-supervise can be tuned via
+various configuration files in the service directory. These files, and
+what they do, are listed on the
+<a href="servicedir.html">service directory documentation page</a>.
+</p>
+
+<h2> Readiness notification support </h2>
+
+<p>
+ If the <a href="servicedir.html">service directory</a> contains a valid
+<tt>notification-fd</tt> file when the service is started, or restarted,
+s6-supervise creates and listens to an additional pipe from the service
+for <a href="notifywhenup.html">readiness notification</a>. When the
+notification occurs, s6-supervise creates a <tt>./supervise/ready</tt>
+file containing the absolute time when readiness occurred, then sends
+a <tt>'U'</tt> event to <tt>./event</tt>. The <tt>./supervise/ready</tt>
+file is deleted on service death.
+</p>
+
+<p>
+ If the service is logged, i.e. if the service directory has a
+<tt>log</tt> subdirectory that is also a service directory, and the
+s6-supervise process has been launched by
+<a href="s6-svscan.html">s6-svscan</a>, then by default
+the service's stdout goes into the logging pipe. If you set
+<tt>notification-fd</tt> to 1, the logging pipe will be overwritten
+by the notification pipe, which is probably not what you want. Instead,
+if your daemon writes a notification message to its stdout, you should
+set <tt>notification-fd</tt> to (for instance) 3, and redirect outputs
+in your run script. For instance, to redirect stderr to the logger and
+stdout to a <tt>notification-fd</tt> set to 3, you would start your
+daemon as <tt>fdmove -c 2 1 fdmove 1 3 prog...</tt> (in execline), or
+<tt>exec 2&gt;&amp;1 1&gt;&amp;3 3&lt;&amp;- prog...</tt> (in shell).
+</p>
+
<h2> Signals </h2>
<p>
diff --git a/doc/s6-svlisten.html b/doc/s6-svlisten.html
@@ -64,8 +64,9 @@ support in the service programs. See <a href="notifywhenup.html">this page</a>
for details. </li>
<li> <tt>-U</tt> : really up. s6-svlisten will wait until the services are
up <em>and</em> ready as reported by the services themselves. This requires
-specific support in the service programs, and the use of
-<a href="s6-notifywhenup.html">s6-notifywhenup</a> in the service's run script.
+specific support in the service programs, and the use of the
+<tt>notification-fd</tt> file in the
+<a href="servicedir.html">service directory</a>.
See the explanation on <a href="notifywhenup.html">this page</a>. </li>
<li> <tt>-d</tt> : down. s6-svlisten will wait until the services are down. </li>
<li> <tt>-o</tt> : or. s6-svlisten will wait until <em>one</em> of the
diff --git a/doc/s6-svlisten1.html b/doc/s6-svlisten1.html
@@ -52,8 +52,9 @@ support in the service programs. See <a href="notifywhenup.html">this page</a>
for details. </li>
<li> <tt>-U</tt> : really up. s6-svlisten1 will wait until the service is
up <em>and</em> ready as reported by the daemon itself. This requires
-specific support in the daemon program, and the use of
-<a href="s6-notifywhenup.html">s6-notifywhenup</a> in the service's run script.
+specific support in the daemon program, and the use of the
+<tt>notification-fd</tt> file in the
+<a href="servicedir.html">service directory</a>.
See the explanation on <a href="notifywhenup.html">this page</a>. </li>
<li> <tt>-d</tt> : down. s6-svlisten1 will wait until the service is down. </li>
<li> <tt>-t <em>timeout</em></tt> : if the requested event has not
diff --git a/doc/s6-svwait.html b/doc/s6-svwait.html
@@ -48,8 +48,9 @@ support in the service programs. See <a href="notifywhenup.html">this page</a>
for details. </li>
<li> <tt>-U</tt> : really up. s6-svwait will wait until the services are
up <em>and</em> ready as reported by the services themselves. This requires
-specific support in the service programs, and the use of
-<a href="s6-notifywhenup.html">s6-notifywhenup</a> in the service's run script.
+specific support in the service programs, and the use of the
+<tt>notification-fd</tt> file in the
+<a href="servicedir.html">service directory</a>.
See the explanation on <a href="notifywhenup.html">this page</a>. </li>
<li> <tt>-d</tt> : down. s6-svwait will wait until the services are down. </li>
<li> <tt>-o</tt> : or. s6-svwait will wait until <em>one</em> of the
diff --git a/doc/servicedir.html b/doc/servicedir.html
@@ -103,6 +103,20 @@ automatically start it until it receives a <tt>s6-svc -u</tt> command. If no
s6-supervise will not make the service a process group and session leader; the service
will be run in the same process group as s6-supervise. If no <tt>nosetsid</tt> file
exists, the service has its own process group and is started as a session leader. </li>
+ <li> An optional regular file named <tt>notification-fd</tt>. If such a file
+exists, it means that the service supports
+<a href="notifywhenup.html">readiness notification</a>. The file must only
+ contain an unsigned integer, which is the number of the file descriptor that
+the service writes its readiness notification to. (For instance, it should
+be 1 if the daemon is <a href="s6-ipcserverd.html">s6-ipcserverd</a> run with the
+<tt>-1</tt> option.)
+ When a service is started, or restarted, by s6-supervise, if this file
+exists and contains a valid descriptor number, s6-supervise will wait for the
+notification from the service and broadcast readiness, i.e. any
+<a href="s6-svwait.html">s6-svwait -U</a>,
+<a href="s6-svlisten1.html">s6-svlisten1 -U</a> or
+<a href="s6-svlisten.html">s6-svlisten -U</a> processes will be
+triggered. </li>
<li> A <a href="fifodir.html">fifodir</a> named <tt>event</tt>. It is automatically
created by <a href="s6-supervise.html">s6-supervise</a> if it does not exist.
<em>foo</em><tt>/event</tt>
diff --git a/doc/upgrade.html b/doc/upgrade.html
@@ -18,6 +18,18 @@
<h1> What has changed in s6 </h1>
+<h2> in 2.1.4.0 </h2>
+
+<ul>
+ <li> skalibs dependency bumped to 2.3.5.1. </li>
+ <li> execline dependency bumped to 2.1.2.2. </li>
+ <li> The <a href="s6-notifywhenup.html">s6-notifywhenup</a> command
+has been deprecated. Race-free
+<a href="notifywhenup.html">readiness notification</a> can now be implemented
+via the <tt>notification-fd</tt> file in a
+<a href="servicedir.html">service directory</a>. </li>
+</ul>
+
<h2> in 2.1.3.0 </h2>
<ul>
diff --git a/package/info b/package/info
@@ -1,4 +1,4 @@
package=s6
-version=2.1.3.0
+version=2.1.4.0
category=admin
package_macro_name=S6
diff --git a/src/supervision/s6-notifywhenup.c b/src/supervision/s6-notifywhenup.c
@@ -72,6 +72,7 @@ int main (int argc, char const *const *argv, char const *const *envp)
argc -= l.ind ; argv += l.ind ;
}
if (!argc) dieusage() ;
+ strerr_warnw1x("this program is deprecated. Use a notification-fd file instead.") ;
{
int p[2] ;
diff --git a/src/supervision/s6-supervise.c b/src/supervision/s6-supervise.c
@@ -1,7 +1,6 @@
/* ISC license. */
#include <sys/types.h>
-#include <sys/stat.h>
#include <sys/wait.h>
#include <unistd.h>
#include <errno.h>
@@ -47,8 +46,8 @@ typedef action_t *action_t_ref ;
static tain_t deadline ;
static s6_svstatus_t status = { .stamp = TAIN_ZERO, .pid = 0, .flagwant = 1, .flagwantup = 1, .flagpaused = 0, .flagfinishing = 0, .wstat = 0 } ;
static state_t state = DOWN ;
-static int flagsetsid = 1 ;
static int cont = 1 ;
+static int notifyfd = -1 ;
static inline void settimeout (int secs)
{
@@ -137,21 +136,64 @@ static void killc (void)
announce() ;
}
+/* Write a single NUL byte to fd, leaving errno exactly as it was on entry.
+   The child branch of trystart() calls this on the write end of its
+   close-on-exec pipe right before dying with a *sys diagnostic, which is
+   why the caller's errno must survive the write. The result of the write
+   itself is deliberately ignored. */
+static void failcoe (int fd)
+{
+  int const saved_errno = errno ;
+  fd_write(fd, "", 1) ;
+  errno = saved_errno ;
+}
+
+/* Call setsid() unless a "nosetsid" file exists in the current directory.
+   Returns 0 when the existence check fails for any reason other than
+   ENOENT (errno is then the one set by access()), 1 otherwise. The
+   result of setsid() is not checked. */
+static int maybesetsid (void)
+{
+  if (access("nosetsid", F_OK) >= 0) return 1 ;
+  if (errno != ENOENT) return 0 ;
+  setsid() ;
+  return 1 ;
+}
+
static void trystart (void)
{
int p[2] ;
+ int notifyp[2] = { -1, -1 } ;
+ unsigned int fd ;
pid_t pid ;
if (pipecoe(p) < 0)
{
settimeout(60) ;
- strerr_warnwu1sys("pipecoe (waiting 60 seconds)") ;
+ strerr_warnwu1sys("pipe (waiting 60 seconds)") ;
return ;
}
+ {
+ char buf[UINT_FMT + 1] ;
+ register int r = openreadnclose("notification-fd", buf, UINT_FMT) ;
+ if (r < 0)
+ {
+ if (errno != ENOENT)
+ strerr_warnwu1sys("open notification-fd") ;
+ }
+ else
+ {
+ buf[byte_chr(buf, r, '\n')] = 0 ;
+ if (!uint0_scan(buf, &fd))
+ strerr_warnw1x("invalid notification-fd") ;
+ else if (pipe(notifyp) < 0)
+ {
+ settimeout(60) ;
+ strerr_warnwu1sys("pipe (waiting 60 seconds)") ;
+ fd_close(p[1]) ; fd_close(p[0]) ;
+ return ;
+ }
+ }
+ }
pid = fork() ;
if (pid < 0)
{
settimeout(60) ;
strerr_warnwu1sys("fork (waiting 60 seconds)") ;
+ if (notifyp[1] >= 0) fd_close(notifyp[1]) ;
+ if (notifyp[0] >= 0) fd_close(notifyp[0]) ;
fd_close(p[1]) ; fd_close(p[0]) ;
return ;
}
@@ -160,14 +202,25 @@ static void trystart (void)
char const *cargv[2] = { "run", 0 } ;
PROG = "s6-supervise (child)" ;
selfpipe_finish() ;
- fd_close(p[0]) ;
+ if (notifyp[0] >= 0) close(notifyp[0]) ;
+ close(p[0]) ;
if (unlink(S6_SUPERVISE_READY_FILENAME) < 0 && errno != ENOENT)
strerr_warnwu1sys("unlink " S6_SUPERVISE_READY_FILENAME) ;
- if (flagsetsid) setsid() ;
+ if (notifyp[1] >= 0 && fd_move((int)fd, notifyp[1]) < 0)
+ {
+ failcoe(p[1]) ;
+ strerr_diefu1sys(127, "move notification descriptor") ;
+ }
+ if (!maybesetsid())
+ {
+ failcoe(p[1]) ;
+ strerr_diefu1sys(127, "access ./nosetsid") ;
+ }
execve("./run", (char *const *)cargv, (char *const *)environ) ;
- fd_write(p[1], "", 1) ;
+ failcoe(p[1]) ;
strerr_dieexec(127, "run") ;
}
+ if (notifyp[1] >= 0) fd_close(notifyp[1]) ;
fd_close(p[1]) ;
{
char c ;
@@ -189,6 +242,7 @@ static void trystart (void)
}
}
fd_close(p[0]) ;
+ notifyfd = notifyp[0] ;
settimeout_infinite() ;
state = UP ;
status.pid = pid ;
@@ -249,7 +303,7 @@ static inline void tryfinish (int islast)
selfpipe_finish() ;
fmt0[uint_fmt(fmt0, WIFSIGNALED(status.wstat) ? 256 : WEXITSTATUS(status.wstat))] = 0 ;
fmt1[uint_fmt(fmt1, WTERMSIG(status.wstat))] = 0 ;
- if (flagsetsid) setsid() ;
+ maybesetsid() ;
execve("./finish", cargv, (char *const *)environ) ;
_exit(127) ;
}
@@ -269,6 +323,11 @@ static void uplastup_z (int islast)
status.wstat = status.pid ;
status.pid = 0 ;
tain_copynow(&status.stamp) ;
+ if (notifyfd >= 0)
+ {
+ fd_close(notifyfd) ;
+ notifyfd = -1 ;
+ }
tryfinish(islast) ;
announce() ;
ftrigw_notifyb_nosig(S6_SUPERVISE_EVENTDIR, "d", 1) ;
@@ -368,11 +427,36 @@ static action_t_ref const actions[5][23] =
} ;
+
/* The main loop.
It just loops around the iopause(), calling snippets of code in "actions" when needed. */
-static void handle_signals (void)
+/* Handle activity on the readiness-notification pipe (notifyfd).
+   Called from the main loop when iopause reports notifyfd readable.
+   Only ONE read is performed per call: trystart() creates this pipe with
+   a plain pipe(), so nothing visible here makes it non-blocking, and
+   re-reading until the pipe is empty could stall the supervisor (signals
+   and control commands included) after a newline-less write. Data left
+   in the pipe simply makes the next iopause report it readable again.
+   When a newline is seen, STAMP is packed into the ready file and a 'U'
+   event is sent to the event fifodir (even if writing the ready file
+   failed, which only produces a warning), then the pipe is closed.
+   A negative result from sanitize_read (read error; presumably also
+   EOF - confirm against skalibs) closes the pipe as well. */
+static inline void handle_notifyfd (void)
+{
+  char buf[4096] ;
+  register int r = sanitize_read(fd_read(notifyfd, buf, 4096)) ;
+  if (r > 0 && byte_chr(buf, r, '\n') < r)
+  {
+    char pack[TAIN_PACK] ;
+    tain_pack(pack, &STAMP) ;
+    if (!openwritenclose_suffix(S6_SUPERVISE_READY_FILENAME, pack, TAIN_PACK, ".new"))
+      strerr_warnwu3sys("open ", S6_SUPERVISE_READY_FILENAME, " for writing") ;
+    ftrigw_notifyb_nosig(S6_SUPERVISE_EVENTDIR, "U", 1) ;
+    r = -1 ; /* readiness has been announced: stop listening */
+  }
+  if (r < 0)
+  {
+    fd_close(notifyfd) ;
+    notifyfd = -1 ;
+  }
+}
+
+static inline void handle_signals (void)
{
for (;;)
{
@@ -410,7 +494,7 @@ static void handle_signals (void)
}
}
-static void handle_control (int fd)
+static inline void handle_control (int fd)
{
for (;;)
{
@@ -428,7 +512,7 @@ static void handle_control (int fd)
int main (int argc, char const *const *argv)
{
- iopause_fd x[2] = { { -1, IOPAUSE_READ, 0 }, { -1, IOPAUSE_READ, 0 } } ;
+ iopause_fd x[3] = { { -1, IOPAUSE_READ, 0 }, { -1, IOPAUSE_READ, 0 }, { -1, IOPAUSE_READ, 0 } } ;
PROG = "s6-supervise" ;
if (argc < 2) strerr_dieusage(100, USAGE) ;
if (chdir(argv[1]) < 0) strerr_diefu2sys(111, "chdir to ", argv[1]) ;
@@ -460,21 +544,9 @@ int main (int argc, char const *const *argv)
if (!ftrigw_clean(S6_SUPERVISE_EVENTDIR))
strerr_warnwu2sys("ftrigw_clean ", S6_SUPERVISE_EVENTDIR) ;
- {
- struct stat st ;
- if (stat("down", &st) == -1)
- {
- if (errno != ENOENT)
- strerr_diefu1sys(111, "stat down") ;
- }
- else status.flagwantup = 0 ;
- if (stat("nosetsid", &st) == -1)
- {
- if (errno != ENOENT)
- strerr_diefu1sys(111, "stat nosetsid") ;
- }
- else flagsetsid = 0 ;
- }
+ if (access("down", F_OK) == 0) status.flagwantup = 0 ;
+ else if (errno != ENOENT)
+ strerr_diefu1sys(111, "access ./down") ;
tain_now_g() ;
settimeout(0) ;
@@ -484,13 +556,16 @@ int main (int argc, char const *const *argv)
while (cont)
{
- register int r = iopause_g(x, 2, &deadline) ;
+ register int r ;
+ x[2].fd = notifyfd ;
+ r = iopause_g(x, 2 + (notifyfd >= 0), &deadline) ;
if (r < 0) strerr_diefu1sys(111, "iopause") ;
else if (!r) (*actions[state][V_TIMEOUT])() ;
else
{
if ((x[0].revents | x[1].revents) & IOPAUSE_EXCEPT)
strerr_diefu1x(111, "iopause: trouble with pipes") ;
+ if (notifyfd >= 0 && x[2].revents & IOPAUSE_READ) handle_notifyfd() ;
if (x[0].revents & IOPAUSE_READ) handle_signals() ;
else if (x[1].revents & IOPAUSE_READ) handle_control(x[1].fd) ;
}