commit ea67a084fac347a543d86e2c761340dee36d380f
parent 1682ba157c4c8e15903174243152c8675e47326d
Author: Laurent Bercot <ska-skaware@skarnet.org>
Date: Sat, 19 Aug 2017 15:37:17 +0000
Add s6-notifyoncheck
Diffstat:
7 files changed, 415 insertions(+), 0 deletions(-)
diff --git a/doc/index.html b/doc/index.html
@@ -154,6 +154,7 @@ a user interface to control those processes and monitor service states.
<li><a href="s6-svwait.html">The <tt>s6-svwait</tt> program</a></li>
<li><a href="s6-svlisten1.html">The <tt>s6-svlisten1</tt> program</a></li>
<li><a href="s6-svlisten.html">The <tt>s6-svlisten</tt> program</a></li>
+<li><a href="s6-notifyoncheck.html">The <tt>s6-notifyoncheck</tt> program</a></li>
</ul>
<h4> Daemontools-like utilities </h4>
diff --git a/doc/s6-notifyoncheck.html b/doc/s6-notifyoncheck.html
@@ -0,0 +1,160 @@
+<html>
+ <head>
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+ <meta http-equiv="Content-Language" content="en" />
+ <title>s6: the s6-notifyoncheck program</title>
+ <meta name="Description" content="s6: the s6-notifyoncheck program" />
+ <meta name="Keywords" content="s6 command s6-notifyoncheck notification service check polling" />
+ <!-- <link rel="stylesheet" type="text/css" href="//skarnet.org/default.css" /> -->
+ </head>
+<body>
+
+<p>
+<a href="index.html">s6</a><br />
+<a href="//skarnet.org/software/">Software</a><br />
+<a href="//skarnet.org/">skarnet.org</a>
+</p>
+
+<h1> The s6-notifyoncheck program </h1>
+
+<p>
+<tt>s6-notifyoncheck</tt> is a chain-loading program meant to be used
+in run scripts, in a service that has been declared to honor
+readiness notification. It implements a policy of running a user-provided
+executable in the background that polls the service currently being
+launched, in order to check when it becomes ready. It feeds the
+result of this check into the s6 notification mechanism.
+</p>
+
+<p>
+s6-notifyoncheck should <strong>only</strong> be used with daemons
+that can be polled from the outside to check readiness, and that
+<strong>do not implement readiness notification themselves</strong>.
+</p>
+
+<h2> Interface </h2>
+
+<pre>
+ s6-notifyoncheck [ -d ] [ -3 <em>notiffd</em> ] [ -s <em>initialsleep</em> ] [ -T <em>globaltimeout</em> ] [ -t <em>localtimeout</em> ] [ -w <em>waitingtime</em> ] [ -n <em>n</em> ] [ -c <em>checkprog</em> ] <em>prog...</em>
+</pre>
+
+<p>
+ s6-notifyoncheck forks and runs as the child; the parent immediately execs into
+<em>prog...</em>, the daemon that must be checked for readiness.
+</p>
+
+<p>
+ s6-notifyoncheck first waits for a little time, then it spawns the
+<tt>./data/check</tt> executable and waits for it to exit. If <tt>./data/check</tt>
+exits 0, then s6-notifyoncheck reports that the service is ready, then
+exits. If <tt>./data/check</tt> exits anything else, s6-notifyoncheck sleeps
+for a little time, then spawns <tt>./data/check</tt> again. It loops until
+<tt>./data/check</tt> succeeds, or 7 attempts fail, or a certain amount of
+time elapses.
+</p>
+
+<h2> Exit codes </h2>
+
+<p>
+ s6-notifyoncheck can exit before executing into <em>prog</em>:
+</p>
+
+<ul>
+ <li> 100: wrong usage </li>
+ <li> 111: system call failed </li>
+</ul>
+
+<p>
+ After forking, s6-notifyoncheck (running as the child) can
+exit with the following exit codes, but those are meaningless
+because no process will, or should, check them. They are only
+differentiated for clarity in the code:
+</p>
+
+<ul>
+ <li> 0: service readiness achieved and notification sent </li>
+ <li> 1: maximum number of attempts reached, all unsuccessful </li>
+ <li> 2: <em>prog</em> died, so s6-notifyoncheck exited early </li>
+ <li> 3: timed out before readiness was achieved </li>
+ <li> 111: system call failed </li>
+</ul>
+
+<h2> Options </h2>
+
+<ul>
+ <li> <tt>-d</tt> : doublefork. s6-notifyoncheck will run as the
+grandchild of <em>prog...</em> instead of its direct child. This is useful
+if <em>prog...</em> never reaps zombies it does not know it has. </li>
+ <li> <tt>-3 <em>notiffd</em></tt> : use <em>notiffd</em> as the
+file descriptor to send a readiness notification to. By default, this
+number is automatically read from the <tt>./notification-fd</tt> file. </li>
+ <li> <tt>-s <em>initialsleep</em></tt> : sleep for
+<em>initialsleep</em> milliseconds before starting to poll the service
+for readiness. Default is 10 milliseconds. </li>
+ <li> <tt>-T <em>globaltimeout</em></tt> : give up (and leave
+the service <em>up</em> but not <em>ready</em>) if service readiness still
+has not been detected after <em>globaltimeout</em> milliseconds. Default
+is 0, meaning infinite: s6-notifyoncheck will keep polling until it succeeds. </li>
+ <li> <tt>-t <em>localtimeout</em></tt> : on every attempt, if
+<tt>./data/check</tt> still has not exited after <em>localtimeout</em> milliseconds,
+kill it and declare that attempt failed. Default is 0, meaning infinite:
+s6-notifyoncheck will wait forever for <tt>./data/check</tt> to exit. </li>
+ <li> <tt>-w <em>waitingtime</em></tt> : sleep for
+<em>waitingtime</em> milliseconds between two invocations of <tt>./data/check</tt>.
+This is basically the polling period. Default is 1000: the service will
+be polled every second. </li>
+ <li> <tt>-n <em>n</em></tt> : give up after <em>n</em>
+unsuccessful invocations of <tt>./data/check</tt>. 0 means infinite, i.e. keep
+polling until it succeeds, or times out, or the service dies first.
+Default is 7. </li>
+ <li> <tt>-c <em>checkprog...</em></tt> : invoke <em>checkprog...</em>
+instead of <tt>./data/check</tt>. The <em>checkprog</em> string will be parsed by
+<a href="//skarnet.org/software/execline/execlineb.html">execlineb</a>, so it
+can contain a full command line. This option is mainly useful if the program
+used to poll the service is very simple and can be inlined as a simple
+command line, to avoid needing to manage a whole script and a <tt>./data/check</tt>
+file. </li>
+</ul>
+
+<h2> Usage </h2>
+
+<p>
+ s6-notifyoncheck is designed to make it possible for services to use the
+<a href="//skarnet.org/software/s6/notifywhenup.html">s6 notification
+mechanism</a> even with daemons that do not natively implement the
+mechanism of writing a newline to a file descriptor of their choice when
+they're ready.
+</p>
+
+<p>
+<a href="//skarnet.org/cgi-bin/archive.cgi?2:mss:1607:dfblejammjllfkggpcph">Polling</a>
+is evil. Please make sure you really have no other choice before writing a
+<tt>./data/check</tt> program and using s6-notifyoncheck in your run script.
+If you have access to the source code of the daemon you want to check for
+readiness, consider patching it to add readiness notification support, which
+is extremely simple and does not require linking against any s6 library.
+</p>
+
+<p>
+ If using a <tt>./data/check</tt> program is your only option:
+</p>
+
+<ul>
+ <li> Make sure the <tt>./data</tt> subdirectory is readable and that
+<tt>./data/check</tt> is executable, exits 0 if the daemon
+it checks is ready, and exits nonzero if the daemon is not ready. </li>
+ <li> Add a <tt>./notification-fd</tt> file to your service directory,
+which can contain any number that is not 0, 1 or 2, or anything else
+explicitly used in your run script. The daemon does not need to care
+about that file descriptor; from the daemon's point of view, nothing
+changes. </li>
+ <li> In your run script, insert <tt>s6-notifyoncheck</tt> in the
+command line that will execute into your daemon. </li>
+ <li> <tt>./data/check</tt> will run as the same user as s6-notifyoncheck.
+If s6-notifyoncheck runs after the run script's process has lost its
+root privileges, make sure that <tt>./data/check</tt> is accessible
+and runnable as that user. </li>
+</ul>
+
+</body>
+</html>
diff --git a/package/deps.mak b/package/deps.mak
@@ -117,6 +117,7 @@ src/pipe-tools/s6-ftrig-listen1.o src/pipe-tools/s6-ftrig-listen1.lo: src/pipe-t
src/pipe-tools/s6-ftrig-notify.o src/pipe-tools/s6-ftrig-notify.lo: src/pipe-tools/s6-ftrig-notify.c src/include/s6/ftrigw.h
src/pipe-tools/s6-ftrig-wait.o src/pipe-tools/s6-ftrig-wait.lo: src/pipe-tools/s6-ftrig-wait.c src/include/s6/ftrigr.h
src/pipe-tools/s6-mkfifodir.o src/pipe-tools/s6-mkfifodir.lo: src/pipe-tools/s6-mkfifodir.c src/include/s6/ftrigw.h
+src/supervision/s6-notifyoncheck.o src/supervision/s6-notifyoncheck.lo: src/supervision/s6-notifyoncheck.c src/include/s6/ftrigr.h src/include/s6/s6-supervise.h
src/supervision/s6-supervise.o src/supervision/s6-supervise.lo: src/supervision/s6-supervise.c src/include/s6/ftrigw.h src/include/s6/s6-supervise.h
src/supervision/s6-svc.o src/supervision/s6-svc.lo: src/supervision/s6-svc.c src/include/s6/config.h src/include/s6/s6-supervise.h
src/supervision/s6-svlisten.o src/supervision/s6-svlisten.lo: src/supervision/s6-svlisten.c src/supervision/s6-svlisten.h
@@ -230,6 +231,8 @@ s6-ftrig-wait: EXTRA_LIBS := ${SOCKET_LIB} ${TAINNOW_LIB} ${SPAWN_LIB}
s6-ftrig-wait: src/pipe-tools/s6-ftrig-wait.o ${LIBS6} -lskarnet
s6-mkfifodir: EXTRA_LIBS :=
s6-mkfifodir: src/pipe-tools/s6-mkfifodir.o ${LIBS6} -lskarnet
+s6-notifyoncheck: EXTRA_LIBS := ${SOCKET_LIB} ${TAINNOW_LIB} ${SPAWN_LIB}
+s6-notifyoncheck: src/supervision/s6-notifyoncheck.o ${LIBS6} -lskarnet
s6-supervise: EXTRA_LIBS := ${TAINNOW_LIB}
s6-supervise: src/supervision/s6-supervise.o ${LIBS6} -lskarnet
s6-svc: EXTRA_LIBS :=
diff --git a/package/modes b/package/modes
@@ -17,6 +17,7 @@ s6-svstat 0755
s6-svwait 0755
s6-svlisten1 0755
s6-svlisten 0755
+s6-notifyoncheck 0755
s6-applyuidgid 0700
s6-envdir 0755
s6-envuidgid 0755
diff --git a/package/targets.mak b/package/targets.mak
@@ -17,6 +17,7 @@ s6-svstat \
s6-svwait \
s6-svlisten1 \
s6-svlisten \
+s6-notifyoncheck \
s6-envdir \
s6-envuidgid \
s6-fghack \
diff --git a/src/supervision/deps-exe/s6-notifyoncheck b/src/supervision/deps-exe/s6-notifyoncheck
@@ -0,0 +1,5 @@
+${LIBS6}
+-lskarnet
+${SOCKET_LIB}
+${TAINNOW_LIB}
+${SPAWN_LIB}
diff --git a/src/supervision/s6-notifyoncheck.c b/src/supervision/s6-notifyoncheck.c
@@ -0,0 +1,244 @@
+/* ISC license. */
+
+#include <stdint.h>
+#include <unistd.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sys/wait.h>
+#include <skalibs/types.h>
+#include <skalibs/bytestr.h>
+#include <skalibs/sgetopt.h>
+#include <skalibs/strerr2.h>
+#include <skalibs/tai.h>
+#include <skalibs/djbunix.h>
+#include <skalibs/selfpipe.h>
+#include <skalibs/iopause.h>
+#include <execline/config.h>
+#include <s6/s6-supervise.h>
+#include <s6/ftrigr.h>
+
+#define USAGE "s6-notifyoncheck [ -d ] [ -3 fd ] [ -s initialsleep ] [ -T globaltimeout ] [ -t localtimeout ] [ -w waitingtime ] [ -n tries ] [ -c \"checkprog...\" ] prog..."
+#define dieusage() strerr_dieusage(100, USAGE)
+
+
+/*
+  Read an unsigned integer from file.
+  The contents are cut at the first newline (or at the end of what was
+  read, if there is none) and handed to uint0_scan.
+  Returns -1 if openreadnclose_nb fails, 1 if uint0_scan accepted the
+  text and stored the value in *fd, 0 otherwise.
+*/
+static inline int read_uint (char const *file, unsigned int *fd)
+{
+  char buf[UINT_FMT + 1] ;
+  size_t eol ;
+  ssize_t len = openreadnclose_nb(file, buf, UINT_FMT) ;
+  if (len < 0) return -1 ;
+  eol = byte_chr(buf, len, '\n') ;
+  buf[eol] = 0 ;
+  return uint0_scan(buf, fd) ? 1 : 0 ;
+}
+
+/*
+  Drain the selfpipe. For every SIGCHLD read from it, call
+  wait_pid_nohang on pid; when that call reports pid, store the wait
+  status in *w.
+  Returns 1 if a status for pid was stored during this call, 0 otherwise.
+  Dies with exit code 111 if selfpipe_read fails.
+*/
+static inline int handle_signals (pid_t pid, int *w)
+{
+  int reaped = 0 ;
+  for (;;)
+  {
+    int sig = selfpipe_read() ;
+    if (sig == -1) strerr_diefu1sys(111, "selfpipe_read") ;
+    if (!sig) break ;  /* nothing left in the selfpipe */
+    if (sig == SIGCHLD)
+    {
+      int status ;
+      if (wait_pid_nohang(pid, &status) != pid) continue ;
+      *w = status ;
+      reaped = 1 ;
+    }
+  }
+  return reaped ;
+}
+
+/*
+  Update the ftrigr client state and check subscription id.
+  Returns 1 if a 'd' event was reported for id, after sending SIGTERM
+  to pid when pid is nonzero; returns 0 otherwise.
+  Dies with exit code 111 if ftrigr_update or ftrigr_check fails.
+*/
+static int handle_event (ftrigr_t *a, uint16_t id, pid_t pid)
+{
+  char what ;
+  int r ;
+  if (ftrigr_update(a) < 0) strerr_diefu1sys(111, "ftrigr_update") ;
+  r = ftrigr_check(a, id, &what) ;
+  if (r < 0) strerr_diefu1sys(111, "ftrigr_check") ;
+  if (!r || what != 'd') return 0 ;
+  if (pid) kill(pid, SIGTERM) ;
+  return 1 ;
+}
+
+
+/*
+  Entry point.
+  Parses the options, checks that s6-supervise is running on the current
+  directory and that the notification fd is open, subscribes to 'd'
+  events in the ./event fifodir, then forks (or doubleforks with -d).
+  The parent closes the notification fd and execs into prog...; the
+  child repeatedly sleeps, spawns the check program, and writes a
+  newline to the notification fd as soon as one check exits 0.
+
+  Exit codes before the fork: 100 (usage or sanity error), 111 (system
+  call failed).
+  Exit codes of the polling child: 0 (notification written), 1 (all
+  tries failed), 2 ('d' event received), 3 (global deadline reached),
+  111 (system call failed).
+*/
+int main (int argc, char const *const *argv, char const *const *envp)
+{
+  ftrigr_t a = FTRIGR_ZERO ;
+  iopause_fd x[2] = { { .events = IOPAUSE_READ }, { .events = IOPAUSE_READ } } ;
+  char const *childargv[4] = { EXECLINE_EXTBINPREFIX "execlineb", "-c", 0, 0 } ;
+  char const *checkprog = 0 ;
+  unsigned int fd ;
+  int df = 0 ;
+  int autodetect = 1 ;
+  tain_t globaldeadline, sleeptto, localtto, waittto ;
+  unsigned int tries = 7 ;
+  uint16_t id ;
+  PROG = "s6-notifyoncheck" ;
+
+  {
+    subgetopt_t l = SUBGETOPT_ZERO ;
+    unsigned int initialsleep = 10, globaltimeout = 0, localtimeout = 0, waitingtime = 1000 ;
+    for (;;)
+    {
+      int opt = subgetopt_r(argc, argv, "d3:s:T:t:w:n:c:", &l) ;
+      if (opt == -1) break ;
+      switch (opt)
+      {
+        case 'd' : df = 1 ; break ;
+        case '3' : if (!uint0_scan(l.arg, &fd)) dieusage() ; autodetect = 0 ; break ;
+        case 's' : if (!uint0_scan(l.arg, &initialsleep)) dieusage() ; break ;
+        case 'T' : if (!uint0_scan(l.arg, &globaltimeout)) dieusage() ; break ;
+        case 't' : if (!uint0_scan(l.arg, &localtimeout)) dieusage() ; break ;
+        case 'w' : if (!uint0_scan(l.arg, &waitingtime)) dieusage() ; break ;
+        case 'n' : if (!uint0_scan(l.arg, &tries)) dieusage() ; break ;
+        case 'c' : checkprog = l.arg ; break ;
+        default : dieusage() ;
+      }
+    }
+    argc -= l.ind ; argv += l.ind ;
+    if (!argc) dieusage() ;
+
+    /* A value of 0 for -T, -t or -w selects an infinite duration. */
+    if (!tain_from_millisecs(&sleeptto, initialsleep)) dieusage() ;
+    if (globaltimeout) tain_from_millisecs(&globaldeadline, globaltimeout) ;
+    else globaldeadline = tain_infinite_relative ;
+    if (localtimeout) tain_from_millisecs(&localtto, localtimeout) ;
+    else localtto = tain_infinite_relative ;
+    if (waitingtime) tain_from_millisecs(&waittto, waitingtime) ;
+    else waittto = tain_infinite_relative ;
+    /* -n 0 means unlimited tries; UINT_MAX is the "never decrement" marker. */
+    if (!tries) tries = UINT_MAX ;
+  }
+
+  {
+    int r = s6_svc_ok(".") ;
+    if (r < 0) strerr_diefu1sys(111, "sanity-check current service directory") ;
+    if (!r) strerr_dief1x(100, "s6-supervise not running.") ;
+  }
+  /* With -c, run the string through execlineb -c; otherwise run ./data/check. */
+  if (checkprog) childargv[2] = checkprog ;
+  else
+  {
+    childargv[0] = "./data/check" ;
+    childargv[1] = 0 ;
+  }
+
+  if (autodetect)
+  {
+    int r = read_uint("notification-fd", &fd) ;
+    if (r < 0) strerr_diefu2sys(111, "read ", "./notification-fd") ;
+    if (!r) strerr_dief2x(100, "invalid ", "./notification-fd") ;
+  }
+  if (fcntl(fd, F_GETFD) < 0)
+    strerr_dief2sys(111, "notification-fd", " sanity check failed") ;
+
+  /* Turn the relative global timeout into an absolute deadline. */
+  tain_now_g() ;
+  tain_add_g(&globaldeadline, &globaldeadline) ;
+
+
+  /*
+    Fork, let the parent exec into the daemon, keep working in the child.
+
+    We want the child to die if the parent dies, because no need to keep
+    polling a dead service. And another child will be spawned next time the
+    service is relaunched by s6-supervise.
+    We could keep a pipe from the parent to the child, for death
+    notification, but that's an additional fd forever open in the parent,
+    which is not good (we need to be 100% transparent).
+    So we're using ftrigr to listen to a 'd' event in the servicedir's
+    fifodir. It's much heavier, but temporary - it doesn't use permanent
+    resources in the daemon - and we're polling anyway, so the user
+    doesn't care about being 100% lightweight.
+  */
+
+  if (!ftrigr_startf_g(&a, &globaldeadline))
+    strerr_diefu1sys(111, "ftrigr_startf") ;
+  id = ftrigr_subscribe_g(&a, "event", "d", 0, &globaldeadline) ;
+  if (!id) strerr_diefu1sys(111, "ftrigr_subscribe to event fifodir") ;
+  switch (df ? doublefork() : fork())
+  {
+    case -1: strerr_diefu1sys(111, df ? "doublefork" : "fork") ;
+    case 0 : break ;
+    default:
+    {
+      /* Parent: drop the notification fd, then become the daemon. */
+      close((int)fd) ;
+      xpathexec_run(argv[0], argv, envp) ;
+    }
+  }
+
+  /* Child: x[0] watches the selfpipe (SIGCHLD), x[1] the ftrigr connection. */
+  x[0].fd = selfpipe_init() ;
+  if (x[0].fd < 0) strerr_diefu1sys(111, "selfpipe_init") ;
+  if (selfpipe_trap(SIGCHLD) < 0) strerr_diefu1sys(111, "trap SIGCHLD") ;
+  x[1].fd = ftrigr_fd(&a) ;
+
+
+  /* Loop around a sleep and a ./data/check invocation */
+
+  while (tries == UINT_MAX || tries--)
+  {
+    tain_t deadline = globaldeadline ;
+    tain_t localdeadline ;
+    pid_t pid ;
+
+    /* Sleep: initialsleep on the first pass, waitingtime on later ones. */
+    tain_add_g(&localdeadline, &sleeptto) ;
+    sleeptto = waittto ;
+    if (tain_less(&localdeadline, &deadline)) deadline = localdeadline ;
+    for (;;)
+    {
+      int r = iopause_g(x+1, 1, &deadline) ;
+      if (r < 0) strerr_diefu1sys(111, "iopause") ;
+      if (!r)
+      {
+        if (!tain_future(&globaldeadline)) return 3 ;
+        else break ;
+      }
+      if (handle_event(&a, id, 0)) return 2 ;
+    }
+
+    pid = child_spawn0(childargv[0], childargv, envp) ;
+    if (!pid)
+    {
+      strerr_warnwu2sys("spawn ", childargv[0]) ;
+      continue ;
+    }
+
+    /* Wait for the check to exit, bounded by the local and global deadlines. */
+    deadline = globaldeadline ;
+    tain_add_g(&localdeadline, &localtto) ;
+    if (tain_less(&localdeadline, &deadline)) deadline = localdeadline ;
+    for (;;)
+    {
+      int r = iopause_g(x, 2, &deadline) ;
+      if (r < 0) strerr_diefu1sys(111, "iopause") ;
+      if (!r)
+      {
+        /*
+          Timeout, local or global: this attempt has failed either way,
+          so terminate the check program instead of leaving it running
+          alongside later attempts.
+        */
+        kill(pid, SIGTERM) ;
+        if (!tain_future(&globaldeadline)) return 3 ;
+        break ;
+      }
+      if (x[0].revents & IOPAUSE_READ)
+      {
+        int wstat ;
+        if (handle_signals(pid, &wstat))
+        {
+          if (WIFEXITED(wstat) && !WEXITSTATUS(wstat))
+          {
+            /* Check succeeded: send the readiness notification. */
+            write((int)fd, "\n", 1) ;
+            return 0 ;
+          }
+          else break ;
+        }
+      }
+      if (x[1].revents & IOPAUSE_READ && handle_event(&a, id, pid)) return 2 ;
+    }
+  }
+
+  return 1 ;
+}