File: /srv/admin/scripts/watchd
#!/usr/bin/env python
import sys
import os
import argparse
import atexit
import daemon
import signal
import subprocess
import time
import errno
# Minimum runtime, in seconds, for a child process to count as having started
# successfully; Watcher pauses itself when the child exits sooner than this.
STARTSECS = 1
class Watcher(object):
    """Run one child command and start it again every time it exits.

    The watcher is either *enabled* (it keeps the child running) or *paused*
    (it sleeps in ``signal.pause()`` until ``restart()`` re-enables it).  It
    pauses itself when the child exits less than ``STARTSECS`` seconds after
    being started, or when the working directory is missing.
    """

    def __init__(self, cmd, cwd, logfile):
        """Remember how to launch the child; nothing is started yet.

        cmd     -- argument list handed to ``subprocess.Popen``
        cwd     -- working directory for the child
        logfile -- file that receives the child's stdout and stderr (append)
        """
        self._cmd = cmd
        self._cwd = cwd
        self._logfile = logfile
        self._process = None
        self._start_time = None
        self._enabled = True

    def _wait(self):
        """Block until the child exits, log how it ended, then forget it.

        Pauses the watcher when the child ran for less than ``STARTSECS``
        seconds.  Does nothing when no child is running.
        """
        if self._process is None:
            return
        self._process.wait()
        status = self._process.returncode
        if status >= 0:
            log("process exited with status %d", status)
        else:
            # Popen reports death-by-signal as the negated signal number.
            log("process exited due to signal %d", -status)
        runtime = time.time() - self._start_time
        if runtime < STARTSECS:
            log("process exited too quickly; pausing")
            self._enabled = False
        self._process = None

    def start(self):
        """Launch the child with stdin from the null device and output appended
        to the application log file.

        Exits the script via ``fatal()`` when the redirection files cannot be
        opened or the command cannot be started.  A missing working directory
        is not fatal: the watcher is paused instead.
        """
        streams = []
        try:
            # The two failure classes are handled in separate try blocks
            # because IOError and OSError are the same class on Python 3; a
            # single try with two except clauses would report every spawn
            # failure as a redirection failure there.
            try:
                streams.append(open(os.devnull, "r"))
                streams.append(open(self._logfile, "a"))
            except EnvironmentError as e:
                fatal("could not redirect: %s: %s", e.filename, e.strerror)
            fnull, flog = streams
            try:
                self._process = subprocess.Popen(
                    self._cmd, close_fds=True, cwd=self._cwd,
                    stdin=fnull, stdout=flog, stderr=flog)
            except EnvironmentError as e:
                if e.errno == errno.ENOENT and e.filename == self._cwd:
                    self._enabled = False
                    log("%s missing; pausing", self._cwd)
                    return
                fatal("could not start process: %s", e.strerror)
        finally:
            # The child holds its own copies of the descriptors.
            for stream in streams:
                stream.close()
        self._start_time = time.time()
        log("process started with pid %d", self._process.pid)

    def stop(self):
        """Ask the child to exit, escalating to SIGKILL after five seconds.

        SIGTERM is sent first so the child gets a chance to shut down cleanly;
        SIGKILL is only the fallback.  Does nothing when there is no child or
        it has already exited.  The child is not reaped here beyond what
        ``poll()`` does.
        """
        process = self._process
        if process is None or process.poll() is not None:
            return
        log("stopping process")
        process.terminate()
        for _ in range(5):
            if process.poll() is not None:
                return
            time.sleep(1)
        log("process still alive after 5s; killing process")
        process.kill()

    def restart(self):
        """Stop the child so the watch loop starts a new one, or re-enable a
        paused watcher."""
        if self._enabled:
            self.stop()
        else:
            self._enabled = True

    def watch(self):
        """Run forever: start and wait for the child while enabled, otherwise
        sleep until a signal arrives."""
        while True:
            if self._enabled:
                self.start()
                self._wait()
            else:
                signal.pause()

    def atexit(self):
        """Exit hook: stop the child, if any, and wait for it to finish."""
        if self._process is not None:
            self.stop()
            self._process.wait()
def log_open(logfile):
    """Point ``sys.stderr`` at *logfile*, opened in append mode.

    Exits the script via ``fatal()`` when the file cannot be opened.
    """
    try:
        sys.stderr = open(logfile, "a")
    except IOError as e:
        # The exception must be bound with "as"; the previous
        # "except IOError (_, errstr):" was evaluated as a call to IOError
        # with undefined names and raised NameError instead of reporting.
        fatal("could not open logfile: %s", e.strerror)
def log_reopen():
    """Re-open the file behind ``sys.stderr`` by name and switch over to it.

    The file named by ``sys.stderr.name`` is opened in append mode; on
    success the old stream is flushed and closed and the new one replaces it
    (presumably so logging continues after the file has been rotated away).
    Nothing happens when stderr is a terminal.  If the file cannot be
    re-opened, the error is logged to the existing stream, which stays in use.
    """
    if sys.stderr.isatty():
        return
    try:
        f = open(sys.stderr.name, "a")
    except IOError as e:
        # Bind the exception with "as"; the previous
        # "except IOError (_, errstr):" raised NameError when triggered.
        log("could not reopen logfile: %s", e.strerror)
    else:
        sys.stderr.flush()
        sys.stderr.close()
        sys.stderr = f
def log(msg, *args):
    """Write one timestamped line to ``sys.stderr`` and flush it.

    msg  -- %-style format string
    args -- values substituted into *msg*

    The line has the form "<timestamp> <message>" followed by a newline.
    """
    stamp = time.strftime("%b %e %X")
    # write() emits the same text as the old "print >>sys.stderr, a, b"
    # statement but does not depend on Python 2's print statement.
    sys.stderr.write("%s %s\n" % (stamp, msg % tuple(args)))
    sys.stderr.flush()
def fatal(msg, *args):
    """Log a final message, then terminate the script with exit status 1.

    msg  -- %-style format string, passed to log()
    args -- values substituted into *msg*
    """
    log(msg, *args)
    raise SystemExit(1)
def parse_arguments():
    """Parse the command line and return the resulting namespace.

    Required options: --logfile, --pidfile, --app-dir, --app-logfile.
    Optional flag: --debug.  Everything after the options is collected into
    ``command``, the argument list of the process to supervise.

    Exits with a usage error when an option is missing or when no command is
    given, rather than letting an empty command fail later at spawn time.
    """
    parser = argparse.ArgumentParser(usage="%(prog)s [options] command...")
    parser.add_argument("--logfile", metavar="LOGFILE", required=True)
    parser.add_argument("--pidfile", metavar="PIDFILE", required=True)
    parser.add_argument("--app-dir", metavar="APP_DIR", required=True)
    parser.add_argument("--app-logfile", metavar="APP_LOGFILE", required=True)
    parser.add_argument("--debug", action="store_true", default=False)
    parser.add_argument("command", nargs=argparse.REMAINDER)
    args = parser.parse_args()
    if not args.command:
        # REMAINDER happily matches zero arguments; reject that here.
        parser.error("no command given")
    return args
def write_pidfile(pidfile):
    """Write this process's pid, followed by a newline, to *pidfile*.

    Returns 0 on success.  Exits the script via ``fatal()`` when the file
    cannot be written.
    """
    try:
        with open(pidfile, "w") as f:
            f.write("%d\n" % os.getpid())
    except IOError as e:
        # fatal() does not return, so no further exit call is needed here.
        fatal("could not write pidfile: %s", e.strerror)
    return 0
def setup_signal_handlers(watcher):
    """Install the script's signal handlers.

    Every handler first logs the signal number, then runs its action:

    SIGUSR1 -- log_reopen()
    SIGHUP  -- watcher.restart()
    SIGINT  -- sys.exit()
    SIGTERM -- sys.exit()
    """
    def _logged(action):
        # Adapt a zero-argument callable to the (signum, frame) handler
        # signature, logging the signal before acting on it.
        def _on_signal(signum, frame):
            log("received signal %d", signum)
            action()
        return _on_signal

    bindings = (
        (signal.SIGUSR1, log_reopen),
        (signal.SIGHUP, watcher.restart),
        (signal.SIGINT, sys.exit),
        (signal.SIGTERM, sys.exit),
    )
    for signum, action in bindings:
        signal.signal(signum, _logged(action))
def main():
    """Entry point: parse options, daemonize, and supervise the command.

    Unless --debug is given, stderr is first redirected to the watcher's own
    log file and the process is detached.  Inside the daemon context the exit
    hook and signal handlers are installed and the pidfile is written before
    the watch loop starts; the loop does not return.
    """
    options = parse_arguments()
    debugging = options.debug
    if not debugging:
        log_open(options.logfile)

    watcher = Watcher(options.command, options.app_dir, options.app_logfile)

    # NOTE(review): DaemonContext may change the working directory, which
    # would affect relative --logfile/--pidfile/--app-logfile paths -- confirm
    # against the python-daemon version in use.
    context = daemon.DaemonContext(detach_process=(not debugging),
                                   stderr=sys.stderr)
    with context:
        atexit.register(watcher.atexit)
        setup_signal_handlers(watcher)
        write_pidfile(options.pidfile)
        watcher.watch()
# Run the watcher only when this file is executed as a script.
if __name__ == "__main__":
    main()