[REF] service: killed the ExportService concept.
[odoo/odoo.git] / openerp/service/workers.py
#-----------------------------------------------------------
# Multicorn, multiprocessing inspired by gunicorn
# TODO rename class: Multicorn -> Arbiter ?
#-----------------------------------------------------------
import errno
import fcntl
import logging
import os
import psutil
import random
import resource
import select
import signal
import socket
import sys
import time

import werkzeug.serving
try:
    from setproctitle import setproctitle
except ImportError:
    setproctitle = lambda x: None

import openerp
import openerp.tools.config as config

_logger = logging.getLogger(__name__)

class Multicorn(object):
    """ Multiprocessing inspired by (g)unicorn.
    Multicorn currently uses accept(2) as the dispatching method between
    workers, but we plan to replace it with a smarter dispatcher that will
    parse the first HTTP request line.
    """
    def __init__(self, app):
        # config
        self.address = (config['xmlrpc_interface'] or '0.0.0.0', config['xmlrpc_port'])
        self.population = config['workers']
        self.timeout = config['limit_time_real']
        self.limit_request = config['limit_request']
        # working vars
        self.beat = 4
        self.app = app
        self.pid = os.getpid()
        self.socket = None
        self.workers_http = {}
        self.workers_cron = {}
        self.workers = {}
        self.generation = 0
        self.queue = []

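    # pipe_new()/pipe_ping() implement the classic self-pipe trick: a
    # non-blocking, close-on-exec pipe that a signal handler writes a single
    # byte into so the select() call in sleep() wakes up immediately. The
    # same mechanism doubles as the per-worker watchdog channel.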
    def pipe_new(self):
        pipe = os.pipe()
        for fd in pipe:
            # non-blocking
            flags = fcntl.fcntl(fd, fcntl.F_GETFL) | os.O_NONBLOCK
            fcntl.fcntl(fd, fcntl.F_SETFL, flags)
            # close-on-exec
            flags = fcntl.fcntl(fd, fcntl.F_GETFD) | fcntl.FD_CLOEXEC
            fcntl.fcntl(fd, fcntl.F_SETFD, flags)
        return pipe

    def pipe_ping(self, pipe):
        try:
            os.write(pipe[1], '.')
        except OSError, e:
            # os.write raises OSError (not IOError) when the pipe is full
            if e.errno not in [errno.EAGAIN, errno.EINTR]:
                raise

    def signal_handler(self, sig, frame):
        if len(self.queue) < 5 or sig == signal.SIGCHLD:
            # always queue SIGCHLD so dead workers get reaped; cap the rest
            self.queue.append(sig)
            self.pipe_ping(self.pipe)
        else:
            _logger.warn("Dropping signal: %s", sig)

    def worker_spawn(self, klass, workers_registry):
        self.generation += 1
        worker = klass(self)
        pid = os.fork()
        if pid != 0:
            # parent process: register and return the new worker
            worker.pid = pid
            self.workers[pid] = worker
            workers_registry[pid] = worker
            return worker
        else:
            # child process: run the worker loop, never return
            worker.run()
            sys.exit(0)

    def worker_pop(self, pid):
        if pid in self.workers:
            _logger.debug("Worker (%s) unregistered", pid)
            try:
                self.workers_http.pop(pid, None)
                self.workers_cron.pop(pid, None)
                worker = self.workers.pop(pid)
                worker.close()
            except OSError:
                return

    def worker_kill(self, pid, sig):
        try:
            os.kill(pid, sig)
        except OSError, e:
            if e.errno == errno.ESRCH:
                # no such process: the worker is already gone
                self.worker_pop(pid)

    def process_signals(self):
        while len(self.queue):
            sig = self.queue.pop(0)
            if sig in [signal.SIGINT, signal.SIGTERM]:
                raise KeyboardInterrupt

    def process_zombie(self):
        # reap dead workers
        while 1:
            try:
                wpid, status = os.waitpid(-1, os.WNOHANG)
                if not wpid:
                    break
                # exit code 3 is the workers' convention for a fatal error
                if (status >> 8) == 3:
                    msg = "Critical worker error (%s)"
                    _logger.critical(msg, wpid)
                    raise Exception(msg % wpid)
                self.worker_pop(wpid)
            except OSError, e:
                if e.errno == errno.ECHILD:
                    break
                raise

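    # Watchdog: every worker pings its watchdog_pipe once per loop iteration
    # (see Worker.run). sleep() below records the time of the last ping, and
    # process_timeout() SIGKILLs any worker that has been silent for longer
    # than limit_time_real; process_spawn() then replaces it.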
    def process_timeout(self):
        now = time.time()
        for (pid, worker) in self.workers.items():
            if now - worker.watchdog_time >= worker.watchdog_timeout:
                _logger.error("Worker (%s) timeout", pid)
                self.worker_kill(pid, signal.SIGKILL)

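    # process_spawn() keeps the population at the configured level; e.g.
    # with hypothetical settings in the server configuration file:
    #
    #     workers = 4           ; HTTP worker processes
    #     max_cron_threads = 2  ; cron worker processes
    #
    # a crashed or recycled worker is simply forked again on the next beat.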
    def process_spawn(self):
        while len(self.workers_http) < self.population:
            self.worker_spawn(WorkerHTTP, self.workers_http)
        while len(self.workers_cron) < config['max_cron_threads']:
            self.worker_spawn(WorkerCron, self.workers_cron)

    def sleep(self):
        try:
            # map of fd -> worker
            fds = dict((w.watchdog_pipe[0], w) for w in self.workers.values())
            fd_in = fds.keys() + [self.pipe[0]]
            # check for ping or internal wakeups
            ready = select.select(fd_in, [], [], self.beat)
            # update worker watchdogs
            for fd in ready[0]:
                if fd in fds:
                    fds[fd].watchdog_time = time.time()
                try:
                    # empty pipe
                    while os.read(fd, 1):
                        pass
                except OSError, e:
                    if e.errno not in [errno.EAGAIN]:
                        raise
        except select.error, e:
            if e[0] not in [errno.EINTR]:
                raise

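    # start() installs the signal handlers and opens the one listening
    # socket shared by every worker: children inherit it across fork() and
    # call accept(2) on it directly, which is the dispatching method the
    # class docstring refers to.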
    def start(self):
        # Wakeup pipe: Python does not let syscalls raise EINTR when they
        # are interrupted by a signal, simulating a pseudo SA_RESTART. We
        # write to a pipe in the signal handler to overcome this behaviour.
        self.pipe = self.pipe_new()
        # set signal handlers
        signal.signal(signal.SIGINT, self.signal_handler)
        signal.signal(signal.SIGTERM, self.signal_handler)
        signal.signal(signal.SIGCHLD, self.signal_handler)
        # listening socket, shared with the workers through fork()
        self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.socket.setblocking(0)
        self.socket.bind(self.address)
        self.socket.listen(8)

    def stop(self, graceful=True):
        if graceful:
            _logger.info("Stopping gracefully")
            limit = time.time() + self.timeout
            for pid in self.workers.keys():
                self.worker_kill(pid, signal.SIGTERM)
            while self.workers and time.time() < limit:
                self.process_zombie()
                time.sleep(0.1)
        else:
            _logger.info("Stopping forcefully")
        # terminate whatever is left
        for pid in self.workers.keys():
            self.worker_kill(pid, signal.SIGTERM)
        self.socket.close()
        openerp.cli.server.quit_signals_received = 1

    def run(self):
        self.start()
        _logger.debug("Multiprocess starting")
        while 1:
            try:
                #_logger.debug("Multiprocess beat (%s)", time.time())
                self.process_signals()
                self.process_zombie()
                self.process_timeout()
                self.process_spawn()
                self.sleep()
            except KeyboardInterrupt:
                _logger.debug("Multiprocess clean stop")
                self.stop()
                break
            except Exception, e:
                _logger.exception(e)
                self.stop(False)
                sys.exit(-1)

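# A minimal usage sketch of the arbiter, assuming it is handed the WSGI
# application by the server CLI (the exact call site lives outside this
# file; only openerp.cli.server is referenced here, in stop()):
#
#     app = openerp.service.wsgi_server.application  # assumed entry point
#     Multicorn(app).run()
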
class Worker(object):
    """ Workers """
    def __init__(self, multi):
        self.multi = multi
        self.watchdog_time = time.time()
        self.watchdog_pipe = multi.pipe_new()
        self.watchdog_timeout = multi.timeout
        self.ppid = os.getpid()
        self.pid = None
        self.alive = True
        # should we rename this into lifetime?
        self.request_max = multi.limit_request
        self.request_count = 0

    def close(self):
        os.close(self.watchdog_pipe[0])
        os.close(self.watchdog_pipe[1])

    def signal_handler(self, sig, frame):
        self.alive = False

    def sleep(self):
        try:
            select.select([self.multi.socket], [], [], self.multi.beat)
        except select.error, e:
            if e[0] not in [errno.EINTR]:
                raise

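    # Worker.sleep() blocks on the shared listen socket (or the beat
    # timeout), so an idle worker wakes as soon as a connection may be
    # pending and then races its siblings in accept(); WorkerCron overrides
    # this with a plain time.sleep().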
    def process_limit(self):
        # If our parent changed, commit suicide
        if self.ppid != os.getppid():
            _logger.info("Worker (%s) Parent changed", self.pid)
            self.alive = False
        # check for lifetime
        if self.request_count >= self.request_max:
            _logger.info("Worker (%d) max request (%s) reached.", self.pid, self.request_count)
            self.alive = False
        # Reset the worker if it consumes too much memory (e.g. caused by a memory leak).
        rss, vms = psutil.Process(os.getpid()).get_memory_info()
        if vms > config['limit_memory_soft']:
            _logger.info('Worker (%d) virtual memory limit (%s) reached.', self.pid, vms)
            self.alive = False # Commit suicide after the request.

        # VMS and RLIMIT_AS are the same thing: virtual memory, a.k.a. address space
        soft, hard = resource.getrlimit(resource.RLIMIT_AS)
        resource.setrlimit(resource.RLIMIT_AS, (config['limit_memory_hard'], hard))

        # SIGXCPU (exceeded CPU time) signal handler will raise an exception.
        r = resource.getrusage(resource.RUSAGE_SELF)
        cpu_time = r.ru_utime + r.ru_stime
        def time_expired(n, stack):
            _logger.info('Worker (%d) CPU time limit (%s) reached.', self.pid, config['limit_time_cpu'])
            # We don't commit suicide in that case, we raise instead
            raise Exception('CPU time limit exceeded.')
        signal.signal(signal.SIGXCPU, time_expired)
        soft, hard = resource.getrlimit(resource.RLIMIT_CPU)
        resource.setrlimit(resource.RLIMIT_CPU, (cpu_time + config['limit_time_cpu'], hard))

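    # Notes on the limits above: limit_memory_soft is enforced cooperatively
    # (the worker finishes its current request, then exits and is respawned)
    # while limit_memory_hard is enforced by the kernel through RLIMIT_AS,
    # so oversized allocations fail with MemoryError. With hypothetical
    # values limit_memory_soft=640MB and limit_memory_hard=768MB, a worker
    # keeps a 128MB margin to finish its last request before the hard cut.
    # CPU time works the same way: RLIMIT_CPU delivers SIGXCPU once the
    # process has consumed limit_time_cpu more seconds of CPU time.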
    def process_work(self):
        pass

    def start(self):
        self.pid = os.getpid()
        setproctitle('openerp: %s %s' % (self.__class__.__name__, self.pid))
        _logger.info("Worker %s (%s) alive", self.__class__.__name__, self.pid)
        # Reseed the random number generator
        random.seed()
        # Prevent fd inheritance: close-on-exec
        flags = fcntl.fcntl(self.multi.socket, fcntl.F_GETFD) | fcntl.FD_CLOEXEC
        fcntl.fcntl(self.multi.socket, fcntl.F_SETFD, flags)
        # reset blocking status
        self.multi.socket.setblocking(0)
        signal.signal(signal.SIGINT, self.signal_handler)
        signal.signal(signal.SIGTERM, signal.SIG_DFL)
        signal.signal(signal.SIGCHLD, signal.SIG_DFL)

    def stop(self):
        pass

    def run(self):
        try:
            self.start()
            while self.alive:
                self.process_limit()
                self.multi.pipe_ping(self.watchdog_pipe)
                self.sleep()
                self.process_work()
            _logger.info("Worker (%s) exiting. request_count: %s.", self.pid, self.request_count)
            self.stop()
        except Exception:
            _logger.exception("Worker (%s) Exception occurred, exiting...", self.pid)
            # should we exit with code 3 to abort everything?
            sys.exit(1)

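# Subclasses hook into the Worker life cycle through start(), stop() and
# process_work(). A minimal sketch of a custom worker (hypothetical, for
# illustration only):
#
#     class WorkerNoop(Worker):
#         def process_work(self):
#             # called once per loop iteration, between watchdog pings
#             _logger.debug("WorkerNoop (%s) idle", self.pid)
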
class WorkerHTTP(Worker):
    """ HTTP Request workers """
    def process_request(self, client, addr):
        client.setblocking(1)
        client.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
        # Prevent fd inheritance: close-on-exec
        flags = fcntl.fcntl(client, fcntl.F_GETFD) | fcntl.FD_CLOEXEC
        fcntl.fcntl(client, fcntl.F_SETFD, flags)
        # serve the request using WorkerBaseWSGIServer monkey-patched with
        # the accepted client socket
        self.server.socket = client
        self.server.process_request(client, addr)
        self.request_count += 1

    def process_work(self):
        try:
            client, addr = self.multi.socket.accept()
            self.process_request(client, addr)
        except socket.error, e:
            if e[0] not in (errno.EAGAIN, errno.ECONNABORTED):
                raise

    def start(self):
        Worker.start(self)
        self.server = WorkerBaseWSGIServer(self.multi.app)

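# WorkerBaseWSGIServer below never owns a listening socket of its own:
# server_bind() closes the socket werkzeug created and server_activate()
# skips listen(), so a single server instance handles whatever client
# socket WorkerHTTP.process_request() swaps in before each request.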
class WorkerBaseWSGIServer(werkzeug.serving.BaseWSGIServer):
    """ werkzeug WSGI Server patched to allow using an external listen socket
    """
    def __init__(self, app):
        # dummy host and port: the real socket is swapped in per request
        werkzeug.serving.BaseWSGIServer.__init__(self, "1", "1", app)

    def server_bind(self):
        # we don't bind because we use the listen socket of
        # Multicorn#socket; instead we close the socket
        if self.socket:
            self.socket.close()

    def server_activate(self):
        # don't listen as we use Multicorn#socket
        pass

class WorkerCron(Worker):
    """ Cron workers """
    def sleep(self):
        interval = 60 + self.pid % 10 # chorus effect
        time.sleep(interval)

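    # The pid-based offset desynchronizes the cron workers: for example,
    # pids 4201 and 4205 poll every 61s and 65s respectively, so several
    # workers do not hit the databases at the same instant.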
    def process_work(self):
        rpc_request = logging.getLogger('openerp.netsvc.rpc.request')
        rpc_request_flag = rpc_request.isEnabledFor(logging.DEBUG)
        _logger.debug("WorkerCron (%s) polling for jobs", self.pid)
        if config['db_name']:
            db_names = config['db_name'].split(',')
        else:
            db_names = openerp.service.db.exp_list(True)
        for db_name in db_names:
            if rpc_request_flag:
                start_time = time.time()
                start_rss, start_vms = psutil.Process(os.getpid()).get_memory_info()
            while True:
                # acquired = openerp.addons.base.ir.ir_cron.ir_cron._acquire_job(db_name)
                # TODO why isn't openerp.addons.base defined?
                import base
                acquired = base.ir.ir_cron.ir_cron._acquire_job(db_name)
                if not acquired:
                    break
            # don't keep cursors in multi-database mode
            if len(db_names) > 1:
                openerp.sql_db.close_db(db_name)
            if rpc_request_flag:
                end_time = time.time()
                end_rss, end_vms = psutil.Process(os.getpid()).get_memory_info()
                logline = '%s time:%.3fs mem: %sk -> %sk (diff: %sk)' % (db_name, end_time - start_time, start_vms / 1024, end_vms / 1024, (end_vms - start_vms) / 1024)
                _logger.debug("WorkerCron (%s) %s", self.pid, logline)
        # TODO Each job should be considered as one request instead of each run
        self.request_count += 1

    def start(self):
        Worker.start(self)
        openerp.service.start_internal()

# vim:expandtab:smartindent:tabstop=4:softtabstop=4:shiftwidth=4: