Synopsis - Cross-Reference

File: /src/Synopsis/gc/pthread_stop_world.c
  1#include "private/pthread_support.h"
  2
  3#if defined(GC_PTHREADS) && !defined(GC_WIN32_THREADS) && \
  4    !defined(GC_DARWIN_THREADS)
  5
  6#include <signal.h>
  7#include <semaphore.h>
  8#include <errno.h>
  9#include <unistd.h>
 10#include "atomic_ops.h"
 11
 12#if DEBUG_THREADS
 13
 14#ifndef NSIG
 15# if defined(MAXSIG)
 16#  define NSIG (MAXSIG+1)
 17# elif defined(_NSIG)
 18#  define NSIG _NSIG
 19# elif defined(__SIGRTMAX)
 20#  define NSIG (__SIGRTMAX+1)
 21# else
 22  --> please fix it
 23# endif
 24#endif
 25
 26void GC_print_sig_mask()
 27{
 28    sigset_t blocked;
 29    int i;
 30
 31    if (pthread_sigmask(SIG_BLOCK, NULL, &blocked) != 0)
 32    	ABORT("pthread_sigmask");
 33    GC_printf("Blocked: ");
 34    for (i = 1; i < NSIG; i++) {
 35        if (sigismember(&blocked, i)) { GC_printf("%d ", i); }
 36    }
 37    GC_printf("\n");
 38}
 39
 40#endif
 41
 42/* Remove the signals that we want to allow in thread stopping 	*/
 43/* handler from a set.						*/
 44void GC_remove_allowed_signals(sigset_t *set)
 45{
 46    if (sigdelset(set, SIGINT) != 0
 47	  || sigdelset(set, SIGQUIT) != 0
 48	  || sigdelset(set, SIGABRT) != 0
 49	  || sigdelset(set, SIGTERM) != 0) {
 50        ABORT("sigdelset() failed");
 51    }
 52
 53#   ifdef MPROTECT_VDB
 54      /* Handlers write to the thread structure, which is in the heap,	*/
 55      /* and hence can trigger a protection fault.			*/
 56      if (sigdelset(set, SIGSEGV) != 0
 57#	  ifdef SIGBUS
 58	    || sigdelset(set, SIGBUS) != 0
 59# 	  endif
 60	  ) {
 61        ABORT("sigdelset() failed");
 62      }
 63#   endif
 64}
 65
 66static sigset_t suspend_handler_mask;
 67
 68volatile AO_t GC_stop_count;
 69			/* Incremented at the beginning of GC_stop_world. */
 70
 71volatile AO_t GC_world_is_stopped = FALSE;
 72			/* FALSE ==> it is safe for threads to restart, i.e. */
 73			/* they will see another suspend signal before they  */
 74			/* are expected to stop (unless they have voluntarily */
 75			/* stopped).					     */
 76
 77#ifdef GC_OSF1_THREADS
 78  GC_bool GC_retry_signals = TRUE;
 79#else
 80  GC_bool GC_retry_signals = FALSE;
 81#endif
 82
 83/*
 84 * We use signals to stop threads during GC.
 85 * 
 86 * Suspended threads wait in signal handler for SIG_THR_RESTART.
 87 * That's more portable than semaphores or condition variables.
 88 * (We do use sem_post from a signal handler, but that should be portable.)
 89 *
 90 * The thread suspension signal SIG_SUSPEND is now defined in gc_priv.h.
 91 * Note that we can't just stop a thread; we need it to save its stack
 92 * pointer(s) and acknowledge.
 93 */
 94
 95#ifndef SIG_THR_RESTART
 96#  if defined(GC_HPUX_THREADS) || defined(GC_OSF1_THREADS) || defined(GC_NETBSD_THREADS)
 97#    ifdef _SIGRTMIN
 98#      define SIG_THR_RESTART _SIGRTMIN + 5
 99#    else
100#      define SIG_THR_RESTART SIGRTMIN + 5
101#    endif
102#  else
103#   define SIG_THR_RESTART SIGXCPU
104#  endif
105#endif
106
/* Posted by the suspend handler of each thread that has stopped;	*/
/* GC_stop_world waits on it once per signalled thread.		*/
sem_t GC_suspend_ack_sem;

#ifdef GC_NETBSD_THREADS
  /* It seems to be necessary to wait until threads have restarted.	*/
  /* But it is unclear why that is the case.				*/
# define GC_NETBSD_THREADS_WORKAROUND
  /* Posted by the restart handler; GC_start_world waits on it.	*/
  sem_t GC_restart_ack_sem;
#endif
115
116void GC_suspend_handler_inner(ptr_t sig_arg, void *context);
117
118#if defined(IA64) || defined(HP_PA) || defined(M68K)
119void GC_suspend_handler(int sig, siginfo_t *info, void *context)
120{
121  int old_errno = errno;
122  GC_with_callee_saves_pushed(GC_suspend_handler_inner, (ptr_t)(word)sig);
123  errno = old_errno;
124}
125#else
126/* We believe that in all other cases the full context is already	*/
127/* in the signal handler frame.						*/
128void GC_suspend_handler(int sig, siginfo_t *info, void *context)
129{
130  int old_errno = errno;
131  GC_suspend_handler_inner((ptr_t)(word)sig, context);
132  errno = old_errno;
133}
134#endif
135
136void GC_suspend_handler_inner(ptr_t sig_arg, void *context)
137{
138    int sig = (int)(word)sig_arg;
139    int dummy;
140    pthread_t my_thread = pthread_self();
141    GC_thread me;
142#   ifdef PARALLEL_MARK
143	word my_mark_no = GC_mark_no;
144	/* Marker can't proceed until we acknowledge.  Thus this is	*/
145	/* guaranteed to be the mark_no correspending to our 		*/
146	/* suspension, i.e. the marker can't have incremented it yet.	*/
147#   endif
148    AO_t my_stop_count = AO_load(&GC_stop_count);
149
150    if (sig != SIG_SUSPEND) ABORT("Bad signal in suspend_handler");
151
152#   if DEBUG_THREADS
153      GC_printf("Suspending 0x%x\n", (unsigned)my_thread);
154#   endif
155
156    me = GC_lookup_thread(my_thread);
157    /* The lookup here is safe, since I'm doing this on behalf  */
158    /* of a thread which holds the allocation lock in order	*/
159    /* to stop the world.  Thus concurrent modification of the	*/
160    /* data structure is impossible.				*/
161    if (me -> stop_info.last_stop_count == my_stop_count) {
162	/* Duplicate signal.  OK if we are retrying.	*/
163	if (!GC_retry_signals) {
164	    WARN("Duplicate suspend signal in thread %lx\n",
165		 pthread_self());
166	}
167	return;
168    }
169#   ifdef SPARC
170	me -> stop_info.stack_ptr = GC_save_regs_in_stack();
171#   else
172	me -> stop_info.stack_ptr = (ptr_t)(&dummy);
173#   endif
174#   ifdef IA64
175	me -> backing_store_ptr = GC_save_regs_in_stack();
176#   endif
177
178    /* Tell the thread that wants to stop the world that this   */
179    /* thread has been stopped.  Note that sem_post() is  	*/
180    /* the only async-signal-safe primitive in LinuxThreads.    */
181    sem_post(&GC_suspend_ack_sem);
182    me -> stop_info.last_stop_count = my_stop_count;
183
184    /* Wait until that thread tells us to restart by sending    */
185    /* this thread a SIG_THR_RESTART signal.			*/
186    /* SIG_THR_RESTART should be masked at this point.  Thus there	*/
187    /* is no race.						*/
188    /* We do not continue until we receive a SIG_THR_RESTART,	*/
189    /* but we do not take that as authoritative.  (We may be	*/
190    /* accidentally restarted by one of the user signals we 	*/
191    /* don't block.)  After we receive the signal, we use a 	*/
192    /* primitive and expensive mechanism to wait until it's	*/
193    /* really safe to proceed.  Under normal circumstances,	*/
194    /* this code should not be executed.			*/
195    do {
196	sigsuspend (&suspend_handler_mask);
197    } while (AO_load_acquire(&GC_world_is_stopped)
198	     && AO_load(&GC_stop_count) == my_stop_count);
199    /* If the RESTART signal gets lost, we can still lose.  That should be  */
200    /* less likely than losing the SUSPEND signal, since we don't do much   */
201    /* between the sem_post and sigsuspend.	   			    */
202    /* We'd need more handshaking to work around that.			    */
203    /* Simply dropping the sigsuspend call should be safe, but is unlikely  */
204    /* to be efficient.							    */
205
206#   if DEBUG_THREADS
207      GC_printf("Continuing 0x%x\n", (unsigned)my_thread);
208#   endif
209}
210
211void GC_restart_handler(int sig)
212{
213    pthread_t my_thread = pthread_self();
214    GC_thread me;
215
216    if (sig != SIG_THR_RESTART) ABORT("Bad signal in suspend_handler");
217
218#   ifdef GC_NETBSD_THREADS_WORKAROUND
219      sem_post(&GC_restart_ack_sem);
220#   endif
221
222    /*
223    ** Note: even if we don't do anything useful here,
224    ** it would still be necessary to have a signal handler,
225    ** rather than ignoring the signals, otherwise
226    ** the signals will not be delivered at all, and
227    ** will thus not interrupt the sigsuspend() above.
228    */
229
230#   if DEBUG_THREADS
231      GC_printf("In GC_restart_handler for 0x%x\n", (unsigned)pthread_self());
232#   endif
233}
234
235# ifdef IA64
236#   define IF_IA64(x) x
237# else
238#   define IF_IA64(x)
239# endif
240/* We hold allocation lock.  Should do exactly the right thing if the	*/
241/* world is stopped.  Should not fail if it isn't.			*/
242void GC_push_all_stacks()
243{
244    GC_bool found_me = FALSE;
245    size_t nthreads = 0;
246    int i;
247    GC_thread p;
248    ptr_t lo, hi;
249    /* On IA64, we also need to scan the register backing store. */
250    IF_IA64(ptr_t bs_lo; ptr_t bs_hi;)
251    pthread_t me = pthread_self();
252    
253    if (!GC_thr_initialized) GC_thr_init();
254#   if DEBUG_THREADS
255        GC_printf("Pushing stacks from thread 0x%x\n", (unsigned) me);
256#   endif
257    for (i = 0; i < THREAD_TABLE_SZ; i++) {
258      for (p = GC_threads[i]; p != 0; p = p -> next) {
259        if (p -> flags & FINISHED) continue;
260	++nthreads;
261        if (THREAD_EQUAL(p -> id, me)) {
262#  	    ifdef SPARC
263	        lo = (ptr_t)GC_save_regs_in_stack();
264#  	    else
265 	        lo = GC_approx_sp();
266#           endif
267	    found_me = TRUE;
268	    IF_IA64(bs_hi = (ptr_t)GC_save_regs_in_stack();)
269	} else {
270	    lo = p -> stop_info.stack_ptr;
271	    IF_IA64(bs_hi = p -> backing_store_ptr;)
272	}
273        if ((p -> flags & MAIN_THREAD) == 0) {
274	    hi = p -> stack_end;
275	    IF_IA64(bs_lo = p -> backing_store_end);
276        } else {
277            /* The original stack. */
278            hi = GC_stackbottom;
279	    IF_IA64(bs_lo = BACKING_STORE_BASE;)
280        }
281#	if DEBUG_THREADS
282            GC_printf("Stack for thread 0x%x = [%p,%p)\n",
283    	              (unsigned)(p -> id), lo, hi);
284#	endif
285	if (0 == lo) ABORT("GC_push_all_stacks: sp not set!\n");
286#       ifdef STACK_GROWS_UP
287	  /* We got them backwards! */
288          GC_push_all_stack(hi, lo);
289#       else
290          GC_push_all_stack(lo, hi);
291#	endif
292#	ifdef IA64
293#         if DEBUG_THREADS
294            GC_printf("Reg stack for thread 0x%x = [%lx,%lx)\n",
295    	              (unsigned)p -> id, bs_lo, bs_hi);
296#	  endif
297          if (THREAD_EQUAL(p -> id, me)) {
298	    /* FIXME:  This may add an unbounded number of entries,	*/
299	    /* and hence overflow the mark stack, which is bad.		*/
300	    GC_push_all_eager(bs_lo, bs_hi);
301	  } else {
302	    GC_push_all_stack(bs_lo, bs_hi);
303	  }
304#	endif
305      }
306    }
307    if (GC_print_stats == VERBOSE) {
308	GC_log_printf("Pushed %d thread stacks\n", nthreads);
309    }
310    if (!found_me && !GC_in_thread_creation)
311      ABORT("Collecting from unknown thread.");
312}
313
314/* There seems to be a very rare thread stopping problem.  To help us  */
315/* debug that, we save the ids of the stopping thread. */
316pthread_t GC_stopping_thread;
317int GC_stopping_pid;
318
319/* We hold the allocation lock.  Suspend all threads that might	*/
320/* still be running.  Return the number of suspend signals that	*/
321/* were sent. */
322int GC_suspend_all()
323{
324    int n_live_threads = 0;
325    int i;
326    GC_thread p;
327    int result;
328    pthread_t my_thread = pthread_self();
329    
330    GC_stopping_thread = my_thread;    /* debugging only.      */
331    GC_stopping_pid = getpid();                /* debugging only.      */
332    for (i = 0; i < THREAD_TABLE_SZ; i++) {
333      for (p = GC_threads[i]; p != 0; p = p -> next) {
334        if (!THREAD_EQUAL(p -> id, my_thread)) {
335            if (p -> flags & FINISHED) continue;
336            if (p -> stop_info.last_stop_count == GC_stop_count) continue;
337	    if (p -> thread_blocked) /* Will wait */ continue;
338            n_live_threads++;
339#	    if DEBUG_THREADS
340	      GC_printf("Sending suspend signal to 0x%x\n",
341			(unsigned)(p -> id));
342#	    endif
343        
344            result = pthread_kill(p -> id, SIG_SUSPEND);
345	    switch(result) {
346                case ESRCH:
347                    /* Not really there anymore.  Possible? */
348                    n_live_threads--;
349                    break;
350                case 0:
351                    break;
352                default:
353                    ABORT("pthread_kill failed");
354            }
355        }
356      }
357    }
358    return n_live_threads;
359}
360
361void GC_stop_world()
362{
363    int i;
364    int n_live_threads;
365    int code;
366
367    GC_ASSERT(I_HOLD_LOCK());
368#   if DEBUG_THREADS
369      GC_printf("Stopping the world from 0x%x\n", (unsigned)pthread_self());
370#   endif
371       
372    /* Make sure all free list construction has stopped before we start. */
373    /* No new construction can start, since free list construction is	*/
374    /* required to acquire and release the GC lock before it starts,	*/
375    /* and we have the lock.						*/
376#   ifdef PARALLEL_MARK
377      GC_acquire_mark_lock();
378      GC_ASSERT(GC_fl_builder_count == 0);
379      /* We should have previously waited for it to become zero. */
380#   endif /* PARALLEL_MARK */
381    AO_store(&GC_stop_count, GC_stop_count+1);
382    	/* Only concurrent reads are possible. */
383    AO_store_release(&GC_world_is_stopped, TRUE);
384    n_live_threads = GC_suspend_all();
385
386      if (GC_retry_signals) {
387	  unsigned long wait_usecs = 0;  /* Total wait since retry.	*/
388#	  define WAIT_UNIT 3000
389#	  define RETRY_INTERVAL 100000
390	  for (;;) {
391	      int ack_count;
392
393	      sem_getvalue(&GC_suspend_ack_sem, &ack_count);
394	      if (ack_count == n_live_threads) break;
395	      if (wait_usecs > RETRY_INTERVAL) {
396		  int newly_sent = GC_suspend_all();
397
398		  if (GC_print_stats) {
399		      GC_log_printf("Resent %d signals after timeout\n",
400				newly_sent);
401		  }
402		  sem_getvalue(&GC_suspend_ack_sem, &ack_count);
403		  if (newly_sent < n_live_threads - ack_count) {
404		      WARN("Lost some threads during GC_stop_world?!\n",0);
405		      n_live_threads = ack_count + newly_sent;
406		  }
407		  wait_usecs = 0;
408	      }
409	      usleep(WAIT_UNIT);
410	      wait_usecs += WAIT_UNIT;
411	  }
412      }
413    for (i = 0; i < n_live_threads; i++) {
414	retry:
415	  if (0 != (code = sem_wait(&GC_suspend_ack_sem))) {
416	      /* On Linux, sem_wait is documented to always return zero.*/
417	      /* But the documentation appears to be incorrect.		*/
418	      if (errno == EINTR) {
419		/* Seems to happen with some versions of gdb.	*/
420		goto retry;
421	      }
422	      ABORT("sem_wait for handler failed");
423	  }
424    }
425#   ifdef PARALLEL_MARK
426      GC_release_mark_lock();
427#   endif
428    #if DEBUG_THREADS
429      GC_printf("World stopped from 0x%x\n", (unsigned)pthread_self());
430    #endif
431    GC_stopping_thread = 0;  /* debugging only */
432}
433
434/* Caller holds allocation lock, and has held it continuously since	*/
435/* the world stopped.							*/
436void GC_start_world()
437{
438    pthread_t my_thread = pthread_self();
439    register int i;
440    register GC_thread p;
441    register int n_live_threads = 0;
442    register int result;
443#   ifdef GC_NETBSD_THREADS_WORKAROUND
444      int code;
445#   endif
446
447#   if DEBUG_THREADS
448      GC_printf("World starting\n");
449#   endif
450
451    AO_store(&GC_world_is_stopped, FALSE);
452    for (i = 0; i < THREAD_TABLE_SZ; i++) {
453      for (p = GC_threads[i]; p != 0; p = p -> next) {
454        if (!THREAD_EQUAL(p -> id, my_thread)) {
455            if (p -> flags & FINISHED) continue;
456	    if (p -> thread_blocked) continue;
457            n_live_threads++;
458	    #if DEBUG_THREADS
459	      GC_printf("Sending restart signal to 0x%x\n",
460			(unsigned)(p -> id));
461	    #endif
462        
463            result = pthread_kill(p -> id, SIG_THR_RESTART);
464	    switch(result) {
465                case ESRCH:
466                    /* Not really there anymore.  Possible? */
467                    n_live_threads--;
468                    break;
469                case 0:
470                    break;
471                default:
472                    ABORT("pthread_kill failed");
473            }
474        }
475      }
476    }
477#   ifdef GC_NETBSD_THREADS_WORKAROUND
478      for (i = 0; i < n_live_threads; i++)
479	while (0 != (code = sem_wait(&GC_restart_ack_sem)))
480	    if (errno != EINTR) {
481		GC_err_printf1("sem_wait() returned %ld\n",
482			       (unsigned long)code);
483		ABORT("sem_wait() for restart handler failed");
484	    }
485#    endif
486#    if DEBUG_THREADS
487      GC_printf("World started\n");
488#    endif
489}
490
491void GC_stop_init() {
492    struct sigaction act;
493    
494    if (sem_init(&GC_suspend_ack_sem, 0, 0) != 0)
495        ABORT("sem_init failed");
496#   ifdef GC_NETBSD_THREADS_WORKAROUND
497      if (sem_init(&GC_restart_ack_sem, 0, 0) != 0)
498	ABORT("sem_init failed");
499#   endif
500
501    act.sa_flags = SA_RESTART | SA_SIGINFO;
502    if (sigfillset(&act.sa_mask) != 0) {
503    	ABORT("sigfillset() failed");
504    }
505    GC_remove_allowed_signals(&act.sa_mask);
506    /* SIG_THR_RESTART is set in the resulting mask.		*/
507    /* It is unmasked by the handler when necessary. 		*/
508    act.sa_sigaction = GC_suspend_handler;
509    if (sigaction(SIG_SUSPEND, &act, NULL) != 0) {
510    	ABORT("Cannot set SIG_SUSPEND handler");
511    }
512
513    act.sa_flags &= ~ SA_SIGINFO;
514    act.sa_handler = GC_restart_handler;
515    if (sigaction(SIG_THR_RESTART, &act, NULL) != 0) {
516    	ABORT("Cannot set SIG_THR_RESTART handler");
517    }
518
519    /* Inititialize suspend_handler_mask. It excludes SIG_THR_RESTART. */
520      if (sigfillset(&suspend_handler_mask) != 0) ABORT("sigfillset() failed");
521      GC_remove_allowed_signals(&suspend_handler_mask);
522      if (sigdelset(&suspend_handler_mask, SIG_THR_RESTART) != 0)
523	  ABORT("sigdelset() failed");
524
525    /* Check for GC_RETRY_SIGNALS.	*/
526      if (0 != GETENV("GC_RETRY_SIGNALS")) {
527	  GC_retry_signals = TRUE;
528      }
529      if (0 != GETENV("GC_NO_RETRY_SIGNALS")) {
530	  GC_retry_signals = FALSE;
531      }
532      if (GC_print_stats && GC_retry_signals) {
533          GC_log_printf("Will retry suspend signal if necessary.\n");
534      }
535}
536
537#endif