Synopsis - Cross-Reference
File: /src/Synopsis/gc/pthread_stop_world.c1#include "private/pthread_support.h" 2 3#if defined(GC_PTHREADS) && !defined(GC_WIN32_THREADS) && \ 4 !defined(GC_DARWIN_THREADS) 5 6#include <signal.h> 7#include <semaphore.h> 8#include <errno.h> 9#include <unistd.h> 10#include "atomic_ops.h" 11 12#if DEBUG_THREADS 13 14#ifndef NSIG 15# if defined(MAXSIG) 16# define NSIG (MAXSIG+1) 17# elif defined(_NSIG) 18# define NSIG _NSIG 19# elif defined(__SIGRTMAX) 20# define NSIG (__SIGRTMAX+1) 21# else 22 --> please fix it 23# endif 24#endif 25 26void GC_print_sig_mask() 27{ 28 sigset_t blocked; 29 int i; 30 31 if (pthread_sigmask(SIG_BLOCK, NULL, &blocked) != 0) 32 ABORT("pthread_sigmask"); 33 GC_printf("Blocked: "); 34 for (i = 1; i < NSIG; i++) { 35 if (sigismember(&blocked, i)) { GC_printf("%d ", i); } 36 } 37 GC_printf("\n"); 38} 39 40#endif 41 42/* Remove the signals that we want to allow in thread stopping */ 43/* handler from a set. */ 44void GC_remove_allowed_signals(sigset_t *set) 45{ 46 if (sigdelset(set, SIGINT) != 0 47 || sigdelset(set, SIGQUIT) != 0 48 || sigdelset(set, SIGABRT) != 0 49 || sigdelset(set, SIGTERM) != 0) { 50 ABORT("sigdelset() failed"); 51 } 52 53# ifdef MPROTECT_VDB 54 /* Handlers write to the thread structure, which is in the heap, */ 55 /* and hence can trigger a protection fault. */ 56 if (sigdelset(set, SIGSEGV) != 0 57# ifdef SIGBUS 58 || sigdelset(set, SIGBUS) != 0 59# endif 60 ) { 61 ABORT("sigdelset() failed"); 62 } 63# endif 64} 65 66static sigset_t suspend_handler_mask; 67 68volatile AO_t GC_stop_count; 69 /* Incremented at the beginning of GC_stop_world. */ 70 71volatile AO_t GC_world_is_stopped = FALSE; 72 /* FALSE ==> it is safe for threads to restart, i.e. */ 73 /* they will see another suspend signal before they */ 74 /* are expected to stop (unless they have voluntarily */ 75 /* stopped). */ 76 77#ifdef GC_OSF1_THREADS 78 GC_bool GC_retry_signals = TRUE; 79#else 80 GC_bool GC_retry_signals = FALSE; 81#endif 82 83/* 84 * We use signals to stop threads during GC. 85 * 86 * Suspended threads wait in signal handler for SIG_THR_RESTART. 87 * That's more portable than semaphores or condition variables. 88 * (We do use sem_post from a signal handler, but that should be portable.) 89 * 90 * The thread suspension signal SIG_SUSPEND is now defined in gc_priv.h. 91 * Note that we can't just stop a thread; we need it to save its stack 92 * pointer(s) and acknowledge. 93 */ 94 95#ifndef SIG_THR_RESTART 96# if defined(GC_HPUX_THREADS) || defined(GC_OSF1_THREADS) || defined(GC_NETBSD_THREADS) 97# ifdef _SIGRTMIN 98# define SIG_THR_RESTART _SIGRTMIN + 5 99# else 100# define SIG_THR_RESTART SIGRTMIN + 5 101# endif 102# else 103# define SIG_THR_RESTART SIGXCPU 104# endif 105#endif 106 107sem_t GC_suspend_ack_sem; 108 109#ifdef GC_NETBSD_THREADS 110# define GC_NETBSD_THREADS_WORKAROUND 111 /* It seems to be necessary to wait until threads have restarted. */ 112 /* But it is unclear why that is the case. */ 113 sem_t GC_restart_ack_sem; 114#endif 115 116void GC_suspend_handler_inner(ptr_t sig_arg, void *context); 117 118#if defined(IA64) || defined(HP_PA) || defined(M68K) 119void GC_suspend_handler(int sig, siginfo_t *info, void *context) 120{ 121 int old_errno = errno; 122 GC_with_callee_saves_pushed(GC_suspend_handler_inner, (ptr_t)(word)sig); 123 errno = old_errno; 124} 125#else 126/* We believe that in all other cases the full context is already */ 127/* in the signal handler frame. */ 128void GC_suspend_handler(int sig, siginfo_t *info, void *context) 129{ 130 int old_errno = errno; 131 GC_suspend_handler_inner((ptr_t)(word)sig, context); 132 errno = old_errno; 133} 134#endif 135 136void GC_suspend_handler_inner(ptr_t sig_arg, void *context) 137{ 138 int sig = (int)(word)sig_arg; 139 int dummy; 140 pthread_t my_thread = pthread_self(); 141 GC_thread me; 142# ifdef PARALLEL_MARK 143 word my_mark_no = GC_mark_no; 144 /* Marker can't proceed until we acknowledge. Thus this is */ 145 /* guaranteed to be the mark_no correspending to our */ 146 /* suspension, i.e. the marker can't have incremented it yet. */ 147# endif 148 AO_t my_stop_count = AO_load(&GC_stop_count); 149 150 if (sig != SIG_SUSPEND) ABORT("Bad signal in suspend_handler"); 151 152# if DEBUG_THREADS 153 GC_printf("Suspending 0x%x\n", (unsigned)my_thread); 154# endif 155 156 me = GC_lookup_thread(my_thread); 157 /* The lookup here is safe, since I'm doing this on behalf */ 158 /* of a thread which holds the allocation lock in order */ 159 /* to stop the world. Thus concurrent modification of the */ 160 /* data structure is impossible. */ 161 if (me -> stop_info.last_stop_count == my_stop_count) { 162 /* Duplicate signal. OK if we are retrying. */ 163 if (!GC_retry_signals) { 164 WARN("Duplicate suspend signal in thread %lx\n", 165 pthread_self()); 166 } 167 return; 168 } 169# ifdef SPARC 170 me -> stop_info.stack_ptr = GC_save_regs_in_stack(); 171# else 172 me -> stop_info.stack_ptr = (ptr_t)(&dummy); 173# endif 174# ifdef IA64 175 me -> backing_store_ptr = GC_save_regs_in_stack(); 176# endif 177 178 /* Tell the thread that wants to stop the world that this */ 179 /* thread has been stopped. Note that sem_post() is */ 180 /* the only async-signal-safe primitive in LinuxThreads. */ 181 sem_post(&GC_suspend_ack_sem); 182 me -> stop_info.last_stop_count = my_stop_count; 183 184 /* Wait until that thread tells us to restart by sending */ 185 /* this thread a SIG_THR_RESTART signal. */ 186 /* SIG_THR_RESTART should be masked at this point. Thus there */ 187 /* is no race. */ 188 /* We do not continue until we receive a SIG_THR_RESTART, */ 189 /* but we do not take that as authoritative. (We may be */ 190 /* accidentally restarted by one of the user signals we */ 191 /* don't block.) After we receive the signal, we use a */ 192 /* primitive and expensive mechanism to wait until it's */ 193 /* really safe to proceed. Under normal circumstances, */ 194 /* this code should not be executed. */ 195 do { 196 sigsuspend (&suspend_handler_mask); 197 } while (AO_load_acquire(&GC_world_is_stopped) 198 && AO_load(&GC_stop_count) == my_stop_count); 199 /* If the RESTART signal gets lost, we can still lose. That should be */ 200 /* less likely than losing the SUSPEND signal, since we don't do much */ 201 /* between the sem_post and sigsuspend. */ 202 /* We'd need more handshaking to work around that. */ 203 /* Simply dropping the sigsuspend call should be safe, but is unlikely */ 204 /* to be efficient. */ 205 206# if DEBUG_THREADS 207 GC_printf("Continuing 0x%x\n", (unsigned)my_thread); 208# endif 209} 210 211void GC_restart_handler(int sig) 212{ 213 pthread_t my_thread = pthread_self(); 214 GC_thread me; 215 216 if (sig != SIG_THR_RESTART) ABORT("Bad signal in suspend_handler"); 217 218# ifdef GC_NETBSD_THREADS_WORKAROUND 219 sem_post(&GC_restart_ack_sem); 220# endif 221 222 /* 223 ** Note: even if we don't do anything useful here, 224 ** it would still be necessary to have a signal handler, 225 ** rather than ignoring the signals, otherwise 226 ** the signals will not be delivered at all, and 227 ** will thus not interrupt the sigsuspend() above. 228 */ 229 230# if DEBUG_THREADS 231 GC_printf("In GC_restart_handler for 0x%x\n", (unsigned)pthread_self()); 232# endif 233} 234 235# ifdef IA64 236# define IF_IA64(x) x 237# else 238# define IF_IA64(x) 239# endif 240/* We hold allocation lock. Should do exactly the right thing if the */ 241/* world is stopped. Should not fail if it isn't. */ 242void GC_push_all_stacks() 243{ 244 GC_bool found_me = FALSE; 245 size_t nthreads = 0; 246 int i; 247 GC_thread p; 248 ptr_t lo, hi; 249 /* On IA64, we also need to scan the register backing store. */ 250 IF_IA64(ptr_t bs_lo; ptr_t bs_hi;) 251 pthread_t me = pthread_self(); 252 253 if (!GC_thr_initialized) GC_thr_init(); 254# if DEBUG_THREADS 255 GC_printf("Pushing stacks from thread 0x%x\n", (unsigned) me); 256# endif 257 for (i = 0; i < THREAD_TABLE_SZ; i++) { 258 for (p = GC_threads[i]; p != 0; p = p -> next) { 259 if (p -> flags & FINISHED) continue; 260 ++nthreads; 261 if (THREAD_EQUAL(p -> id, me)) { 262# ifdef SPARC 263 lo = (ptr_t)GC_save_regs_in_stack(); 264# else 265 lo = GC_approx_sp(); 266# endif 267 found_me = TRUE; 268 IF_IA64(bs_hi = (ptr_t)GC_save_regs_in_stack();) 269 } else { 270 lo = p -> stop_info.stack_ptr; 271 IF_IA64(bs_hi = p -> backing_store_ptr;) 272 } 273 if ((p -> flags & MAIN_THREAD) == 0) { 274 hi = p -> stack_end; 275 IF_IA64(bs_lo = p -> backing_store_end); 276 } else { 277 /* The original stack. */ 278 hi = GC_stackbottom; 279 IF_IA64(bs_lo = BACKING_STORE_BASE;) 280 } 281# if DEBUG_THREADS 282 GC_printf("Stack for thread 0x%x = [%p,%p)\n", 283 (unsigned)(p -> id), lo, hi); 284# endif 285 if (0 == lo) ABORT("GC_push_all_stacks: sp not set!\n"); 286# ifdef STACK_GROWS_UP 287 /* We got them backwards! */ 288 GC_push_all_stack(hi, lo); 289# else 290 GC_push_all_stack(lo, hi); 291# endif 292# ifdef IA64 293# if DEBUG_THREADS 294 GC_printf("Reg stack for thread 0x%x = [%lx,%lx)\n", 295 (unsigned)p -> id, bs_lo, bs_hi); 296# endif 297 if (THREAD_EQUAL(p -> id, me)) { 298 /* FIXME: This may add an unbounded number of entries, */ 299 /* and hence overflow the mark stack, which is bad. */ 300 GC_push_all_eager(bs_lo, bs_hi); 301 } else { 302 GC_push_all_stack(bs_lo, bs_hi); 303 } 304# endif 305 } 306 } 307 if (GC_print_stats == VERBOSE) { 308 GC_log_printf("Pushed %d thread stacks\n", nthreads); 309 } 310 if (!found_me && !GC_in_thread_creation) 311 ABORT("Collecting from unknown thread."); 312} 313 314/* There seems to be a very rare thread stopping problem. To help us */ 315/* debug that, we save the ids of the stopping thread. */ 316pthread_t GC_stopping_thread; 317int GC_stopping_pid; 318 319/* We hold the allocation lock. Suspend all threads that might */ 320/* still be running. Return the number of suspend signals that */ 321/* were sent. */ 322int GC_suspend_all() 323{ 324 int n_live_threads = 0; 325 int i; 326 GC_thread p; 327 int result; 328 pthread_t my_thread = pthread_self(); 329 330 GC_stopping_thread = my_thread; /* debugging only. */ 331 GC_stopping_pid = getpid(); /* debugging only. */ 332 for (i = 0; i < THREAD_TABLE_SZ; i++) { 333 for (p = GC_threads[i]; p != 0; p = p -> next) { 334 if (!THREAD_EQUAL(p -> id, my_thread)) { 335 if (p -> flags & FINISHED) continue; 336 if (p -> stop_info.last_stop_count == GC_stop_count) continue; 337 if (p -> thread_blocked) /* Will wait */ continue; 338 n_live_threads++; 339# if DEBUG_THREADS 340 GC_printf("Sending suspend signal to 0x%x\n", 341 (unsigned)(p -> id)); 342# endif 343 344 result = pthread_kill(p -> id, SIG_SUSPEND); 345 switch(result) { 346 case ESRCH: 347 /* Not really there anymore. Possible? */ 348 n_live_threads--; 349 break; 350 case 0: 351 break; 352 default: 353 ABORT("pthread_kill failed"); 354 } 355 } 356 } 357 } 358 return n_live_threads; 359} 360 361void GC_stop_world() 362{ 363 int i; 364 int n_live_threads; 365 int code; 366 367 GC_ASSERT(I_HOLD_LOCK()); 368# if DEBUG_THREADS 369 GC_printf("Stopping the world from 0x%x\n", (unsigned)pthread_self()); 370# endif 371 372 /* Make sure all free list construction has stopped before we start. */ 373 /* No new construction can start, since free list construction is */ 374 /* required to acquire and release the GC lock before it starts, */ 375 /* and we have the lock. */ 376# ifdef PARALLEL_MARK 377 GC_acquire_mark_lock(); 378 GC_ASSERT(GC_fl_builder_count == 0); 379 /* We should have previously waited for it to become zero. */ 380# endif /* PARALLEL_MARK */ 381 AO_store(&GC_stop_count, GC_stop_count+1); 382 /* Only concurrent reads are possible. */ 383 AO_store_release(&GC_world_is_stopped, TRUE); 384 n_live_threads = GC_suspend_all(); 385 386 if (GC_retry_signals) { 387 unsigned long wait_usecs = 0; /* Total wait since retry. */ 388# define WAIT_UNIT 3000 389# define RETRY_INTERVAL 100000 390 for (;;) { 391 int ack_count; 392 393 sem_getvalue(&GC_suspend_ack_sem, &ack_count); 394 if (ack_count == n_live_threads) break; 395 if (wait_usecs > RETRY_INTERVAL) { 396 int newly_sent = GC_suspend_all(); 397 398 if (GC_print_stats) { 399 GC_log_printf("Resent %d signals after timeout\n", 400 newly_sent); 401 } 402 sem_getvalue(&GC_suspend_ack_sem, &ack_count); 403 if (newly_sent < n_live_threads - ack_count) { 404 WARN("Lost some threads during GC_stop_world?!\n",0); 405 n_live_threads = ack_count + newly_sent; 406 } 407 wait_usecs = 0; 408 } 409 usleep(WAIT_UNIT); 410 wait_usecs += WAIT_UNIT; 411 } 412 } 413 for (i = 0; i < n_live_threads; i++) { 414 retry: 415 if (0 != (code = sem_wait(&GC_suspend_ack_sem))) { 416 /* On Linux, sem_wait is documented to always return zero.*/ 417 /* But the documentation appears to be incorrect. */ 418 if (errno == EINTR) { 419 /* Seems to happen with some versions of gdb. */ 420 goto retry; 421 } 422 ABORT("sem_wait for handler failed"); 423 } 424 } 425# ifdef PARALLEL_MARK 426 GC_release_mark_lock(); 427# endif 428 #if DEBUG_THREADS 429 GC_printf("World stopped from 0x%x\n", (unsigned)pthread_self()); 430 #endif 431 GC_stopping_thread = 0; /* debugging only */ 432} 433 434/* Caller holds allocation lock, and has held it continuously since */ 435/* the world stopped. */ 436void GC_start_world() 437{ 438 pthread_t my_thread = pthread_self(); 439 register int i; 440 register GC_thread p; 441 register int n_live_threads = 0; 442 register int result; 443# ifdef GC_NETBSD_THREADS_WORKAROUND 444 int code; 445# endif 446 447# if DEBUG_THREADS 448 GC_printf("World starting\n"); 449# endif 450 451 AO_store(&GC_world_is_stopped, FALSE); 452 for (i = 0; i < THREAD_TABLE_SZ; i++) { 453 for (p = GC_threads[i]; p != 0; p = p -> next) { 454 if (!THREAD_EQUAL(p -> id, my_thread)) { 455 if (p -> flags & FINISHED) continue; 456 if (p -> thread_blocked) continue; 457 n_live_threads++; 458 #if DEBUG_THREADS 459 GC_printf("Sending restart signal to 0x%x\n", 460 (unsigned)(p -> id)); 461 #endif 462 463 result = pthread_kill(p -> id, SIG_THR_RESTART); 464 switch(result) { 465 case ESRCH: 466 /* Not really there anymore. Possible? */ 467 n_live_threads--; 468 break; 469 case 0: 470 break; 471 default: 472 ABORT("pthread_kill failed"); 473 } 474 } 475 } 476 } 477# ifdef GC_NETBSD_THREADS_WORKAROUND 478 for (i = 0; i < n_live_threads; i++) 479 while (0 != (code = sem_wait(&GC_restart_ack_sem))) 480 if (errno != EINTR) { 481 GC_err_printf1("sem_wait() returned %ld\n", 482 (unsigned long)code); 483 ABORT("sem_wait() for restart handler failed"); 484 } 485# endif 486# if DEBUG_THREADS 487 GC_printf("World started\n"); 488# endif 489} 490 491void GC_stop_init() { 492 struct sigaction act; 493 494 if (sem_init(&GC_suspend_ack_sem, 0, 0) != 0) 495 ABORT("sem_init failed"); 496# ifdef GC_NETBSD_THREADS_WORKAROUND 497 if (sem_init(&GC_restart_ack_sem, 0, 0) != 0) 498 ABORT("sem_init failed"); 499# endif 500 501 act.sa_flags = SA_RESTART | SA_SIGINFO; 502 if (sigfillset(&act.sa_mask) != 0) { 503 ABORT("sigfillset() failed"); 504 } 505 GC_remove_allowed_signals(&act.sa_mask); 506 /* SIG_THR_RESTART is set in the resulting mask. */ 507 /* It is unmasked by the handler when necessary. */ 508 act.sa_sigaction = GC_suspend_handler; 509 if (sigaction(SIG_SUSPEND, &act, NULL) != 0) { 510 ABORT("Cannot set SIG_SUSPEND handler"); 511 } 512 513 act.sa_flags &= ~ SA_SIGINFO; 514 act.sa_handler = GC_restart_handler; 515 if (sigaction(SIG_THR_RESTART, &act, NULL) != 0) { 516 ABORT("Cannot set SIG_THR_RESTART handler"); 517 } 518 519 /* Inititialize suspend_handler_mask. It excludes SIG_THR_RESTART. */ 520 if (sigfillset(&suspend_handler_mask) != 0) ABORT("sigfillset() failed"); 521 GC_remove_allowed_signals(&suspend_handler_mask); 522 if (sigdelset(&suspend_handler_mask, SIG_THR_RESTART) != 0) 523 ABORT("sigdelset() failed"); 524 525 /* Check for GC_RETRY_SIGNALS. */ 526 if (0 != GETENV("GC_RETRY_SIGNALS")) { 527 GC_retry_signals = TRUE; 528 } 529 if (0 != GETENV("GC_NO_RETRY_SIGNALS")) { 530 GC_retry_signals = FALSE; 531 } 532 if (GC_print_stats && GC_retry_signals) { 533 GC_log_printf("Will retry suspend signal if necessary.\n"); 534 } 535} 536 537#endif