// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Goroutine preemption
//
// A goroutine can be preempted at any safe-point. Currently, there
// are a few categories of safe-points:
//
// 1. A blocked safe-point occurs for the duration that a goroutine is
//    descheduled, blocked on synchronization, or in a system call.
//
// 2. Synchronous safe-points occur when a running goroutine checks
//    for a preemption request.
//
// 3. Asynchronous safe-points occur at any instruction in user code
//    where the goroutine can be safely paused and a conservative
//    stack and register scan can find stack roots. The runtime can
//    stop a goroutine at an async safe-point using a signal.
//
// At both blocked and synchronous safe-points, a goroutine's CPU
// state is minimal and the garbage collector has complete information
// about its entire stack. This makes it possible to deschedule a
// goroutine with minimal space, and to precisely scan a goroutine's
// stack.
//
// Synchronous safe-points are implemented by overloading the stack
// bound check in function prologues. To preempt a goroutine at the
// next synchronous safe-point, the runtime poisons the goroutine's
// stack bound to a value that will cause the next stack bound check
// to fail and enter the stack growth implementation, which will
// detect that it was actually a preemption and redirect to preemption
// handling.
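//
// For example, the compiler-generated prologue check is conceptually
// equivalent to the following sketch (the real check is a couple of
// machine instructions comparing SP against g.stackguard0):
//
//	if sp < getg().stackguard0 { // always true once stackguard0 == stackPreempt
//		morestack() // notices the poisoned bound and redirects to preemption handling
//	}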
//
// Preemption at asynchronous safe-points is implemented by suspending
// the thread using an OS mechanism (e.g., signals) and inspecting its
// state to determine if the goroutine was at an asynchronous
// safe-point. Since the thread suspension itself is generally
// asynchronous, it also checks if the running goroutine wants to be
// preempted, since this could have changed. If all conditions are
// satisfied, it adjusts the signal context to make it look like the
// signaled thread just called asyncPreempt and resumes the thread.
// asyncPreempt spills all registers and enters the scheduler.
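//
// A sketch of the signal-handler side (the real logic lives in the
// per-OS signal code, e.g. doSigPreempt on Unix-like systems, where
// ctxt is the OS signal context of the stopped thread; details vary
// by platform):
//
//	if wantAsyncPreempt(gp) {
//		if ok, newpc := isAsyncSafePoint(gp, ctxt.sigpc(), ctxt.sigsp(), ctxt.siglr()); ok {
//			// Make it look like the thread called asyncPreempt.
//			ctxt.pushCall(abi.FuncPCABI0(asyncPreempt), newpc)
//		}
//	}
//	gp.m.preemptGen.Add(1) // acknowledge the preemption request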
//
// (An alternative would be to preempt in the signal handler itself.
// This would let the OS save and restore the register state and the
// runtime would only need to know how to extract potentially
// pointer-containing registers from the signal context. However, this
// would consume an M for every preempted G, and the scheduler itself
// is not designed to run from a signal handler, as it tends to
// allocate memory and start threads in the preemption path.)

package runtime

import (
	"internal/abi"
	"internal/goarch"
)

type suspendGState struct {
	g *g

	// dead indicates the goroutine was not suspended because it
	// is dead. This goroutine could be reused after the dead
	// state was observed, so the caller must not assume that it
	// remains dead.
	dead bool

	// stopped indicates that this suspendG transitioned the G to
	// _Gwaiting via g.preemptStop and thus is responsible for
	// readying it when done.
	stopped bool
}

// suspendG suspends goroutine gp at a safe-point and returns the
// state of the suspended goroutine. The caller gets read access to
// the goroutine until it calls resumeG.
//
// It is safe for multiple callers to attempt to suspend the same
// goroutine at the same time. The goroutine may execute between
// subsequent successful suspend operations. The current
// implementation grants exclusive access to the goroutine, and hence
// multiple callers will serialize. However, the intent is to grant
// shared read access, so please don't depend on exclusive access.
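//
// A sketch of typical use (illustrative only; the GC's stack scan,
// for instance, drives suspension roughly this way):
//
//	state := suspendG(gp)
//	if !state.dead {
//		// gp is parked at a safe-point; its stack may be read here.
//	}
//	resumeG(state)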
//
// This must be called from the system stack and the user goroutine on
// the current M (if any) must be in a preemptible state. This
// prevents deadlocks where two goroutines attempt to suspend each
// other and both are in non-preemptible states. There are other ways
// to resolve this deadlock, but this seems simplest.
//
// TODO(austin): What if we instead required this to be called from a
// user goroutine? Then we could deschedule the goroutine while
// waiting instead of blocking the thread. If two goroutines tried to
// suspend each other, one of them would win and the other wouldn't
// complete the suspend until it was resumed. We would have to be
// careful that they couldn't actually queue up suspend for each other
// and then both be suspended. This would also avoid the need for a
// kernel context switch in the synchronous case because we could just
// directly schedule the waiter. The context switch is unavoidable in
// the signal case.
//
//go:systemstack
func suspendG(gp *g) suspendGState {
	if mp := getg().m; mp.curg != nil && readgstatus(mp.curg) == _Grunning {
		// Since we're on the system stack of this M, the user
		// G is stuck at an unsafe point. If another goroutine
		// were to try to preempt m.curg, it could deadlock.
		throw("suspendG from non-preemptible goroutine")
	}

	// See https://golang.org/cl/21503 for justification of the yield delay.
	const yieldDelay = 10 * 1000
	var nextYield int64

	// Drive the goroutine to a preemption point.
	stopped := false
	var asyncM *m
	var asyncGen uint32
	var nextPreemptM int64
	for i := 0; ; i++ {
		switch s := readgstatus(gp); s {
		default:
			if s&_Gscan != 0 {
				// Someone else is suspending it. Wait
				// for them to finish.
				//
				// TODO: It would be nicer if we could
				// coalesce suspends.
				break
			}

			dumpgstatus(gp)
			throw("invalid g status")

		case _Gdead:
			// Nothing to suspend.
			//
			// preemptStop may need to be cleared, but
			// doing that here could race with goroutine
			// reuse. Instead, goexit0 clears it.
			return suspendGState{dead: true}

		case _Gcopystack:
			// The stack is being copied. We need to wait
			// until this is done.

		case _Gpreempted:
			// We (or someone else) suspended the G. Claim
			// ownership of it by transitioning it to
			// _Gwaiting.
			if !casGFromPreempted(gp, _Gpreempted, _Gwaiting) {
				break
			}

			// We stopped the G, so we have to ready it later.
			stopped = true

			s = _Gwaiting
			fallthrough

		case _Grunnable, _Gsyscall, _Gwaiting:
			// Claim goroutine by setting scan bit.
			// This may race with execution or readying of gp.
			// The scan bit keeps it from transitioning state.
			if !castogscanstatus(gp, s, s|_Gscan) {
				break
			}

			// Clear the preemption request. It's safe to
			// reset the stack guard because we hold the
			// _Gscan bit and thus own the stack.
			gp.preemptStop = false
			gp.preempt = false
			gp.stackguard0 = gp.stack.lo + _StackGuard

			// The goroutine was already at a safe-point
			// and we've now locked that in.
			//
			// TODO: It would be much better if we didn't
			// leave it in _Gscan, but instead gently
			// prevented its scheduling until resumption.
			// Maybe we only use this to bump a suspended
			// count and the scheduler skips suspended
			// goroutines? That wouldn't be enough for
			// {_Gsyscall,_Gwaiting} -> _Grunning. Maybe
			// for all those transitions we need to check
			// suspended and deschedule?
			return suspendGState{g: gp, stopped: stopped}

		case _Grunning:
			// Optimization: if there is already a pending preemption request
			// (from the previous loop iteration), don't bother with the atomics.
			if gp.preemptStop && gp.preempt && gp.stackguard0 == stackPreempt && asyncM == gp.m && asyncM.preemptGen.Load() == asyncGen {
				break
			}

			// Temporarily block state transitions.
			if !castogscanstatus(gp, _Grunning, _Gscanrunning) {
				break
			}

			// Request synchronous preemption.
			gp.preemptStop = true
			gp.preempt = true
			gp.stackguard0 = stackPreempt

			// Prepare for asynchronous preemption.
			asyncM2 := gp.m
			asyncGen2 := asyncM2.preemptGen.Load()
			needAsync := asyncM != asyncM2 || asyncGen != asyncGen2
			asyncM = asyncM2
			asyncGen = asyncGen2

			casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)

			// Send asynchronous preemption. We do this
			// after CASing the G back to _Grunning
			// because preemptM may be synchronous and we
			// don't want to catch the G just spinning on
			// its status.
			if preemptMSupported && debug.asyncpreemptoff == 0 && needAsync {
				// Rate limit preemptM calls. This is
				// particularly important on Windows
				// where preemptM is actually
				// synchronous and the spin loop here
				// can lead to live-lock.
				now := nanotime()
				if now >= nextPreemptM {
					nextPreemptM = now + yieldDelay/2
					preemptM(asyncM)
				}
			}
		}

		// TODO: Don't busy wait. This loop should really only
		// be a simple read/decide/CAS loop that only fails if
		// there's an active race. Once the CAS succeeds, we
		// should queue up the preemption (which will require
		// it to be reliable in the _Grunning case, not
		// best-effort) and then sleep until we're notified
		// that the goroutine is suspended.
		if i == 0 {
			nextYield = nanotime() + yieldDelay
		}
		if nanotime() < nextYield {
			procyield(10)
		} else {
			osyield()
			nextYield = nanotime() + yieldDelay/2
		}
	}
}

// resumeG undoes the effects of suspendG, allowing the suspended
// goroutine to continue from its current safe-point.
func resumeG(state suspendGState) {
	if state.dead {
		// We didn't actually stop anything.
		return
	}

	gp := state.g
	switch s := readgstatus(gp); s {
	default:
		dumpgstatus(gp)
		throw("unexpected g status")

	case _Grunnable | _Gscan,
		_Gwaiting | _Gscan,
		_Gsyscall | _Gscan:
		casfrom_Gscanstatus(gp, s, s&^_Gscan)
	}

	if state.stopped {
		// We stopped it, so we need to re-schedule it.
		ready(gp, 0, true)
	}
}

// canPreemptM reports whether mp is in a state that is safe to preempt.
//
// It is nosplit because it has nosplit callers.
//
//go:nosplit
func canPreemptM(mp *m) bool {
	return mp.locks == 0 && mp.mallocing == 0 && mp.preemptoff == "" && mp.p.ptr().status == _Prunning
}

//go:generate go run mkpreempt.go

// asyncPreempt saves all user registers and calls asyncPreempt2.
//
// When stack scanning encounters an asyncPreempt frame, it scans that
// frame and its parent frame conservatively.
//
// asyncPreempt is implemented in assembly.
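//
// Conceptually, it behaves like the following sketch (mkpreempt.go
// generates the real per-architecture assembly, and the exact
// register set and frame layout differ by GOARCH):
//
//	func asyncPreempt() {
//		// Spill flags and every GP/FP register to the stack.
//		asyncPreempt2()
//		// Restore every register and the flags, then return to
//		// the PC that the signal handler pushed.
//	}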
func asyncPreempt()

//go:nosplit
func asyncPreempt2() {
	gp := getg()
	gp.asyncSafePoint = true
	if gp.preemptStop {
		mcall(preemptPark)
	} else {
		mcall(gopreempt_m)
	}
	gp.asyncSafePoint = false
}

// asyncPreemptStack is the bytes of stack space required to inject an
// asyncPreempt call.
var asyncPreemptStack = ^uintptr(0)

func init() {
	f := findfunc(abi.FuncPCABI0(asyncPreempt))
	total := funcMaxSPDelta(f)
	f = findfunc(abi.FuncPCABIInternal(asyncPreempt2))
	total += funcMaxSPDelta(f)
	// Add some overhead for return PCs, etc.
	asyncPreemptStack = uintptr(total) + 8*goarch.PtrSize
	if asyncPreemptStack > _StackLimit {
		// We need more than the nosplit limit. This isn't
		// unsafe, but it may limit asynchronous preemption.
		//
		// This may be a problem if we start using more
		// registers. In that case, we should store registers
		// in a context object. If we pre-allocate one per P,
		// asyncPreempt can spill just a few registers to the
		// stack, then grab its context object and spill into
		// it. When it enters the runtime, it would allocate a
		// new context for the P.
		print("runtime: asyncPreemptStack=", asyncPreemptStack, "\n")
		throw("async stack too large")
	}
}

// wantAsyncPreempt returns whether an asynchronous preemption is
// queued for gp.
func wantAsyncPreempt(gp *g) bool {
	// Check both the G and the P.
	return (gp.preempt || gp.m.p != 0 && gp.m.p.ptr().preempt) && readgstatus(gp)&^_Gscan == _Grunning
}

// isAsyncSafePoint reports whether gp at instruction PC is an
// asynchronous safe point. This indicates that:
//
// 1. It's safe to suspend gp and conservatively scan its stack and
//    registers. There are no potentially hidden pointer values and it's
//    not in the middle of an atomic sequence like a write barrier.
//
// 2. gp has enough stack space to inject the asyncPreempt call.
//
// 3. It's generally safe to interact with the runtime, even if we're
//    in a signal handler stopped here. For example, there are no runtime
//    locks held, so acquiring a runtime lock won't self-deadlock.
//
// In some cases the PC is safe for asynchronous preemption but it
// also needs to adjust the resumption PC. The new PC is returned in
// the second result.
func isAsyncSafePoint(gp *g, pc, sp, lr uintptr) (bool, uintptr) {
	mp := gp.m

	// Only user Gs can have safe-points. We check this first
	// because it's extremely common that we'll catch mp in the
	// scheduler processing this G preemption.
	if mp.curg != gp {
		return false, 0
	}

	// Check M state.
	if mp.p == 0 || !canPreemptM(mp) {
		return false, 0
	}

	// Check stack space.
	if sp < gp.stack.lo || sp-gp.stack.lo < asyncPreemptStack {
		return false, 0
	}

	// Check if PC is an unsafe-point.
	f := findfunc(pc)
	if !f.valid() {
		// Not Go code.
		return false, 0
	}
	if (GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "mips64" || GOARCH == "mips64le") && lr == pc+8 && funcspdelta(f, pc, nil) == 0 {
		// We probably stopped at a half-executed CALL instruction,
		// where the LR is updated but the PC has not. If we preempt
		// here we'll see a seemingly self-recursive call, which is in
		// fact not.
		// This is normally ok, as we use the return address saved on
		// stack for unwinding, not the LR value. But if this is a
		// call to morestack, we haven't created the frame, and we'll
		// use the LR for unwinding, which will be bad.
		return false, 0
	}
	up, startpc := pcdatavalue2(f, _PCDATA_UnsafePoint, pc)
	if up == _PCDATA_UnsafePointUnsafe {
		// Unsafe-point marked by compiler. This includes
		// atomic sequences (e.g., write barrier) and nosplit
		// functions (except at calls).
		return false, 0
	}
	if fd := funcdata(f, _FUNCDATA_LocalsPointerMaps); fd == nil || f.flag&funcFlag_ASM != 0 {
		// This is assembly code. Don't assume it's well-formed.
		// TODO: Empirically we still need the fd == nil check. Why?
		//
		// TODO: Are there cases that are safe but don't have a
		// locals pointer map, like empty frame functions?
		// It might be possible to preempt any assembly functions
		// except the ones that have funcFlag_SPWRITE set in f.flag.
		return false, 0
	}
	name := funcname(f)
	if inldata := funcdata(f, _FUNCDATA_InlTree); inldata != nil {
		inltree := (*[1 << 20]inlinedCall)(inldata)
		ix := pcdatavalue(f, _PCDATA_InlTreeIndex, pc, nil)
		if ix >= 0 {
			name = funcnameFromNameoff(f, inltree[ix].func_)
		}
	}
	if hasPrefix(name, "runtime.") ||
		hasPrefix(name, "runtime/internal/") ||
		hasPrefix(name, "reflect.") {
		// For now we never async preempt the runtime or
		// anything closely tied to the runtime. Known issues
		// include: various points in the scheduler ("don't
		// preempt between here and here"), much of the defer
		// implementation (untyped info on stack), bulk write
		// barriers (write barrier check),
		// reflect.{makeFuncStub,methodValueCall}.
		//
		// TODO(austin): We should improve this, or opt things
		// in incrementally.
		return false, 0
	}
	switch up {
	case _PCDATA_Restart1, _PCDATA_Restart2:
		// Restartable instruction sequence. Back off PC to
		// the start PC.
		if startpc == 0 || startpc > pc || pc-startpc > 20 {
			throw("bad restart PC")
		}
		return true, startpc
	case _PCDATA_RestartAtEntry:
		// Restart from the function entry at resumption.
		return true, f.entry()
	}
	return true, pc
}