// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Goroutine preemption
//
// A goroutine can be preempted at any safe-point. Currently, there
// are a few categories of safe-points:
//
// 1. A blocked safe-point occurs for the duration that a goroutine is
//    descheduled, blocked on synchronization, or in a system call.
//
// 2. Synchronous safe-points occur when a running goroutine checks
//    for a preemption request.
//
// 3. Asynchronous safe-points occur at any instruction in user code
//    where the goroutine can be safely paused and a conservative
//    stack and register scan can find stack roots. The runtime can
//    stop a goroutine at an async safe-point using a signal.
//
// At both blocked and synchronous safe-points, a goroutine's CPU
// state is minimal and the garbage collector has complete information
// about its entire stack. This makes it possible to deschedule a
// goroutine with minimal space, and to precisely scan a goroutine's
// stack.
//
// Synchronous safe-points are implemented by overloading the stack
// bound check in function prologues. To preempt a goroutine at the
// next synchronous safe-point, the runtime poisons the goroutine's
// stack bound to a value that will cause the next stack bound check
// to fail and enter the stack growth implementation, which will
// detect that it was actually a preemption and redirect to preemption
// handling.
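//
// As a rough sketch (not the actual generated code), a stack-check
// prologue effectively does:
//
//	if SP <= g.stackguard0 {
//		// Call morestack. Because stackguard0 has been poisoned
//		// to stackPreempt, a value larger than any valid stack
//		// pointer, the check always fails, and morestack detects
//		// the poisoned bound as a preemption request rather than
//		// a real stack overflow.
//	}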
//
// Preemption at asynchronous safe-points is implemented by suspending
// the thread using an OS mechanism (e.g., signals) and inspecting its
// state to determine if the goroutine was at an asynchronous
// safe-point. Since the thread suspension itself is generally
// asynchronous, it also checks if the running goroutine wants to be
// preempted, since this could have changed. If all conditions are
// satisfied, it adjusts the signal context to make it look like the
// signaled thread just called asyncPreempt and resumes the thread.
// asyncPreempt spills all registers and enters the scheduler.
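//
// The signal-handler side of this is roughly the following (a
// simplified sketch of doSigPreempt in the signal handling code;
// details vary by platform and are illustrative only):
//
//	if wantAsyncPreempt(gp) {
//		if ok, newpc := isAsyncSafePoint(gp, ctxt.sigpc(), ctxt.sigsp(), ctxt.siglr()); ok {
//			// Rewrite the signal context so that, on resume, the
//			// thread appears to have called asyncPreempt from newpc.
//			ctxt.pushCall(abi.FuncPCABI0(asyncPreempt), newpc)
//		}
//	}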
//
// (An alternative would be to preempt in the signal handler itself.
// This would let the OS save and restore the register state and the
// runtime would only need to know how to extract potentially
// pointer-containing registers from the signal context. However, this
// would consume an M for every preempted G, and the scheduler itself
// is not designed to run from a signal handler, as it tends to
// allocate memory and start threads in the preemption path.)

package runtime

import (
	"internal/abi"
	"internal/goarch"
	"runtime/internal/atomic"
	"unsafe"
)

type suspendGState struct {
	g *g

	// dead indicates the goroutine was not suspended because it
	// is dead. This goroutine could be reused after the dead
	// state was observed, so the caller must not assume that it
	// remains dead.
	dead bool

	// stopped indicates that this suspendG transitioned the G to
	// _Gwaiting via g.preemptStop and thus is responsible for
	// readying it when done.
	stopped bool
}

// suspendG suspends goroutine gp at a safe-point and returns the
// state of the suspended goroutine. The caller gets read access to
// the goroutine until it calls resumeG.
//
// It is safe for multiple callers to attempt to suspend the same
// goroutine at the same time. The goroutine may execute between
// subsequent successful suspend operations. The current
// implementation grants exclusive access to the goroutine, and hence
// multiple callers will serialize. However, the intent is to grant
// shared read access, so please don't depend on exclusive access.
//
// This must be called from the system stack and the user goroutine on
// the current M (if any) must be in a preemptible state. This
// prevents deadlocks where two goroutines attempt to suspend each
// other and both are in non-preemptible states. There are other ways
// to resolve this deadlock, but this seems simplest.
//
// TODO(austin): What if we instead required this to be called from a
// user goroutine? Then we could deschedule the goroutine while
// waiting instead of blocking the thread. If two goroutines tried to
// suspend each other, one of them would win and the other wouldn't
// complete the suspend until it was resumed. We would have to be
// careful that they couldn't actually queue up suspend for each other
// and then both be suspended. This would also avoid the need for a
// kernel context switch in the synchronous case because we could just
// directly schedule the waiter. The context switch is unavoidable in
// the signal case.
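//
// A minimal usage sketch (illustrative only; real callers such as the
// GC's stack scanner do more bookkeeping around this):
//
//	state := suspendG(gp)
//	if !state.dead {
//		// gp is parked at a safe-point; its stack and scheduler
//		// state may be read until resumeG is called.
//	}
//	resumeG(state)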
//
//go:systemstack
func suspendG(gp *g) suspendGState {
	if mp := getg().m; mp.curg != nil && readgstatus(mp.curg) == _Grunning {
		// Since we're on the system stack of this M, the user
		// G is stuck at an unsafe point. If another goroutine
		// were to try to preempt m.curg, it could deadlock.
		throw("suspendG from non-preemptible goroutine")
	}

	// See https://golang.org/cl/21503 for justification of the yield delay.
	const yieldDelay = 10 * 1000
	var nextYield int64

	// Drive the goroutine to a preemption point.
	stopped := false
	var asyncM *m
	var asyncGen uint32
	var nextPreemptM int64
	for i := 0; ; i++ {
		switch s := readgstatus(gp); s {
		default:
			if s&_Gscan != 0 {
				// Someone else is suspending it. Wait
				// for them to finish.
				//
				// TODO: It would be nicer if we could
				// coalesce suspends.
				break
			}

			dumpgstatus(gp)
			throw("invalid g status")

		case _Gdead:
			// Nothing to suspend.
			//
			// preemptStop may need to be cleared, but
			// doing that here could race with goroutine
			// reuse. Instead, goexit0 clears it.
			return suspendGState{dead: true}

		case _Gcopystack:
			// The stack is being copied. We need to wait
			// until this is done.

		case _Gpreempted:
			// We (or someone else) suspended the G. Claim
			// ownership of it by transitioning it to
			// _Gwaiting.
			if !casGFromPreempted(gp, _Gpreempted, _Gwaiting) {
				break
			}

			// We stopped the G, so we have to ready it later.
			stopped = true

			s = _Gwaiting
			fallthrough

		case _Grunnable, _Gsyscall, _Gwaiting:
			// Claim goroutine by setting scan bit.
			// This may race with execution or readying of gp.
			// The scan bit keeps it from transitioning state.
			if !castogscanstatus(gp, s, s|_Gscan) {
				break
			}

			// Clear the preemption request. It's safe to
			// reset the stack guard because we hold the
			// _Gscan bit and thus own the stack.
			gp.preemptStop = false
			gp.preempt = false
			gp.stackguard0 = gp.stack.lo + _StackGuard

			// The goroutine was already at a safe-point
			// and we've now locked that in.
			//
			// TODO: It would be much better if we didn't
			// leave it in _Gscan, but instead gently
			// prevented its scheduling until resumption.
			// Maybe we only use this to bump a suspended
			// count and the scheduler skips suspended
			// goroutines? That wouldn't be enough for
			// {_Gsyscall,_Gwaiting} -> _Grunning. Maybe
			// for all those transitions we need to check
			// suspended and deschedule?
			return suspendGState{g: gp, stopped: stopped}

		case _Grunning:
			// Optimization: if there is already a pending preemption request
			// (from the previous loop iteration), don't bother with the atomics.
			if gp.preemptStop && gp.preempt && gp.stackguard0 == stackPreempt && asyncM == gp.m && atomic.Load(&asyncM.preemptGen) == asyncGen {
				break
			}

			// Temporarily block state transitions.
			if !castogscanstatus(gp, _Grunning, _Gscanrunning) {
				break
			}

			// Request synchronous preemption.
			gp.preemptStop = true
			gp.preempt = true
			gp.stackguard0 = stackPreempt

			// Prepare for asynchronous preemption.
			asyncM2 := gp.m
			asyncGen2 := atomic.Load(&asyncM2.preemptGen)
			needAsync := asyncM != asyncM2 || asyncGen != asyncGen2
			asyncM = asyncM2
			asyncGen = asyncGen2

			casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)

			// Send asynchronous preemption. We do this
			// after CASing the G back to _Grunning
			// because preemptM may be synchronous and we
			// don't want to catch the G just spinning on
			// its status.
			if preemptMSupported && debug.asyncpreemptoff == 0 && needAsync {
				// Rate limit preemptM calls. This is
				// particularly important on Windows
				// where preemptM is actually
				// synchronous and the spin loop here
				// can lead to live-lock.
				now := nanotime()
				if now >= nextPreemptM {
					nextPreemptM = now + yieldDelay/2
					preemptM(asyncM)
				}
			}
		}

		// TODO: Don't busy wait. This loop should really only
		// be a simple read/decide/CAS loop that only fails if
		// there's an active race. Once the CAS succeeds, we
		// should queue up the preemption (which will require
		// it to be reliable in the _Grunning case, not
		// best-effort) and then sleep until we're notified
		// that the goroutine is suspended.
		if i == 0 {
			nextYield = nanotime() + yieldDelay
		}
		if nanotime() < nextYield {
			procyield(10)
		} else {
			osyield()
			nextYield = nanotime() + yieldDelay/2
		}
	}
}

// resumeG undoes the effects of suspendG, allowing the suspended
// goroutine to continue from its current safe-point.
func resumeG(state suspendGState) {
	if state.dead {
		// We didn't actually stop anything.
		return
	}

	gp := state.g
	switch s := readgstatus(gp); s {
	default:
		dumpgstatus(gp)
		throw("unexpected g status")

	case _Grunnable | _Gscan,
		_Gwaiting | _Gscan,
		_Gsyscall | _Gscan:
		casfrom_Gscanstatus(gp, s, s&^_Gscan)
	}

	if state.stopped {
		// We stopped it, so we need to re-schedule it.
		ready(gp, 0, true)
	}
}

// canPreemptM reports whether mp is in a state that is safe to preempt.
//
// It is nosplit because it has nosplit callers.
//
//go:nosplit
func canPreemptM(mp *m) bool {
	return mp.locks == 0 && mp.mallocing == 0 && mp.preemptoff == "" && mp.p.ptr().status == _Prunning
}

//go:generate go run mkpreempt.go

// asyncPreempt saves all user registers and calls asyncPreempt2.
//
// When stack scanning encounters an asyncPreempt frame, it scans that
// frame and its parent frame conservatively.
//
// asyncPreempt is implemented in assembly.
func asyncPreempt()

//go:nosplit
func asyncPreempt2() {
	gp := getg()
	gp.asyncSafePoint = true
	if gp.preemptStop {
		mcall(preemptPark)
	} else {
		mcall(gopreempt_m)
	}
	gp.asyncSafePoint = false
}

// asyncPreemptStack is the bytes of stack space required to inject an
// asyncPreempt call.
var asyncPreemptStack = ^uintptr(0)

func init() {
	f := findfunc(abi.FuncPCABI0(asyncPreempt))
	total := funcMaxSPDelta(f)
	f = findfunc(abi.FuncPCABIInternal(asyncPreempt2))
	total += funcMaxSPDelta(f)
	// Add some overhead for return PCs, etc.
	asyncPreemptStack = uintptr(total) + 8*goarch.PtrSize
	if asyncPreemptStack > _StackLimit {
		// We need more than the nosplit limit. This isn't
		// unsafe, but it may limit asynchronous preemption.
		//
		// This may be a problem if we start using more
		// registers. In that case, we should store registers
		// in a context object. If we pre-allocate one per P,
		// asyncPreempt can spill just a few registers to the
		// stack, then grab its context object and spill into
		// it. When it enters the runtime, it would allocate a
		// new context for the P.
		print("runtime: asyncPreemptStack=", asyncPreemptStack, "\n")
		throw("async stack too large")
	}
}

// wantAsyncPreempt returns whether an asynchronous preemption is
// queued for gp.
func wantAsyncPreempt(gp *g) bool {
	// Check both the G and the P.
	return (gp.preempt || gp.m.p != 0 && gp.m.p.ptr().preempt) && readgstatus(gp)&^_Gscan == _Grunning
}

// isAsyncSafePoint reports whether gp at instruction PC is an
// asynchronous safe point. This indicates that:
//
// 1. It's safe to suspend gp and conservatively scan its stack and
// registers. There are no potentially hidden pointer values and it's
// not in the middle of an atomic sequence like a write barrier.
//
// 2. gp has enough stack space to inject the asyncPreempt call.
//
// 3. It's generally safe to interact with the runtime, even if we're
// in a signal handler stopped here. For example, there are no runtime
// locks held, so acquiring a runtime lock won't self-deadlock.
//
// In some cases the PC is safe for asynchronous preemption but it
// also needs to adjust the resumption PC. The new PC is returned in
// the second result.
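//
// Callers must resume gp at the returned PC rather than the one they
// passed in. For example (an illustrative sketch, not exact caller code):
//
//	if ok, resumePC := isAsyncSafePoint(gp, pc, sp, lr); ok {
//		// Inject the asyncPreempt call at resumePC, which may have
//		// been backed up to restart an interrupted instruction
//		// sequence.
//	}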
func isAsyncSafePoint(gp *g, pc, sp, lr uintptr) (bool, uintptr) {
	mp := gp.m

	// Only user Gs can have safe-points. We check this first
	// because it's extremely common that we'll catch mp in the
	// scheduler processing this G preemption.
	if mp.curg != gp {
		return false, 0
	}

	// Check M state.
	if mp.p == 0 || !canPreemptM(mp) {
		return false, 0
	}

	// Check stack space.
	if sp < gp.stack.lo || sp-gp.stack.lo < asyncPreemptStack {
		return false, 0
	}

	// Check if PC is an unsafe-point.
	f := findfunc(pc)
	if !f.valid() {
		// Not Go code.
		return false, 0
	}
	if (GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "mips64" || GOARCH == "mips64le") && lr == pc+8 && funcspdelta(f, pc, nil) == 0 {
		// We probably stopped at a half-executed CALL instruction,
		// where the LR is updated but the PC has not. If we preempt
		// here we'll see a seemingly self-recursive call, which is in
		// fact not.
		// This is normally ok, as we use the return address saved on
		// stack for unwinding, not the LR value. But if this is a
		// call to morestack, we haven't created the frame, and we'll
		// use the LR for unwinding, which will be bad.
		return false, 0
	}
	up, startpc := pcdatavalue2(f, _PCDATA_UnsafePoint, pc)
	if up == _PCDATA_UnsafePointUnsafe {
		// Unsafe-point marked by compiler. This includes
		// atomic sequences (e.g., write barrier) and nosplit
		// functions (except at calls).
		return false, 0
	}
	if fd := funcdata(f, _FUNCDATA_LocalsPointerMaps); fd == nil || fd == unsafe.Pointer(&no_pointers_stackmap) {
		// This is assembly code. Don't assume it's
		// well-formed. We identify assembly code by
		// checking that it has either no stack map, or
		// no_pointers_stackmap, which is the stack map
		// for ones marked as NO_LOCAL_POINTERS.
		//
		// TODO: Are there cases that are safe but don't have a
		// locals pointer map, like empty frame functions?
		// It might be possible to preempt any assembly functions
		// except the ones that have funcFlag_SPWRITE set in f.flag.
		return false, 0
	}
	name := funcname(f)
	if inldata := funcdata(f, _FUNCDATA_InlTree); inldata != nil {
		inltree := (*[1 << 20]inlinedCall)(inldata)
		ix := pcdatavalue(f, _PCDATA_InlTreeIndex, pc, nil)
		if ix >= 0 {
			name = funcnameFromNameoff(f, inltree[ix].func_)
		}
	}
	if hasPrefix(name, "runtime.") ||
		hasPrefix(name, "runtime/internal/") ||
		hasPrefix(name, "reflect.") {
		// For now we never async preempt the runtime or
		// anything closely tied to the runtime. Known issues
		// include: various points in the scheduler ("don't
		// preempt between here and here"), much of the defer
		// implementation (untyped info on stack), bulk write
		// barriers (write barrier check),
		// reflect.{makeFuncStub,methodValueCall}.
		//
		// TODO(austin): We should improve this, or opt things
		// in incrementally.
		return false, 0
	}
	switch up {
	case _PCDATA_Restart1, _PCDATA_Restart2:
		// Restartable instruction sequence. Back off PC to
		// the start PC.
		if startpc == 0 || startpc > pc || pc-startpc > 20 {
			throw("bad restart PC")
		}
		return true, startpc
	case _PCDATA_RestartAtEntry:
		// Restart from the function entry at resumption.
		return true, f.entry()
	}
	return true, pc
}

var no_pointers_stackmap uint64 // defined in assembly, for NO_LOCAL_POINTERS macro