blob: 57ec493b8d0469ef22fb14898099421fcf9929fd [file] [log] [blame]
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Goroutine preemption
//
// A goroutine can be preempted at any safe-point. Currently, there
// are a few categories of safe-points:
//
// 1. A blocked safe-point occurs for the duration that a goroutine is
//    descheduled, blocked on synchronization, or in a system call.
//
// 2. Synchronous safe-points occur when a running goroutine checks
//    for a preemption request.
//
// At both blocked and synchronous safe-points, a goroutine's CPU
// state is minimal and the garbage collector has complete information
// about its entire stack. This makes it possible to deschedule a
// goroutine with minimal space, and to precisely scan a goroutine's
// stack.
//
// Synchronous safe-points are implemented by overloading the stack
// bound check in function prologues. To preempt a goroutine at the
// next synchronous safe-point, the runtime poisons the goroutine's
// stack bound to a value that will cause the next stack bound check
// to fail and enter the stack growth implementation, which will
// detect that it was actually a preemption and redirect to preemption
// handling.
29
30package runtime
31
32type suspendGState struct {
33 g *g
34
35 // dead indicates the goroutine was not suspended because it
36 // is dead. This goroutine could be reused after the dead
37 // state was observed, so the caller must not assume that it
38 // remains dead.
39 dead bool
40
41 // stopped indicates that this suspendG transitioned the G to
42 // _Gwaiting via g.preemptStop and thus is responsible for
43 // readying it when done.
44 stopped bool
45}
46
47// suspendG suspends goroutine gp at a safe-point and returns the
48// state of the suspended goroutine. The caller gets read access to
49// the goroutine until it calls resumeG.
50//
51// It is safe for multiple callers to attempt to suspend the same
52// goroutine at the same time. The goroutine may execute between
53// subsequent successful suspend operations. The current
54// implementation grants exclusive access to the goroutine, and hence
55// multiple callers will serialize. However, the intent is to grant
56// shared read access, so please don't depend on exclusive access.
57//
58// This must be called from the system stack and the user goroutine on
59// the current M (if any) must be in a preemptible state. This
60// prevents deadlocks where two goroutines attempt to suspend each
61// other and both are in non-preemptible states. There are other ways
62// to resolve this deadlock, but this seems simplest.
63//
64// TODO(austin): What if we instead required this to be called from a
65// user goroutine? Then we could deschedule the goroutine while
66// waiting instead of blocking the thread. If two goroutines tried to
67// suspend each other, one of them would win and the other wouldn't
68// complete the suspend until it was resumed. We would have to be
69// careful that they couldn't actually queue up suspend for each other
70// and then both be suspended. This would also avoid the need for a
71// kernel context switch in the synchronous case because we could just
72// directly schedule the waiter. The context switch is unavoidable in
73// the signal case.
74//
75//go:systemstack
76func suspendG(gp *g) suspendGState {
77 if mp := getg().m; mp.curg != nil && readgstatus(mp.curg) == _Grunning {
78 // Since we're on the system stack of this M, the user
79 // G is stuck at an unsafe point. If another goroutine
80 // were to try to preempt m.curg, it could deadlock.
81 throw("suspendG from non-preemptible goroutine")
82 }
83
84 // See https://golang.org/cl/21503 for justification of the yield delay.
85 const yieldDelay = 10 * 1000
86 var nextYield int64
87
88 // Drive the goroutine to a preemption point.
89 stopped := false
90 for i := 0; ; i++ {
91 switch s := readgstatus(gp); s {
92 default:
93 if s&_Gscan != 0 {
94 // Someone else is suspending it. Wait
95 // for them to finish.
96 //
97 // TODO: It would be nicer if we could
98 // coalesce suspends.
99 break
100 }
101
102 dumpgstatus(gp)
103 throw("invalid g status")
104
105 case _Gdead:
106 // Nothing to suspend.
107 //
108 // preemptStop may need to be cleared, but
109 // doing that here could race with goroutine
110 // reuse. Instead, goexit0 clears it.
111 return suspendGState{dead: true}
112
113 case _Gcopystack:
114 // The stack is being copied. We need to wait
115 // until this is done.
116
117 case _Gpreempted:
118 // We (or someone else) suspended the G. Claim
119 // ownership of it by transitioning it to
120 // _Gwaiting.
121 if !casGFromPreempted(gp, _Gpreempted, _Gwaiting) {
122 break
123 }
124
125 // We stopped the G, so we have to ready it later.
126 stopped = true
127
128 s = _Gwaiting
129 fallthrough
130
131 case _Grunnable, _Gsyscall, _Gwaiting:
132 // Claim goroutine by setting scan bit.
133 // This may race with execution or readying of gp.
134 // The scan bit keeps it from transition state.
135 if !castogscanstatus(gp, s, s|_Gscan) {
136 break
137 }
138
139 // Clear the preemption request. It's safe to
140 // reset the stack guard because we hold the
141 // _Gscan bit and thus own the stack.
142 gp.preemptStop = false
143 gp.preempt = false
144 gp.stackguard0 = gp.stack.lo + _StackGuard
145
146 // The goroutine was already at a safe-point
147 // and we've now locked that in.
148 //
149 // TODO: It would be much better if we didn't
150 // leave it in _Gscan, but instead gently
151 // prevented its scheduling until resumption.
152 // Maybe we only use this to bump a suspended
153 // count and the scheduler skips suspended
154 // goroutines? That wouldn't be enough for
155 // {_Gsyscall,_Gwaiting} -> _Grunning. Maybe
156 // for all those transitions we need to check
157 // suspended and deschedule?
158 return suspendGState{g: gp, stopped: stopped}
159
160 case _Grunning:
161 // Optimization: if there is already a pending preemption request
162 // (from the previous loop iteration), don't bother with the atomics.
163 if gp.preemptStop && gp.preempt && gp.stackguard0 == stackPreempt {
164 break
165 }
166
167 // Temporarily block state transitions.
168 if !castogscanstatus(gp, _Grunning, _Gscanrunning) {
169 break
170 }
171
172 // Request synchronous preemption.
173 gp.preemptStop = true
174 gp.preempt = true
175 gp.stackguard0 = stackPreempt
176
177 // TODO: Inject asynchronous preemption.
178
179 casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)
180 }
181
182 // TODO: Don't busy wait. This loop should really only
183 // be a simple read/decide/CAS loop that only fails if
184 // there's an active race. Once the CAS succeeds, we
185 // should queue up the preemption (which will require
186 // it to be reliable in the _Grunning case, not
187 // best-effort) and then sleep until we're notified
188 // that the goroutine is suspended.
189 if i == 0 {
190 nextYield = nanotime() + yieldDelay
191 }
192 if nanotime() < nextYield {
193 procyield(10)
194 } else {
195 osyield()
196 nextYield = nanotime() + yieldDelay/2
197 }
198 }
199}
200
201// resumeG undoes the effects of suspendG, allowing the suspended
202// goroutine to continue from its current safe-point.
203func resumeG(state suspendGState) {
204 if state.dead {
205 // We didn't actually stop anything.
206 return
207 }
208
209 gp := state.g
210 switch s := readgstatus(gp); s {
211 default:
212 dumpgstatus(gp)
213 throw("unexpected g status")
214
215 case _Grunnable | _Gscan,
216 _Gwaiting | _Gscan,
217 _Gsyscall | _Gscan:
218 casfrom_Gscanstatus(gp, s, s&^_Gscan)
219 }
220
221 if state.stopped {
222 // We stopped it, so we need to re-schedule it.
223 ready(gp, 0, true)
224 }
225}
Austin Clementsd1969012019-10-04 18:54:00 -0400226
227// canPreemptM reports whether mp is in a state that is safe to preempt.
228//
229// It is nosplit because it has nosplit callers.
230//
231//go:nosplit
232func canPreemptM(mp *m) bool {
233 return mp.locks == 0 && mp.mallocing == 0 && mp.preemptoff == "" && mp.p.ptr().status == _Prunning
234}
Austin Clementsa3ffb0d2019-10-16 19:10:06 -0400235
236//go:generate go run mkpreempt.go
237
// asyncPreempt saves all user registers and calls asyncPreempt2.
//
// When stack scanning encounters an asyncPreempt frame, it scans that
// frame and its parent frame conservatively.
//
// asyncPreempt is implemented in assembly, so only its declaration
// appears here.
func asyncPreempt()
245
// asyncPreempt2 is the Go half of asynchronous preemption; per the
// asyncPreempt documentation it is called from asyncPreempt after
// the user registers have been saved. It is currently a stub that
// does nothing.
//
//go:nosplit
func asyncPreempt2() {
	// TODO: Enter scheduler
}