runtime: refactor runtime->tracer API to appear more like a lock

mknyszek · gopherbot · commit f119abb65dbe · 2023-11-09T22:34:25.000Z
Currently the execution tracer synchronizes with itself using very heavyweight operations. As a result, it's totally fine for most of the tracer code to look like:

    if traceEnabled() {
        traceXXX(...)
    }

However, if we want to make that synchronization more lightweight (as issue #60773 proposes), then this is insufficient. In particular, we need to make sure the tracer can't observe an inconsistency between g atomicstatus and the event that would be emitted for a particular g transition. This means making the g status change appear to happen atomically with the corresponding trace event being written out from the perspective of the tracer. This requires a change in API to something more like a lock. While we're here, we might as well make sure that trace events can *only* be emitted while this lock is held. This change introduces such an API: traceAcquire, which returns a value that can emit events, and traceRelease, which requires the value that was returned by traceAcquire. In practice, this won't be a real lock, it'll be more like a seqlock.

For the current tracer, this API is completely overkill and the value returned by traceAcquire basically just checks trace.enabled. But it's necessary for the tracer described in #60773 and we can implement that more cleanly if we do this refactoring now instead of later.

For #60773.

Change-Id: Ibb9ff5958376339fafc2b5180aef65cf2ba18646
Reviewed-on: https://go-review.googlesource.com/c/go/+/515635
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
diff --git a/src/runtime/debugcall.go b/src/runtime/debugcall.go
@@ -166,10 +166,12 @@ func debugCallWrap(dispatch uintptr) {
 		gp.schedlink = 0
 
 		// Park the calling goroutine.
-		if traceEnabled() {
-			traceGoPark(traceBlockDebugCall, 1)
-		}
+		trace := traceAcquire()
 		casGToWaiting(gp, _Grunning, waitReasonDebugCall)
+		if trace.ok() {
+			trace.GoPark(traceBlockDebugCall, 1)
+			traceRelease(trace)
+		}
 		dropg()
 
 		// Directly execute the new goroutine. The debug
@@ -225,19 +227,23 @@ func debugCallWrap1() {
 		// Switch back to the calling goroutine. At some point
 		// the scheduler will schedule us again and we'll
 		// finish exiting.
-		if traceEnabled() {
-			traceGoSched()
-		}
+		trace := traceAcquire()
 		casgstatus(gp, _Grunning, _Grunnable)
+		if trace.ok() {
+			trace.GoSched()
+			traceRelease(trace)
+		}
 		dropg()
 		lock(&sched.lock)
 		globrunqput(gp)
 		unlock(&sched.lock)
 
-		if traceEnabled() {
-			traceGoUnpark(callingG, 0)
-		}
+		trace = traceAcquire()
 		casgstatus(callingG, _Gwaiting, _Grunnable)
+		if trace.ok() {
+			trace.GoUnpark(callingG, 0)
+			traceRelease(trace)
+		}
 		execute(callingG, true)
 	})
 }
diff --git a/src/runtime/mcentral.go b/src/runtime/mcentral.go
@@ -84,8 +84,10 @@ func (c *mcentral) cacheSpan() *mspan {
 	deductSweepCredit(spanBytes, 0)
 
 	traceDone := false
-	if traceEnabled() {
-		traceGCSweepStart()
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.GCSweepStart()
+		traceRelease(trace)
 	}
 
 	// If we sweep spanBudget spans without finding any free
@@ -157,9 +159,11 @@ func (c *mcentral) cacheSpan() *mspan {
 		}
 		sweep.active.end(sl)
 	}
-	if traceEnabled() {
-		traceGCSweepDone()
+	trace = traceAcquire()
+	if trace.ok() {
+		trace.GCSweepDone()
 		traceDone = true
+		traceRelease(trace)
 	}
 
 	// We failed to get a span from the mcentral so get one from mheap.
@@ -170,8 +174,12 @@ func (c *mcentral) cacheSpan() *mspan {
 
 	// At this point s is a span that should have free slots.
 havespan:
-	if traceEnabled() && !traceDone {
-		traceGCSweepDone()
+	if !traceDone {
+		trace := traceAcquire()
+		if trace.ok() {
+			trace.GCSweepDone()
+			traceRelease(trace)
+		}
 	}
 	n := int(s.nelems) - int(s.allocCount)
 	if n == 0 || s.freeindex == s.nelems || s.allocCount == s.nelems {
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
@@ -647,8 +647,10 @@ func gcStart(trigger gcTrigger) {
 	// Update it under gcsema to avoid gctrace getting wrong values.
 	work.userForced = trigger.kind == gcTriggerCycle
 
-	if traceEnabled() {
-		traceGCStart()
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.GCStart()
+		traceRelease(trace)
 	}
 
 	// Check that all Ps have finished deferred mcache flushes.
@@ -989,8 +991,10 @@ func gcMarkTermination() {
 	mp.traceback = 0
 	casgstatus(curgp, _Gwaiting, _Grunning)
 
-	if traceEnabled() {
-		traceGCDone()
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.GCDone()
+		traceRelease(trace)
 	}
 
 	// all done
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
@@ -420,7 +420,11 @@ retry:
 		// If the CPU limiter is enabled, intentionally don't
 		// assist to reduce the amount of CPU time spent in the GC.
 		if traced {
-			traceGCMarkAssistDone()
+			trace := traceAcquire()
+			if trace.ok() {
+				trace.GCMarkAssistDone()
+				traceRelease(trace)
+			}
 		}
 		return
 	}
@@ -461,15 +465,22 @@ retry:
 			// We were able to steal all of the credit we
 			// needed.
 			if traced {
-				traceGCMarkAssistDone()
+				trace := traceAcquire()
+				if trace.ok() {
+					trace.GCMarkAssistDone()
+					traceRelease(trace)
+				}
 			}
 			return
 		}
 	}
-
 	if traceEnabled() && !traced {
-		traced = true
-		traceGCMarkAssistStart()
+		trace := traceAcquire()
+		if trace.ok() {
+			traced = true
+			trace.GCMarkAssistStart()
+			traceRelease(trace)
+		}
 	}
 
 	// Perform assist work
@@ -515,7 +526,11 @@ retry:
 		// this G's assist debt, or the GC cycle is over.
 	}
 	if traced {
-		traceGCMarkAssistDone()
+		trace := traceAcquire()
+		if trace.ok() {
+			trace.GCMarkAssistDone()
+			traceRelease(trace)
+		}
 	}
 }
 
diff --git a/src/runtime/mgcpacer.go b/src/runtime/mgcpacer.go
@@ -807,9 +807,11 @@ func (c *gcControllerState) findRunnableGCWorker(pp *p, now int64) (*g, int64) {
 
 	// Run the background mark worker.
 	gp := node.gp.ptr()
+	trace := traceAcquire()
 	casgstatus(gp, _Gwaiting, _Grunnable)
-	if traceEnabled() {
-		traceGoUnpark(gp, 0)
+	if trace.ok() {
+		trace.GoUnpark(gp, 0)
+		traceRelease(trace)
 	}
 	return gp, now
 }
@@ -828,8 +830,10 @@ func (c *gcControllerState) resetLive(bytesMarked uint64) {
 	c.triggered = ^uint64(0) // Reset triggered.
 
 	// heapLive was updated, so emit a trace event.
-	if traceEnabled() {
-		traceHeapAlloc(bytesMarked)
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.HeapAlloc(bytesMarked)
+		traceRelease(trace)
 	}
 }
 
@@ -856,10 +860,12 @@ func (c *gcControllerState) markWorkerStop(mode gcMarkWorkerMode, duration int64
 
 func (c *gcControllerState) update(dHeapLive, dHeapScan int64) {
 	if dHeapLive != 0 {
+		trace := traceAcquire()
 		live := gcController.heapLive.Add(dHeapLive)
-		if traceEnabled() {
+		if trace.ok() {
 			// gcController.heapLive changed.
-			traceHeapAlloc(live)
+			trace.HeapAlloc(live)
+			traceRelease(trace)
 		}
 	}
 	if gcBlackenEnabled == 0 {
@@ -1428,8 +1434,10 @@ func gcControllerCommit() {
 
 	// TODO(mknyszek): This isn't really accurate any longer because the heap
 	// goal is computed dynamically. Still useful to snapshot, but not as useful.
-	if traceEnabled() {
-		traceHeapGoal()
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.HeapGoal()
+		traceRelease(trace)
 	}
 
 	trigger, heapGoal := gcController.trigger()
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
@@ -516,8 +516,10 @@ func (sl *sweepLocked) sweep(preserve bool) bool {
 		throw("mspan.sweep: bad span state")
 	}
 
-	if traceEnabled() {
-		traceGCSweepSpan(s.npages * _PageSize)
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.GCSweepSpan(s.npages * _PageSize)
+		traceRelease(trace)
 	}
 
 	mheap_.pagesSwept.Add(int64(s.npages))
@@ -889,8 +891,10 @@ func deductSweepCredit(spanBytes uintptr, callerSweepPages uintptr) {
 		return
 	}
 
-	if traceEnabled() {
-		traceGCSweepStart()
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.GCSweepStart()
+		traceRelease(trace)
 	}
 
 	// Fix debt if necessary.
@@ -929,8 +933,10 @@ retry:
 		}
 	}
 
-	if traceEnabled() {
-		traceGCSweepDone()
+	trace = traceAcquire()
+	if trace.ok() {
+		trace.GCSweepDone()
+		traceRelease(trace)
 	}
 }
 
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
@@ -791,8 +791,10 @@ func (h *mheap) reclaim(npage uintptr) {
 	// traceGCSweepStart/Done pair on the P.
 	mp := acquirem()
 
-	if traceEnabled() {
-		traceGCSweepStart()
+	trace := traceAcquire()
+	if trace.ok() {
+		trace.GCSweepStart()
+		traceRelease(trace)
 	}
 
 	arenas := h.sweepArenas
@@ -839,8 +841,10 @@ func (h *mheap) reclaim(npage uintptr) {
 		unlock(&h.lock)
 	}
 
-	if traceEnabled() {
-		traceGCSweepDone()
+	trace = traceAcquire()
+	if trace.ok() {
+		trace.GCSweepDone()
+		traceRelease(trace)
 	}
 	releasem(mp)
 }
@@ -911,10 +915,12 @@ func (h *mheap) reclaimChunk(arenas []arenaIdx, pageIdx, n uintptr) uintptr {
 		n -= uintptr(len(inUse) * 8)
 	}
 	sweep.active.end(sl)
-	if traceEnabled() {
+	trace := traceAcquire()
+	if trace.ok() {
 		unlock(&h.lock)
 		// Account for pages scanned but not reclaimed.
-		traceGCSweepSpan((n0 - nFreed) * pageSize)
+		trace.GCSweepSpan((n0 - nFreed) * pageSize)
+		traceRelease(trace)
 		lock(&h.lock)
 	}
 
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
diff --git a/src/runtime/trace.go b/src/runtime/trace.go

Original file line number	Diff line number	Diff line change
`@@ -420,7 +420,11 @@ retry:`
`420`	`420`	`// If the CPU limiter is enabled, intentionally don't`
`421`	`421`	`// assist to reduce the amount of CPU time spent in the GC.`
`422`	`422`	`if traced {`
`423`		`- traceGCMarkAssistDone()`
	`423`	`+ trace := traceAcquire()`
	`424`	`+ if trace.ok() {`
	`425`	`+ trace.GCMarkAssistDone()`
	`426`	`+ traceRelease(trace)`
	`427`	`+ }`
`424`	`428`	`}`
`425`	`429`	`return`
`426`	`430`	`}`
`@@ -461,15 +465,22 @@ retry:`
`461`	`465`	`// We were able to steal all of the credit we`
`462`	`466`	`// needed.`
`463`	`467`	`if traced {`
`464`		`- traceGCMarkAssistDone()`
	`468`	`+ trace := traceAcquire()`
	`469`	`+ if trace.ok() {`
	`470`	`+ trace.GCMarkAssistDone()`
	`471`	`+ traceRelease(trace)`
	`472`	`+ }`
`465`	`473`	`}`
`466`	`474`	`return`
`467`	`475`	`}`
`468`	`476`	`}`
`469`		`-`
`470`	`477`	`if traceEnabled() && !traced {`
`471`		`- traced = true`
`472`		`- traceGCMarkAssistStart()`
	`478`	`+ trace := traceAcquire()`
	`479`	`+ if trace.ok() {`
	`480`	`+ traced = true`
	`481`	`+ trace.GCMarkAssistStart()`
	`482`	`+ traceRelease(trace)`
	`483`	`+ }`
`473`	`484`	`}`
`474`	`485`
`475`	`486`	`// Perform assist work`
`@@ -515,7 +526,11 @@ retry:`
`515`	`526`	`// this G's assist debt, or the GC cycle is over.`
`516`	`527`	`}`
`517`	`528`	`if traced {`
`518`		`- traceGCMarkAssistDone()`
	`529`	`+ trace := traceAcquire()`
	`530`	`+ if trace.ok() {`
	`531`	`+ trace.GCMarkAssistDone()`
	`532`	`+ traceRelease(trace)`
	`533`	`+ }`
`519`	`534`	`}`
`520`	`535`	`}`
`521`	`536`
Original file line number	Diff line number	Diff line change
`@@ -516,8 +516,10 @@ func (sl *sweepLocked) sweep(preserve bool) bool {`
`516`	`516`	`throw("mspan.sweep: bad span state")`
`517`	`517`	`}`
`518`	`518`
`519`		`- if traceEnabled() {`
`520`		`- traceGCSweepSpan(s.npages * _PageSize)`
	`519`	`+ trace := traceAcquire()`
	`520`	`+ if trace.ok() {`
	`521`	`+ trace.GCSweepSpan(s.npages * _PageSize)`
	`522`	`+ traceRelease(trace)`
`521`	`523`	`}`
`522`	`524`
`523`	`525`	`mheap_.pagesSwept.Add(int64(s.npages))`
`@@ -889,8 +891,10 @@ func deductSweepCredit(spanBytes uintptr, callerSweepPages uintptr) {`
`889`	`891`	`return`
`890`	`892`	`}`
`891`	`893`
`892`		`- if traceEnabled() {`
`893`		`- traceGCSweepStart()`
	`894`	`+ trace := traceAcquire()`
	`895`	`+ if trace.ok() {`
	`896`	`+ trace.GCSweepStart()`
	`897`	`+ traceRelease(trace)`
`894`	`898`	`}`
`895`	`899`
`896`	`900`	`// Fix debt if necessary.`
`@@ -929,8 +933,10 @@ retry:`
`929`	`933`	`}`
`930`	`934`	`}`
`931`	`935`
`932`		`- if traceEnabled() {`
`933`		`- traceGCSweepDone()`
	`936`	`+ trace = traceAcquire()`
	`937`	`+ if trace.ok() {`
	`938`	`+ trace.GCSweepDone()`
	`939`	`+ traceRelease(trace)`
`934`	`940`	`}`
`935`	`941`	`}`
`936`	`942`