From cb273d013a1fe826fef96e1341e71330eab172c0 Mon Sep 17 00:00:00 2001 From: Aaron Jacobs Date: Tue, 5 May 2015 15:06:01 +1000 Subject: [PATCH] Moved per-PID tracing to Connection. This lets it happen before per-request cancellation, which otherwise causes all future ops to start cancelled after one finishes. --- connection.go | 100 ++++++++++++++++++++++++++++++++++++++++++- fuseops/common_op.go | 93 ---------------------------------------- 2 files changed, 99 insertions(+), 94 deletions(-) diff --git a/connection.go b/connection.go index 25d3c86..55caa96 100644 --- a/connection.go +++ b/connection.go @@ -15,18 +15,29 @@ package fuse import ( + "flag" "fmt" "log" "path" "runtime" "sync" + "time" "golang.org/x/net/context" + "golang.org/x/sys/unix" "github.com/jacobsa/bazilfuse" "github.com/jacobsa/fuse/fuseops" + "github.com/jacobsa/reqtrace" ) +var fTraceByPID = flag.Bool( + "fuse.trace_by_pid", + false, + "Enable a hacky mode that uses reqtrace to group all ops from each "+ + "individual PID. Not a good idea to use in production; races, bugs, and "+ + "resource leaks likely lurk.") + // A connection to the fuse kernel process. type Connection struct { logger *log.Logger @@ -46,6 +57,11 @@ type Connection struct { // // GUARDED_BY(mu) cancelFuncs map[bazilfuse.RequestID]func() + + // A map from PID to a traced context for that PID. + // + // GUARDED_BY(mu) + pidMap map[int]context.Context } // Responsibility for closing the wrapped connection is transferred to the @@ -59,6 +75,7 @@ func newConnection( wrapped: wrapped, parentCtx: parentCtx, cancelFuncs: make(map[bazilfuse.RequestID]func()), + pidMap: make(map[int]context.Context), } return @@ -108,6 +125,85 @@ func (c *Connection) recordCancelFunc( c.cancelFuncs[reqID] = f } +// Wait until the process completes, then close off the trace and remove the +// context from the map. +// +// LOCKS_EXCLUDED(c.mu) +func (c *Connection) reportWhenPIDGone( + pid int, + ctx context.Context, + report reqtrace.ReportFunc) { + // HACK(jacobsa): Poll until the process no longer exists. + const pollPeriod = 50 * time.Millisecond + for { + // The man page for kill(2) says that if the signal is zero, then "no + // signal is sent, but error checking is still performed; this can be used + // to check for the existence of a process ID". + err := unix.Kill(pid, 0) + + // ESRCH means the process is gone. + if err == unix.ESRCH { + break + } + + // If we receive EPERM, we're not going to be able to do what we want. We + // don't really have any choice but to print info and leak. + if err == unix.EPERM { + log.Printf("Failed to kill(2) PID %v; no permissions. Leaking trace.", pid) + return + } + + // Otherwise, panic. + if err != nil { + panic(fmt.Errorf("Kill(%v): %v", pid, err)) + } + + time.Sleep(pollPeriod) + } + + // Finish up. + report(nil) + + c.mu.Lock() + delete(c.pidMap, pid) + c.mu.Unlock() +} + +// Set up a hacky per-PID trace context, if enabled. Either way, return a +// context from which an operation should inherit. +// +// See notes on fTraceByPID. +// +// LOCKS_EXCLUDED(c.mu) +func (c *Connection) maybeTraceByPID( + pid int) (ctx context.Context) { + ctx = c.parentCtx + + // Is there anything to do? + if !reqtrace.Enabled() || !*fTraceByPID { + return + } + + c.mu.Lock() + defer c.mu.Unlock() + + // Do we already have a traced context for this PID? + if existing, ok := c.pidMap[pid]; ok { + ctx = existing + return + } + + // Set up a new one and stick it in the map. + var report reqtrace.ReportFunc + ctx, report = reqtrace.Trace(ctx, fmt.Sprintf("Requests from PID %v", pid)) + c.pidMap[pid] = ctx + + // Ensure we close the trace and remove it from the map eventually. + go c.reportWhenPIDGone(pid, ctx, report) + + return +} + // Set up state for an op that is about to be returned to the user, given its // underlying bazilfuse request. // @@ -121,6 +217,9 @@ func (c *Connection) beginOp( // Note that the op is in flight. c.opsInFlight.Add(1) + // Choose a parent context. + ctx = c.maybeTraceByPID(int(bfReq.Hdr().Pid)) + // Set up a cancellation function. // // Special case: On Darwin, osxfuse appears to aggressively reuse "unique" @@ -129,7 +228,6 @@ func (c *Connection) beginOp( // for reuse. For these, we should not record any state keyed on their ID. // // Cf. https://github.com/osxfuse/osxfuse/issues/208 - ctx = c.parentCtx if _, ok := bfReq.(*bazilfuse.ForgetRequest); !ok { var cancel func() ctx, cancel = context.WithCancel(ctx) diff --git a/fuseops/common_op.go b/fuseops/common_op.go index e62f4ba..b5d4881 100644 --- a/fuseops/common_op.go +++ b/fuseops/common_op.go @@ -15,27 +15,15 @@ package fuseops import ( - "flag" "fmt" - "log" "reflect" "strings" - "sync" - "time" "github.com/jacobsa/bazilfuse" "github.com/jacobsa/reqtrace" "golang.org/x/net/context" - "golang.org/x/sys/unix" ) -var fTraceByPID = flag.Bool( - "fuse.trace_by_pid", - false, - "Enable a hacky mode that uses reqtrace to group all ops from each "+ - "individual PID. Not a good idea to use in production; races, bugs, and "+ - "resource leaks likely lurk.") - // An interface that all ops inside which commonOp is embedded must // implement. type internalOp interface { @@ -67,84 +55,6 @@ type commonOp struct { finished func(error) } -var gPIDMapMu sync.Mutex - -// A map from PID to a traced context for that PID. -// -// GUARDED_BY(gPIDMapMu) -var gPIDMap = make(map[int]context.Context) - -// Wait until the process completes, then close off the trace and remove the -// context from the map. -func reportWhenPIDGone( - pid int, - ctx context.Context, - report reqtrace.ReportFunc) { - // HACK(jacobsa): Poll until the process no longer exists. - const pollPeriod = 50 * time.Millisecond - for { - // The man page for kill(2) says that if the signal is zero, then "no - // signal is sent, but error checking is still performed; this can be used - // to check for the existence of a process ID". - err := unix.Kill(pid, 0) - - // ESRCH means the process is gone. - if err == unix.ESRCH { - break - } - - // If we receive EPERM, we're not going to be able to do what we want. We - // don't really have any choice but to print info and leak. - if err == unix.EPERM { - log.Printf("Failed to kill(2) PID %v; no permissions. Leaking trace.", pid) - return - } - - // Otherwise, panic. - if err != nil { - panic(fmt.Errorf("Kill(%v): %v", pid, err)) - } - - time.Sleep(pollPeriod) - } - - // Finish up. - report(nil) - - gPIDMapMu.Lock() - delete(gPIDMap, pid) - gPIDMapMu.Unlock() -} - -func (o *commonOp) maybeTraceByPID( - in context.Context, - pid int) (out context.Context) { - // Is there anything to do? - if !reqtrace.Enabled() || !*fTraceByPID { - out = in - return - } - - gPIDMapMu.Lock() - defer gPIDMapMu.Unlock() - - // Do we already have a traced context for this PID? - if existing, ok := gPIDMap[pid]; ok { - out = existing - return - } - - // Set up a new one and stick it in the map. - var report reqtrace.ReportFunc - out, report = reqtrace.Trace(in, fmt.Sprintf("Requests from PID %v", pid)) - gPIDMap[pid] = out - - // Ensure we close the trace and remove it from the map eventually. - go reportWhenPIDGone(pid, out, report) - - return -} - func (o *commonOp) ShortDesc() (desc string) { opName := reflect.TypeOf(o.op).String() @@ -175,9 +85,6 @@ func (o *commonOp) init( o.log = log o.finished = finished - // Set up a context that reflects per-PID tracing if appropriate. - o.ctx = o.maybeTraceByPID(o.ctx, int(bazilReq.Hdr().Pid)) - // Set up a trace span for this op. var reportForTrace reqtrace.ReportFunc o.ctx, reportForTrace = reqtrace.StartSpan(o.ctx, o.op.ShortDesc())