fusego/connection.go

362 lines
9.2 KiB
Go
Raw Normal View History

2015-03-24 07:19:42 +03:00
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fuse
2015-03-24 07:23:19 +03:00
import (
"flag"
"fmt"
2015-03-24 07:23:19 +03:00
"log"
"path"
"runtime"
"sync"
"time"
2015-03-24 07:23:19 +03:00
"golang.org/x/net/context"
"golang.org/x/sys/unix"
2015-03-24 07:23:19 +03:00
"github.com/jacobsa/bazilfuse"
"github.com/jacobsa/fuse/fuseops"
"github.com/jacobsa/reqtrace"
2015-03-24 07:23:19 +03:00
)
2015-03-24 07:19:42 +03:00
// fTraceByPID is the value of the boolean flag --fuse.trace_by_pid, which
// defaults to false. It is consulted when choosing the parent context for an
// op: when set (and reqtrace is enabled), ops are grouped into one trace per
// requesting PID. This is a debugging aid, not something to run in production.
var fTraceByPID = flag.Bool(
	"fuse.trace_by_pid",
	false,
	"Enable a hacky mode that uses reqtrace to group all ops from each individual PID. "+
		"Not a good idea to use in production; races, bugs, and resource leaks likely lurk.")
2015-03-24 07:19:42 +03:00
// A connection to the fuse kernel process.
type Connection struct {
logger *log.Logger
wrapped *bazilfuse.Conn
opsInFlight sync.WaitGroup
2015-04-29 04:53:17 +03:00
// The context from which all op contexts inherit.
parentCtx context.Context
2015-04-29 04:53:17 +03:00
// For logging purposes only.
nextOpID uint32
2015-05-05 03:41:09 +03:00
mu sync.Mutex
// A map from bazilfuse request ID (*not* the op ID for logging used above)
// to a function that cancel's its associated context.
//
// GUARDED_BY(mu)
cancelFuncs map[bazilfuse.RequestID]func()
// A map from PID to a traced context for that PID.
//
// GUARDED_BY(mu)
pidMap map[int]context.Context
2015-03-24 07:19:42 +03:00
}
2015-03-24 07:34:50 +03:00
// Responsibility for closing the wrapped connection is transferred to the
// result. You must call c.close() eventually.
func newConnection(
parentCtx context.Context,
2015-03-24 07:34:50 +03:00
logger *log.Logger,
2015-03-24 07:36:09 +03:00
wrapped *bazilfuse.Conn) (c *Connection, err error) {
c = &Connection{
2015-05-05 03:42:17 +03:00
logger: logger,
wrapped: wrapped,
parentCtx: parentCtx,
cancelFuncs: make(map[bazilfuse.RequestID]func()),
pidMap: make(map[int]context.Context),
2015-03-24 07:36:09 +03:00
}
return
}
2015-03-24 07:23:19 +03:00
2015-04-29 04:53:17 +03:00
// Log information for an operation with the given ID. calldepth is the depth
// to use when recovering file:line information with runtime.Caller.
func (c *Connection) log(
2015-04-29 04:53:17 +03:00
opID uint32,
calldepth int,
format string,
v ...interface{}) {
// Get file:line info.
var file string
var line int
var ok bool
_, file, line, ok = runtime.Caller(calldepth)
if !ok {
file = "???"
}
2015-04-29 05:11:34 +03:00
fileLine := fmt.Sprintf("%v:%v", path.Base(file), line)
// Format the actual message to be printed.
msg := fmt.Sprintf(
2015-04-29 05:11:34 +03:00
"Op 0x%08x %24s] %v",
2015-04-29 04:51:25 +03:00
opID,
2015-04-29 05:11:34 +03:00
fileLine,
fmt.Sprintf(format, v...))
// Print it.
c.logger.Println(msg)
}
2015-05-05 03:41:09 +03:00
// LOCKS_EXCLUDED(c.mu)
func (c *Connection) recordCancelFunc(
reqID bazilfuse.RequestID,
2015-05-05 03:41:52 +03:00
f func()) {
c.mu.Lock()
defer c.mu.Unlock()
if _, ok := c.cancelFuncs[reqID]; ok {
panic(fmt.Sprintf("Already have cancel func for request %v", reqID))
}
c.cancelFuncs[reqID] = f
}
2015-05-05 03:41:09 +03:00
// reportWhenPIDGone blocks until the process with the given PID no longer
// exists, then closes off that PID's trace by calling report(nil) and removes
// the PID's entry from c.pidMap. It is started in its own goroutine by
// maybeTraceByPID.
//
// ctx is not used by this function.
//
// Panics if kill(2) fails with an error other than ESRCH or EPERM.
//
// LOCKS_EXCLUDED(c.mu)
func (c *Connection) reportWhenPIDGone(
	pid int,
	ctx context.Context,
	report reqtrace.ReportFunc) {
	// HACK(jacobsa): Poll until the process no longer exists.
	const pollPeriod = 50 * time.Millisecond

	for {
		// The man page for kill(2) says that if the signal is zero, then "no
		// signal is sent, but error checking is still performed; this can be used
		// to check for the existence of a process ID".
		err := unix.Kill(pid, 0)

		// ESRCH means the process is gone.
		if err == unix.ESRCH {
			break
		}

		// A nil error means the process exists and we could signal it. EPERM
		// means the process exists but we are not allowed to signal it. Either
		// way it is still alive, so keep polling rather than giving up and
		// leaking the trace and the map entry. Anything else is unexpected.
		if err != nil && err != unix.EPERM {
			panic(fmt.Errorf("Kill(%v): %v", pid, err))
		}

		time.Sleep(pollPeriod)
	}

	// Finish up: close the trace, then forget the PID.
	report(nil)

	c.mu.Lock()
	delete(c.pidMap, pid)
	c.mu.Unlock()
}
// maybeTraceByPID returns the context from which an op requested by the given
// PID should inherit.
//
// Unless reqtrace is enabled and the fTraceByPID flag is set, that is simply
// the connection's parent context. Otherwise it is a traced context shared by
// all ops from the PID: the first request from a PID creates the trace,
// stores it in c.pidMap, and starts a goroutine that closes the trace and
// drops the map entry once the process is gone.
//
// See notes on fTraceByPID.
//
// LOCKS_EXCLUDED(c.mu)
func (c *Connection) maybeTraceByPID(
	pid int) (ctx context.Context) {
	// With per-PID tracing off there is nothing to set up.
	tracingByPID := reqtrace.Enabled() && *fTraceByPID
	if !tracingByPID {
		return c.parentCtx
	}

	c.mu.Lock()
	defer c.mu.Unlock()

	// Reuse the traced context for this PID if one was already created.
	if traced, found := c.pidMap[pid]; found {
		return traced
	}

	// First request from this PID: start a trace and remember its context.
	traced, report := reqtrace.Trace(
		c.parentCtx,
		fmt.Sprintf("Requests from PID %v", pid))
	c.pidMap[pid] = traced

	// Arrange for the trace to be closed and the map entry removed eventually.
	go c.reportWhenPIDGone(pid, traced, report)

	return traced
}
// Set up state for an op that is about to be returned to the user, given its
// underlying bazilfuse request.
//
// Return a context that should be used for the op.
2015-05-05 03:41:09 +03:00
//
// LOCKS_EXCLUDED(c.mu)
func (c *Connection) beginOp(
bfReq bazilfuse.Request) (ctx context.Context) {
reqID := bfReq.Hdr().ID
2015-05-05 03:41:09 +03:00
// Note that the op is in flight.
2015-05-05 03:04:31 +03:00
c.opsInFlight.Add(1)
// Choose a parent context.
ctx = c.maybeTraceByPID(int(bfReq.Hdr().Pid))
2015-05-05 03:41:09 +03:00
// Set up a cancellation function.
//
// Special case: On Darwin, osxfuse appears to aggressively reuse "unique"
// request IDs. This matters for Forget requests, which have no reply
// associated and therefore appear to have IDs that are immediately eligible
// for reuse. For these, we should not record any state keyed on their ID.
//
// Cf. https://github.com/osxfuse/osxfuse/issues/208
if _, ok := bfReq.(*bazilfuse.ForgetRequest); !ok {
var cancel func()
ctx, cancel = context.WithCancel(ctx)
c.recordCancelFunc(reqID, cancel)
}
return
2015-05-05 03:04:31 +03:00
}
2015-05-05 03:04:03 +03:00
// Clean up all state associated with an op to which the user has responded,
// given its underlying bazilfuse request. This must be called before a
// response is sent to the kernel, to avoid a race where the request's ID might
// be reused by osxfuse.
2015-05-05 03:41:09 +03:00
//
// LOCKS_EXCLUDED(c.mu)
func (c *Connection) finishOp(bfReq bazilfuse.Request) {
2015-05-05 03:41:09 +03:00
c.mu.Lock()
defer c.mu.Unlock()
reqID := bfReq.Hdr().ID
2015-05-05 03:41:09 +03:00
// Even though the op is finished, context.WithCancel requires us to arrange
// for the cancellation function to be invoked. We also must remove it from
// our map.
//
// Special case: we don't do this for Forget requests. See the note in
// beginOp above.
if _, ok := bfReq.(*bazilfuse.ForgetRequest); !ok {
cancel, ok := c.cancelFuncs[reqID]
if !ok {
panic(fmt.Sprintf("Unknown request ID in finishOp: %v", reqID))
}
2015-05-05 03:41:09 +03:00
cancel()
delete(c.cancelFuncs, reqID)
}
2015-05-05 03:41:09 +03:00
// Decrement the in-flight counter.
2015-05-05 03:04:31 +03:00
c.opsInFlight.Done()
}
2015-05-05 03:04:03 +03:00
2015-05-05 05:21:57 +03:00
// handleInterrupt cancels the context of the op named by the interrupt
// request's IntrID, if a cancel function is currently recorded for that ID.
// Otherwise it does nothing.
//
// LOCKS_EXCLUDED(c.mu)
func (c *Connection) handleInterrupt(req *bazilfuse.InterruptRequest) {
	c.mu.Lock()
	defer c.mu.Unlock()

	// NOTE(jacobsa): fuse.txt in the Linux kernel documentation
	// (https://goo.gl/H55Dnr) defines the kernel <-> userspace protocol for
	// interrupts.
	//
	// In particular, my reading of it is that an interrupt request cannot be
	// delivered to userspace before the original request. The part about the
	// race and EAGAIN appears to be aimed at userspace programs that
	// concurrently process requests.
	//
	// So a missing entry is taken to mean that the request being interrupted
	// has already been replied to, and there is nothing left to cancel.
	if cancel, found := c.cancelFuncs[req.IntrID]; found {
		cancel()
	}
}
2015-03-24 07:19:42 +03:00
// Read the next op from the kernel process. Return io.EOF if the kernel has
// closed the connection.
2015-03-24 07:23:19 +03:00
//
// This function delivers ops in exactly the order they are received from
2015-04-29 04:28:16 +03:00
// /dev/fuse. It must not be called multiple times concurrently.
2015-05-05 03:41:09 +03:00
//
// LOCKS_EXCLUDED(c.mu)
2015-03-24 07:26:02 +03:00
func (c *Connection) ReadOp() (op fuseops.Op, err error) {
// Keep going until we find a request we know how to convert.
for {
// Read a bazilfuse request.
2015-05-05 03:36:38 +03:00
var bfReq bazilfuse.Request
2015-03-24 07:26:02 +03:00
bfReq, err = c.wrapped.ReadRequest()
2015-05-05 03:36:38 +03:00
2015-03-24 07:26:02 +03:00
if err != nil {
return
}
// Choose an ID for this operation.
opID := c.nextOpID
c.nextOpID++
// Log the receipt of the operation.
c.log(opID, 1, "<- %v", bfReq)
2015-03-24 07:52:14 +03:00
2015-05-05 05:21:57 +03:00
// Special case: responding to statfs is required to make mounting work on
// OS X. We don't currently expose the capability for the file system to
2015-03-24 07:51:36 +03:00
// intercept this.
if statfsReq, ok := bfReq.(*bazilfuse.StatfsRequest); ok {
c.log(opID, 1, "-> (Statfs) OK")
2015-03-24 07:51:36 +03:00
statfsReq.Respond(&bazilfuse.StatfsResponse{})
continue
}
2015-05-05 05:21:57 +03:00
// Special case: handle interrupt requests.
if interruptReq, ok := bfReq.(*bazilfuse.InterruptRequest); ok {
c.handleInterrupt(interruptReq)
continue
}
2015-05-05 03:36:38 +03:00
// Set up op dependencies.
opCtx := c.beginOp(bfReq)
2015-05-05 03:36:38 +03:00
logForOp := func(calldepth int, format string, v ...interface{}) {
c.log(opID, calldepth+1, format, v...)
2015-04-29 04:35:28 +03:00
}
finished := func(err error) { c.finishOp(bfReq) }
2015-05-05 03:04:03 +03:00
2015-05-05 03:36:38 +03:00
op = fuseops.Convert(opCtx, bfReq, logForOp, finished)
2015-03-24 07:26:02 +03:00
return
}
}
2015-03-24 07:34:50 +03:00
2015-03-24 07:36:09 +03:00
// waitForReady blocks until a receive from the wrapped connection's Ready
// channel completes, then returns the wrapped connection's MountError.
func (c *Connection) waitForReady() (err error) {
	<-c.wrapped.Ready
	return c.wrapped.MountError
}
2015-03-24 07:34:50 +03:00
// Close the connection and wait for in-flight ops.
2015-03-24 07:36:09 +03:00
func (c *Connection) close() (err error) {
err = c.wrapped.Close()
c.opsInFlight.Wait()
2015-03-24 07:36:09 +03:00
return
}