diff --git a/connection.go b/connection.go index 60aa997..0d7e13a 100644 --- a/connection.go +++ b/connection.go @@ -57,6 +57,7 @@ const maxReadahead = 1 << 20 // A connection to the fuse kernel process. type Connection struct { + cfg MountConfig debugLogger *log.Logger errorLogger *log.Logger @@ -65,9 +66,6 @@ type Connection struct { dev *os.File protocol fusekernel.Protocol - // The context from which all op contexts inherit. - parentCtx context.Context - mu sync.Mutex // A map from fuse "unique" request ID (*not* the op ID for logging used @@ -94,15 +92,15 @@ type opState struct { // // The loggers may be nil. func newConnection( - parentCtx context.Context, + cfg MountConfig, debugLogger *log.Logger, errorLogger *log.Logger, dev *os.File) (c *Connection, err error) { c = &Connection{ + cfg: cfg, debugLogger: debugLogger, errorLogger: errorLogger, dev: dev, - parentCtx: parentCtx, cancelFuncs: make(map[uint64]func()), } @@ -165,12 +163,10 @@ func (c *Connection) Init() (err error) { // Tell the kernel not to use pitifully small 4 KiB writes. initOp.Flags |= fusekernel.InitBigWrites - // TODO(jacobsa): Make this opt out and discuss benefits and caveats: - // * Write performance may be better (cf. http://thread.gmane.org/gmane.comp.file-systems.fuse.devel/13923) - // * (Discuss what writeback caching even means) - // * File systems need to implement setattr for dealing with kernel's stored time (find code reference) - // * File systems no longer "own" mtime; kernel will cache it even if no writes (cf. http://thread.gmane.org/gmane.comp.file-systems.fuse.devel/14808) - initOp.Flags |= fusekernel.InitWritebackCache + // Enable writeback caching if the user hasn't asked us not to. + if !c.cfg.DisableWritebackCaching { + initOp.Flags |= fusekernel.InitWritebackCache + } c.Reply(ctx, nil) return @@ -234,7 +230,7 @@ func (c *Connection) beginOp( opCode uint32, fuseID uint64) (ctx context.Context) { // Start with the parent context. 
- ctx = c.parentCtx + ctx = c.cfg.OpContext // Set up a cancellation function. // diff --git a/mount.go b/mount.go index 7f9c553..6227617 100644 --- a/mount.go +++ b/mount.go @@ -67,14 +67,14 @@ func Mount( } // Choose a parent context for ops. - opContext := config.OpContext - if opContext == nil { - opContext = context.Background() + cfgCopy := *config + if cfgCopy.OpContext == nil { + cfgCopy.OpContext = context.Background() } // Create a Connection object wrapping the device. connection, err := newConnection( - opContext, + cfgCopy, config.DebugLogger, config.ErrorLogger, dev) diff --git a/mount_config.go b/mount_config.go index dd76561..7855a26 100644 --- a/mount_config.go +++ b/mount_config.go @@ -48,6 +48,71 @@ type MountConfig struct { // performed. DebugLogger *log.Logger + // Linux only. + // + // By default on Linux we allow the kernel to perform writeback caching + // (cf. http://goo.gl/LdZzo1): + // + // * When the user calls write(2), the kernel sticks the user's data into + // its page cache. Only later does it call through to the file system, + // potentially after coalescing multiple small user writes. + // + // * The file system may receive multiple write ops from the kernel + // concurrently if there is a lot of page cache data to flush. + // + // * Write performance may be significantly improved due to the user and + // the kernel not waiting for serial round trips to the file system. This + // is especially true if the user makes tiny writes. + // + // * close(2) (and anything else calling f_op->flush) causes all dirty + // pages to be written out before it proceeds to send a FlushFileOp + // (cf. https://goo.gl/TMrY6X). + // + // * Similarly, close(2) causes the kernel to send a setattr request + // filling in the mtime if any dirty pages were flushed, since the time + // at which the pages were written to the file system can't be trusted. 
+ // + // * close(2) (and anything else calling f_op->flush) writes out all dirty + // pages, then sends a setattr request with an appropriate mtime for + // those writes if there were any, and only then proceeds to send a flush. + // + // Code walk: + // + // * (https://goo.gl/zTIZQ9) fuse_flush calls write_inode_now before + // calling the file system. The latter eventually calls into + // __writeback_single_inode. + // + // * (https://goo.gl/L7Z2w5) __writeback_single_inode calls + // do_writepages, which writes out any dirty pages. + // + // * (https://goo.gl/DOPgla) __writeback_single_inode later calls + // write_inode, which calls into the superblock op struct's write_inode + // member. For fuse, this is fuse_write_inode + // (cf. https://goo.gl/eDSKOX). + // + // * (https://goo.gl/PbkGA1) fuse_write_inode calls fuse_flush_times. + // + // * (https://goo.gl/ig8x9V) fuse_flush_times sends a setattr request + // for setting the inode's mtime. + // + // However, this brings along some caveats: + // + // * The file system must handle SetInodeAttributesOp or close(2) will fail, + // due to the call chain into fuse_flush_times listed above. + // + // * The kernel caches mtime and ctime regardless of whether the file + // system tells it to do so, disregarding the result of further getattr + // requests (cf. https://goo.gl/3ZZMUw, https://goo.gl/7WtQUp). It + // appears this may be true of the file size, too. Writeback caching may + // therefore not be suitable for file systems where these attributes can + // spontaneously change for reasons the kernel doesn't observe. See + // http://goo.gl/V5WQCN for more discussion. + // + // Setting DisableWritebackCaching disables this behavior. Instead the file + // system is called one or more times for each write(2), and the user's + // syscall doesn't return until the file system returns. + DisableWritebackCaching bool + // OS X only. // // Normally on OS X we mount with the novncache option