Allow opting out of writeback caching.

See the documentation on MountConfig.DisableWritebackCaching for
discussion.
geesefs-0-30-9
Aaron Jacobs 2015-08-12 02:33:08 +00:00
commit 89495b2e04
3 changed files with 77 additions and 16 deletions

View File

@ -57,6 +57,7 @@ const maxReadahead = 1 << 20
// A connection to the fuse kernel process.
type Connection struct {
cfg MountConfig
debugLogger *log.Logger
errorLogger *log.Logger
@ -65,9 +66,6 @@ type Connection struct {
dev *os.File
protocol fusekernel.Protocol
// The context from which all op contexts inherit.
parentCtx context.Context
mu sync.Mutex
// A map from fuse "unique" request ID (*not* the op ID for logging used
@ -94,15 +92,15 @@ type opState struct {
//
// The loggers may be nil.
func newConnection(
parentCtx context.Context,
cfg MountConfig,
debugLogger *log.Logger,
errorLogger *log.Logger,
dev *os.File) (c *Connection, err error) {
c = &Connection{
cfg: cfg,
debugLogger: debugLogger,
errorLogger: errorLogger,
dev: dev,
parentCtx: parentCtx,
cancelFuncs: make(map[uint64]func()),
}
@ -165,12 +163,10 @@ func (c *Connection) Init() (err error) {
// Tell the kernel not to use pitifully small 4 KiB writes.
initOp.Flags |= fusekernel.InitBigWrites
// TODO(jacobsa): Make this opt out and discuss benefits and caveats:
// * Write performance may be better (cf. http://thread.gmane.org/gmane.comp.file-systems.fuse.devel/13923)
// * (Discuss what writeback caching even means)
// * File systems need to implement setattr for dealing with kernel's stored time (find code reference)
// * File systems no longer "own" mtime; kernel will cache it even if no writes (cf. http://thread.gmane.org/gmane.comp.file-systems.fuse.devel/14808)
initOp.Flags |= fusekernel.InitWritebackCache
// Enable writeback caching if the user hasn't asked us not to.
if !c.cfg.DisableWritebackCaching {
initOp.Flags |= fusekernel.InitWritebackCache
}
c.Reply(ctx, nil)
return
@ -234,7 +230,7 @@ func (c *Connection) beginOp(
opCode uint32,
fuseID uint64) (ctx context.Context) {
// Start with the parent context.
ctx = c.parentCtx
ctx = c.cfg.OpContext
// Set up a cancellation function.
//

View File

@ -67,14 +67,14 @@ func Mount(
}
// Choose a parent context for ops.
opContext := config.OpContext
if opContext == nil {
opContext = context.Background()
cfgCopy := *config
if cfgCopy.OpContext == nil {
cfgCopy.OpContext = context.Background()
}
// Create a Connection object wrapping the device.
connection, err := newConnection(
opContext,
cfgCopy,
config.DebugLogger,
config.ErrorLogger,
dev)

View File

@ -48,6 +48,71 @@ type MountConfig struct {
// performed.
DebugLogger *log.Logger
// Linux only.
//
// By default on Linux we allow the kernel to perform writeback caching
// (cf. http://goo.gl/LdZzo1):
//
// * When the user calls write(2), the kernel sticks the user's data into
// its page cache. Only later does it call through to the file system,
// potentially after coalescing multiple small user writes.
//
// * The file system may receive multiple write ops from the kernel
// concurrently if there is a lot of page cache data to flush.
//
// * Write performance may be significantly improved due to the user and
// the kernel not waiting for serial round trips to the file system. This
// is especially true if the user makes tiny writes.
//
// * close(2) (and anything else calling f_op->flush) causes all dirty
// pages to be written out before it proceeds to send a FlushFileOp
// (cf. https://goo.gl/TMrY6X).
//
// * Similarly, close(2) causes the kernel to send a setattr request
// filling in the mtime if any dirty pages were flushed, since the time
// at which the pages were written to the file system can't be trusted.
//
// * close(2) (and anything else calling f_op->flush) writes out all dirty
// pages, then sends a setattr request with an appropriate mtime for
// those writes if there were any, and only then proceeds to send a flush
// op.
//
// Code walk:
//
// * (https://goo.gl/zTIZQ9) fuse_flush calls write_inode_now before
// calling the file system. The latter eventually calls into
// __writeback_single_inode.
//
// * (https://goo.gl/L7Z2w5) __writeback_single_inode calls
// do_writepages, which writes out any dirty pages.
//
// * (https://goo.gl/DOPgla) __writeback_single_inode later calls
// write_inode, which calls into the superblock op struct's write_inode
// member. For fuse, this is fuse_write_inode
// (cf. https://goo.gl/eDSKOX).
//
// * (https://goo.gl/PbkGA1) fuse_write_inode calls fuse_flush_times.
//
// * (https://goo.gl/ig8x9V) fuse_flush_times sends a setattr request
// for setting the inode's mtime.
//
// However, this brings along some caveats:
//
// * The file system must handle SetInodeAttributesOp or close(2) will fail,
// due to the call chain into fuse_flush_times listed above.
//
// * The kernel caches mtime and ctime regardless of whether the file
// system tells it to do so, disregarding the result of further getattr
// requests (cf. https://goo.gl/3ZZMUw, https://goo.gl/7WtQUp). It
// appears this may be true of the file size, too. Writeback caching may
// therefore not be suitable for file systems where these attributes can
// spontaneously change for reasons the kernel doesn't observe. See
// http://goo.gl/V5WQCN for more discussion.
//
// Setting DisableWritebackCaching disables this behavior. Instead the file
// system is called one or more times for each write(2), and the user's
// syscall doesn't return until the file system returns.
DisableWritebackCaching bool
// OS X only.
//
// Normally on OS X we mount with the novncache option