Allow opting out of writeback caching.
See the documentation on MountConfig.DisableWritebackCaching for discussion.
geesefs-0-30-9
commit
89495b2e04
|
@ -57,6 +57,7 @@ const maxReadahead = 1 << 20
|
|||
|
||||
// A connection to the fuse kernel process.
|
||||
type Connection struct {
|
||||
cfg MountConfig
|
||||
debugLogger *log.Logger
|
||||
errorLogger *log.Logger
|
||||
|
||||
|
@ -65,9 +66,6 @@ type Connection struct {
|
|||
dev *os.File
|
||||
protocol fusekernel.Protocol
|
||||
|
||||
// The context from which all op contexts inherit.
|
||||
parentCtx context.Context
|
||||
|
||||
mu sync.Mutex
|
||||
|
||||
// A map from fuse "unique" request ID (*not* the op ID for logging used
|
||||
|
@ -94,15 +92,15 @@ type opState struct {
|
|||
//
|
||||
// The loggers may be nil.
|
||||
func newConnection(
|
||||
parentCtx context.Context,
|
||||
cfg MountConfig,
|
||||
debugLogger *log.Logger,
|
||||
errorLogger *log.Logger,
|
||||
dev *os.File) (c *Connection, err error) {
|
||||
c = &Connection{
|
||||
cfg: cfg,
|
||||
debugLogger: debugLogger,
|
||||
errorLogger: errorLogger,
|
||||
dev: dev,
|
||||
parentCtx: parentCtx,
|
||||
cancelFuncs: make(map[uint64]func()),
|
||||
}
|
||||
|
||||
|
@ -165,12 +163,10 @@ func (c *Connection) Init() (err error) {
|
|||
// Tell the kernel not to use pitifully small 4 KiB writes.
|
||||
initOp.Flags |= fusekernel.InitBigWrites
|
||||
|
||||
// TODO(jacobsa): Make this opt out and discuss benefits and caveats:
|
||||
// * Write performance may be better (cf. http://thread.gmane.org/gmane.comp.file-systems.fuse.devel/13923)
|
||||
// * (Discuss what writeback caching even means)
|
||||
// * File systems need to implement setattr for dealing with kernel's stored time (find code reference)
|
||||
// * File systems no longer "own" mtime; kernel will cache it even if no writes (cf. http://thread.gmane.org/gmane.comp.file-systems.fuse.devel/14808)
|
||||
// Enable writeback caching if the user hasn't asked us not to.
|
||||
if !c.cfg.DisableWritebackCaching {
|
||||
initOp.Flags |= fusekernel.InitWritebackCache
|
||||
}
|
||||
|
||||
c.Reply(ctx, nil)
|
||||
return
|
||||
|
@ -234,7 +230,7 @@ func (c *Connection) beginOp(
|
|||
opCode uint32,
|
||||
fuseID uint64) (ctx context.Context) {
|
||||
// Start with the parent context.
|
||||
ctx = c.parentCtx
|
||||
ctx = c.cfg.OpContext
|
||||
|
||||
// Set up a cancellation function.
|
||||
//
|
||||
|
|
8
mount.go
8
mount.go
|
@ -67,14 +67,14 @@ func Mount(
|
|||
}
|
||||
|
||||
// Choose a parent context for ops.
|
||||
opContext := config.OpContext
|
||||
if opContext == nil {
|
||||
opContext = context.Background()
|
||||
cfgCopy := *config
|
||||
if cfgCopy.OpContext == nil {
|
||||
cfgCopy.OpContext = context.Background()
|
||||
}
|
||||
|
||||
// Create a Connection object wrapping the device.
|
||||
connection, err := newConnection(
|
||||
opContext,
|
||||
cfgCopy,
|
||||
config.DebugLogger,
|
||||
config.ErrorLogger,
|
||||
dev)
|
||||
|
|
|
@ -48,6 +48,71 @@ type MountConfig struct {
|
|||
// performed.
|
||||
DebugLogger *log.Logger
|
||||
|
||||
// Linux only.
|
||||
//
|
||||
// By default on Linux we allow the kernel to perform writeback caching
|
||||
// (cf. http://goo.gl/LdZzo1):
|
||||
//
|
||||
// * When the user calls write(2), the kernel sticks the user's data into
|
||||
// its page cache. Only later does it call through to the file system,
|
||||
// potentially after coalescing multiple small user writes.
|
||||
//
|
||||
// * The file system may receive multiple write ops from the kernel
|
||||
// concurrently if there is a lot of page cache data to flush.
|
||||
//
|
||||
// * Write performance may be significantly improved due to the user and
|
||||
// the kernel not waiting for serial round trips to the file system. This
|
||||
// is especially true if the user makes tiny writes.
|
||||
//
|
||||
// * close(2) (and anything else calling f_op->flush) causes all dirty
|
||||
// pages to be written out before it proceeds to send a FlushFileOp
|
||||
// (cf. https://goo.gl/TMrY6X).
|
||||
//
|
||||
// * Similarly, close(2) causes the kernel to send a setattr request
|
||||
// filling in the mtime if any dirty pages were flushed, since the time
|
||||
// at which the pages were written to the file system can't be trusted.
|
||||
//
|
||||
// * close(2) (and anything else calling f_op->flush) writes out all dirty
|
||||
// pages, then sends a setattr request with an appropriate mtime for
|
||||
// those writes if there were any, and only then proceeds to send a FlushFileOp.
|
||||
//
|
||||
// Code walk:
|
||||
//
|
||||
// * (https://goo.gl/zTIZQ9) fuse_flush calls write_inode_now before
|
||||
// calling the file system. The latter eventually calls into
|
||||
// __writeback_single_inode.
|
||||
//
|
||||
// * (https://goo.gl/L7Z2w5) __writeback_single_inode calls
|
||||
// do_writepages, which writes out any dirty pages.
|
||||
//
|
||||
// * (https://goo.gl/DOPgla) __writeback_single_inode later calls
|
||||
// write_inode, which calls into the superblock op struct's write_inode
|
||||
// member. For fuse, this is fuse_write_inode
|
||||
// (cf. https://goo.gl/eDSKOX).
|
||||
//
|
||||
// * (https://goo.gl/PbkGA1) fuse_write_inode calls fuse_flush_times.
|
||||
//
|
||||
// * (https://goo.gl/ig8x9V) fuse_flush_times sends a setattr request
|
||||
// for setting the inode's mtime.
|
||||
//
|
||||
// However, this brings along some caveats:
|
||||
//
|
||||
// * The file system must handle SetInodeAttributesOp or close(2) will fail,
|
||||
// due to the call chain into fuse_flush_times listed above.
|
||||
//
|
||||
// * The kernel caches mtime and ctime regardless of whether the file
|
||||
// system tells it to do so, disregarding the result of further getattr
|
||||
// requests (cf. https://goo.gl/3ZZMUw, https://goo.gl/7WtQUp). It
|
||||
// appears this may be true of the file size, too. Writeback caching may
|
||||
// therefore not be suitable for file systems where these attributes can
|
||||
// spontaneously change for reasons the kernel doesn't observe. See
|
||||
// http://goo.gl/V5WQCN for more discussion.
|
||||
//
|
||||
// Setting DisableWritebackCaching disables this behavior. Instead the file
|
||||
// system is called one or more times for each write(2), and the user's
|
||||
// syscall doesn't return until the file system returns.
|
||||
DisableWritebackCaching bool
|
||||
|
||||
// OS X only.
|
||||
//
|
||||
// Normally on OS X we mount with the novncache option
|
||||
|
|
Loading…
Reference in New Issue