// Copyright 2015 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Package fuseops contains implementations of the fuse.Op interface that may // be returned by fuse.Connection.ReadOp. See documentation in that package for // more. package fuseops import ( "os" "time" "github.com/jacobsa/bazilfuse" "golang.org/x/net/context" ) // A common interface implemented by all ops in this package. Use a type switch // to find particular concrete types, responding with fuse.ENOSYS if a type is // not supported. type Op interface { // Return the fields common to all operations. Header() OpHeader // A context that can be used for long-running operations. Context() context.Context // Repond to the operation with the supplied error. If there is no error, set // any necessary output fields and then call Respond(nil). Respond(error) // Log information tied to this operation, with semantics equivalent to // log.Printf, except that the format is different and logging is suppressed // if --fuse.debug is not set. Logf(format string, v ...interface{}) } //////////////////////////////////////////////////////////////////////// // Setup //////////////////////////////////////////////////////////////////////// // Sent once when mounting the file system. It must succeed in order for the // mount to succeed. type InitOp struct { commonOp maxReadahead uint32 } func (o *InitOp) Respond(err error) { defer o.commonOp.opsInFlight.Done() if err != nil { o.commonOp.respondErr(err) return } resp := bazilfuse.InitResponse{} // Ask the Linux kernel for larger write requests. // // As of 2015-03-26, the behavior in the kernel is: // // * (http://goo.gl/jMKHMZ, http://goo.gl/XTF4ZH) Cap the max write size at // the maximum of 4096 and init_response->max_write. // // * (http://goo.gl/gEIvHZ) If FUSE_BIG_WRITES isn't set, don't return more // than one page. // // * (http://goo.gl/4RLhxZ, http://goo.gl/hi0Cm2) Never write more than // FUSE_MAX_PAGES_PER_REQ pages (128 KiB on x86). // // 4 KiB is crazy small. Ask for significantly more, and take what the kernel // will give us. const maxWrite = 1 << 21 resp.Flags |= bazilfuse.InitBigWrites resp.MaxWrite = maxWrite // Ask the Linux kernel for larger read requests. // // As of 2015-03-26, the behavior in the kernel is: // // * (http://goo.gl/bQ1f1i, http://goo.gl/HwBrR6) Set the local variable // ra_pages to be init_response->max_readahead divided by the page size. // // * (http://goo.gl/gcIsSh, http://goo.gl/LKV2vA) Set // backing_dev_info::ra_pages to the min of that value and what was sent // in the request's max_readahead field. // // * (http://goo.gl/u2SqzH) Use backing_dev_info::ra_pages when deciding // how much to read ahead. // // * (http://goo.gl/JnhbdL) Don't read ahead at all if that field is zero. // // Reading a page at a time is a drag. Ask for as much as the kernel is // willing to give us. resp.MaxReadahead = o.maxReadahead // Respond. o.Logf("Responding: %v", &resp) o.r.(*bazilfuse.InitRequest).Respond(&resp) } //////////////////////////////////////////////////////////////////////// // Inodes //////////////////////////////////////////////////////////////////////// // Look up a child by name within a parent directory. The kernel sends this // when resolving user paths to dentry structs, which are then cached. type LookUpInodeOp struct { commonOp // The ID of the directory inode to which the child belongs. Parent InodeID // The name of the child of interest, relative to the parent. For example, in // this directory structure: // // foo/ // bar/ // baz // // the file system may receive a request to look up the child named "bar" for // the parent foo/. Name string // The resulting entry. Must be filled out by the file system. // // The lookup count for the inode is implicitly incremented. See notes on // ForgetInodeOp for more information. Entry ChildInodeEntry } func (o *LookUpInodeOp) Respond(err error) { defer o.commonOp.opsInFlight.Done() if err != nil { o.commonOp.respondErr(err) return } resp := bazilfuse.LookupResponse{} convertChildInodeEntry(&o.Entry, &resp) o.Logf("Responding: %v", &resp) o.r.(*bazilfuse.LookupRequest).Respond(&resp) } // Refresh the attributes for an inode whose ID was previously returned in a // LookUpInodeOp. The kernel sends this when the FUSE VFS layer's cache of // inode attributes is stale. This is controlled by the AttributesExpiration // field of ChildInodeEntry, etc. type GetInodeAttributesOp struct { commonOp // The inode of interest. Inode InodeID // Set by the file system: attributes for the inode, and the time at which // they should expire. See notes on ChildInodeEntry.AttributesExpiration for // more. Attributes InodeAttributes AttributesExpiration time.Time } func (o *GetInodeAttributesOp) Respond(err error) { defer o.commonOp.opsInFlight.Done() if err != nil { o.commonOp.respondErr(err) return } resp := bazilfuse.GetattrResponse{ Attr: convertAttributes(o.Inode, o.Attributes), AttrValid: convertExpirationTime(o.AttributesExpiration), } o.Logf("Responding: %v", &resp) o.r.(*bazilfuse.GetattrRequest).Respond(&resp) } // Change attributes for an inode. // // The kernel sends this for obvious cases like chmod(2), and for less obvious // cases like ftrunctate(2). type SetInodeAttributesOp struct { commonOp // The inode of interest. Inode InodeID // The attributes to modify, or nil for attributes that don't need a change. Size *uint64 Mode *os.FileMode Atime *time.Time Mtime *time.Time // Set by the file system: the new attributes for the inode, and the time at // which they should expire. See notes on // ChildInodeEntry.AttributesExpiration for more. Attributes InodeAttributes AttributesExpiration time.Time } func (o *SetInodeAttributesOp) Respond(err error) { defer o.commonOp.opsInFlight.Done() if err != nil { o.commonOp.respondErr(err) return } resp := bazilfuse.SetattrResponse{ Attr: convertAttributes(o.Inode, o.Attributes), AttrValid: convertExpirationTime(o.AttributesExpiration), } o.Logf("Responding: %v", &resp) o.r.(*bazilfuse.SetattrRequest).Respond(&resp) } // Decrement the reference count for an inode ID previously issued by the file // system. // // The comments for the ops that implicitly increment the reference count // contain a note of this (but see also the note about the root inode below). // For example, LookUpInodeOp and MkDirOp. The authoritative source is the // libfuse documentation, which states that any op that returns // fuse_reply_entry fuse_reply_create implicitly increments (cf. // http://goo.gl/o5C7Dx). // // If the reference count hits zero, the file system can forget about that ID // entirely, and even re-use it in future responses. The kernel guarantees that // it will not otherwise use it again. // // The reference count corresponds to fuse_inode::nlookup // (http://goo.gl/ut48S4). Some examples of where the kernel manipulates it: // // * (http://goo.gl/vPD9Oh) Any caller to fuse_iget increases the count. // * (http://goo.gl/B6tTTC) fuse_lookup_name calls fuse_iget. // * (http://goo.gl/IlcxWv) fuse_create_open calls fuse_iget. // * (http://goo.gl/VQMQul) fuse_dentry_revalidate increments after // revalidating. // // In contrast to all other inodes, RootInodeID begins with an implicit // reference count of one, without a corresponding op to increase it. (There // could be no such op, because the root cannot be referred to by name.) Code // walk: // // * (http://goo.gl/gWAheU) fuse_fill_super calls fuse_get_root_inode. // // * (http://goo.gl/AoLsbb) fuse_get_root_inode calls fuse_iget without // sending any particular request. // // * (http://goo.gl/vPD9Oh) fuse_iget increments nlookup. // // File systems should not make assumptions about whether they will or will not // receive a ForgetInodeOp for the root inode. Experimentally, OS X seems to // never send one, while Linux appears to send one only sometimes. (Cf. // http://goo.gl/EUbxEg, fuse-devel thread "Root inode lookup count"). type ForgetInodeOp struct { commonOp // The inode whose reference count should be decremented. Inode InodeID // The amount to decrement the reference count. N uint64 } func (o *ForgetInodeOp) Respond(err error) { defer o.commonOp.opsInFlight.Done() if err != nil { o.commonOp.respondErr(err) return } o.Logf("Responding OK to ForgetInodeOp") o.r.(*bazilfuse.ForgetRequest).Respond() } //////////////////////////////////////////////////////////////////////// // Inode creation //////////////////////////////////////////////////////////////////////// // Create a directory inode as a child of an existing directory inode. The // kernel sends this in response to a mkdir(2) call. // // The kernel appears to verify the name doesn't already exist (mkdir calls // mkdirat calls user_path_create calls filename_create, which verifies: // http://goo.gl/FZpLu5). But volatile file systems and paranoid non-volatile // file systems should check for the reasons described below on CreateFile. type MkDirOp struct { commonOp // The ID of parent directory inode within which to create the child. Parent InodeID // The name of the child to create, and the mode with which to create it. Name string Mode os.FileMode // Set by the file system: information about the inode that was created. // // The lookup count for the inode is implicitly incremented. See notes on // ForgetInodeOp for more information. Entry ChildInodeEntry } func (o *MkDirOp) Respond(err error) { defer o.commonOp.opsInFlight.Done() if err != nil { o.commonOp.respondErr(err) return } resp := bazilfuse.MkdirResponse{} convertChildInodeEntry(&o.Entry, &resp.LookupResponse) o.Logf("Responding: %v", &resp) o.r.(*bazilfuse.MkdirRequest).Respond(&resp) } // Create a file inode and open it. // // The kernel sends this when the user asks to open a file with the O_CREAT // flag and the kernel has observed that the file doesn't exist. (See for // example lookup_open, http://goo.gl/PlqE9d). // // However it's impossible to tell for sure that all kernels make this check // in all cases and the official fuse documentation is less than encouraging // (" the file does not exist, first create it with the specified mode, and // then open it"). Therefore file systems would be smart to be paranoid and // check themselves, returning EEXIST when the file already exists. This of // course particularly applies to file systems that are volatile from the // kernel's point of view. type CreateFileOp struct { commonOp // The ID of parent directory inode within which to create the child file. Parent InodeID // The name of the child to create, and the mode with which to create it. Name string Mode os.FileMode // Flags for the open operation. Flags bazilfuse.OpenFlags // Set by the file system: information about the inode that was created. // // The lookup count for the inode is implicitly incremented. See notes on // ForgetInodeOp for more information. Entry ChildInodeEntry // Set by the file system: an opaque ID that will be echoed in follow-up // calls for this file using the same struct file in the kernel. In practice // this usually means follow-up calls using the file descriptor returned by // open(2). // // The handle may be supplied in future ops like ReadFileOp that contain a // file handle. The file system must ensure this ID remains valid until a // later call to ReleaseFileHandle. Handle HandleID } func (o *CreateFileOp) Respond(err error) { defer o.commonOp.opsInFlight.Done() if err != nil { o.commonOp.respondErr(err) return } resp := bazilfuse.CreateResponse{ OpenResponse: bazilfuse.OpenResponse{ Handle: bazilfuse.HandleID(o.Handle), }, } convertChildInodeEntry(&o.Entry, &resp.LookupResponse) o.Logf("Responding: %v", &resp) o.r.(*bazilfuse.CreateRequest).Respond(&resp) } //////////////////////////////////////////////////////////////////////// // Unlinking //////////////////////////////////////////////////////////////////////// // Unlink a directory from its parent. Because directories cannot have a link // count above one, this means the directory inode should be deleted as well // once the kernel sends ForgetInodeOp. // // The file system is responsible for checking that the directory is empty. // // Sample implementation in ext2: ext2_rmdir (http://goo.gl/B9QmFf) type RmDirOp struct { commonOp // The ID of parent directory inode, and the name of the directory being // removed within it. Parent InodeID Name string } func (o *RmDirOp) Respond(err error) { defer o.commonOp.opsInFlight.Done() if err != nil { o.commonOp.respondErr(err) return } o.Logf("Responding OK to RmDirOp") o.r.(*bazilfuse.RemoveRequest).Respond() } // Unlink a file from its parent. If this brings the inode's link count to // zero, the inode should be deleted once the kernel sends ForgetInodeOp. It // may still be referenced before then if a user still has the file open. // // Sample implementation in ext2: ext2_unlink (http://goo.gl/hY6r6C) type UnlinkOp struct { commonOp // The ID of parent directory inode, and the name of the file being removed // within it. Parent InodeID Name string } func (o *UnlinkOp) Respond(err error) { defer o.commonOp.opsInFlight.Done() if err != nil { o.commonOp.respondErr(err) return } o.Logf("Responding OK to UnlinkOp") o.r.(*bazilfuse.RemoveRequest).Respond() } //////////////////////////////////////////////////////////////////////// // Directory handles //////////////////////////////////////////////////////////////////////// // Open a directory inode. // // On Linux the sends this when setting up a struct file for a particular inode // with type directory, usually in response to an open(2) call from a // user-space process. On OS X it may not be sent for every open(2) (cf. // https://github.com/osxfuse/osxfuse/issues/199). type OpenDirOp struct { commonOp // The ID of the inode to be opened. Inode InodeID // Mode and options flags. Flags bazilfuse.OpenFlags // Set by the file system: an opaque ID that will be echoed in follow-up // calls for this directory using the same struct file in the kernel. In // practice this usually means follow-up calls using the file descriptor // returned by open(2). // // The handle may be supplied in future ops like ReadDirOp that contain a // directory handle. The file system must ensure this ID remains valid until // a later call to ReleaseDirHandle. Handle HandleID } func (o *OpenDirOp) Respond(err error) { defer o.commonOp.opsInFlight.Done() if err != nil { o.commonOp.respondErr(err) return } resp := bazilfuse.OpenResponse{ Handle: bazilfuse.HandleID(o.Handle), } o.Logf("Responding: %v", &resp) o.r.(*bazilfuse.OpenRequest).Respond(&resp) } // Read entries from a directory previously opened with OpenDir. type ReadDirOp struct { commonOp // The directory inode that we are reading, and the handle previously // returned by OpenDir when opening that inode. Inode InodeID Handle HandleID // The offset within the directory at which to read. // // Warning: this field is not necessarily a count of bytes. Its legal values // are defined by the results returned in ReadDirResponse. See the notes // below and the notes on that struct. // // In the Linux kernel this ultimately comes from file::f_pos, which starts // at zero and is set by llseek and by the final consumed result returned by // each call to ReadDir: // // * (http://goo.gl/2nWJPL) iterate_dir, which is called by getdents(2) and // readdir(2), sets dir_context::pos to file::f_pos before calling // f_op->iterate, and then does the opposite assignment afterward. // // * (http://goo.gl/rTQVSL) fuse_readdir, which implements iterate for fuse // directories, passes dir_context::pos as the offset to fuse_read_fill, // which passes it on to user-space. fuse_readdir later calls // parse_dirfile with the same context. // // * (http://goo.gl/vU5ukv) For each returned result (except perhaps the // last, which may be truncated by the page boundary), parse_dirfile // updates dir_context::pos with fuse_dirent::off. // // It is affected by the Posix directory stream interfaces in the following // manner: // // * (http://goo.gl/fQhbyn, http://goo.gl/ns1kDF) opendir initially causes // filepos to be set to zero. // // * (http://goo.gl/ezNKyR, http://goo.gl/xOmDv0) readdir allows the user // to iterate through the directory one entry at a time. As each entry is // consumed, its d_off field is stored in __dirstream::filepos. // // * (http://goo.gl/WEOXG8, http://goo.gl/rjSXl3) telldir allows the user // to obtain the d_off field from the most recently returned entry. // // * (http://goo.gl/WG3nDZ, http://goo.gl/Lp0U6W) seekdir allows the user // to seek backward to an offset previously returned by telldir. It // stores the new offset in filepos, and calls llseek to update the // kernel's struct file. // // * (http://goo.gl/gONQhz, http://goo.gl/VlrQkc) rewinddir allows the user // to go back to the beginning of the directory, obtaining a fresh view. // It updates filepos and calls llseek to update the kernel's struct // file. // // Unfortunately, FUSE offers no way to intercept seeks // (http://goo.gl/H6gEXa), so there is no way to cause seekdir or rewinddir // to fail. Additionally, there is no way to distinguish an explicit // rewinddir followed by readdir from the initial readdir, or a rewinddir // from a seekdir to the value returned by telldir just after opendir. // // Luckily, Posix is vague about what the user will see if they seek // backwards, and requires the user not to seek to an old offset after a // rewind. The only requirement on freshness is that rewinddir results in // something that looks like a newly-opened directory. So FUSE file systems // may e.g. cache an entire fresh listing for each ReadDir with a zero // offset, and return array offsets into that cached listing. Offset DirOffset // The maximum number of bytes to return in ReadDirResponse.Data. A smaller // number is acceptable. Size int // Set by the file system: a buffer consisting of a sequence of FUSE // directory entries in the format generated by fuse_add_direntry // (http://goo.gl/qCcHCV), which is consumed by parse_dirfile // (http://goo.gl/2WUmD2). Use fuseutil.AppendDirent to generate this data. // // The buffer must not exceed the length specified in ReadDirRequest.Size. It // is okay for the final entry to be truncated; parse_dirfile copes with this // by ignoring the partial record. // // Each entry returned exposes a directory offset to the user that may later // show up in ReadDirRequest.Offset. See notes on that field for more // information. // // An empty buffer indicates the end of the directory has been reached. Data []byte } func (o *ReadDirOp) Respond(err error) { defer o.commonOp.opsInFlight.Done() if err != nil { o.commonOp.respondErr(err) return } resp := bazilfuse.ReadResponse{ Data: o.Data, } o.Logf("Responding: %v", &resp) o.r.(*bazilfuse.ReadRequest).Respond(&resp) } // Release a previously-minted directory handle. The kernel sends this when // there are no more references to an open directory: all file descriptors are // closed and all memory mappings are unmapped. // // The kernel guarantees that the handle ID will not be used in further ops // sent to the file system (unless it is reissued by the file system). // // Errors from this op are ignored by the kernel (cf. http://goo.gl/RL38Do). type ReleaseDirHandleOp struct { commonOp // The handle ID to be released. The kernel guarantees that this ID will not // be used in further calls to the file system (unless it is reissued by the // file system). Handle HandleID } func (o *ReleaseDirHandleOp) Respond(err error) { defer o.commonOp.opsInFlight.Done() if err != nil { o.commonOp.respondErr(err) return } o.Logf("Responding OK to ReleaseDirHandleOp") o.r.(*bazilfuse.ReleaseRequest).Respond() } //////////////////////////////////////////////////////////////////////// // File handles //////////////////////////////////////////////////////////////////////// // Open a file inode. // // On Linux the sends this when setting up a struct file for a particular inode // with type file, usually in response to an open(2) call from a user-space // process. On OS X it may not be sent for every open(2) // (cf.https://github.com/osxfuse/osxfuse/issues/199). type OpenFileOp struct { commonOp // The ID of the inode to be opened. Inode InodeID // Mode and options flags. Flags bazilfuse.OpenFlags // An opaque ID that will be echoed in follow-up calls for this file using // the same struct file in the kernel. In practice this usually means // follow-up calls using the file descriptor returned by open(2). // // The handle may be supplied in future ops like ReadFileOp that contain a // file handle. The file system must ensure this ID remains valid until a // later call to ReleaseFileHandle. Handle HandleID } func (o *OpenFileOp) Respond(err error) { defer o.commonOp.opsInFlight.Done() if err != nil { o.commonOp.respondErr(err) return } resp := bazilfuse.OpenResponse{ Handle: bazilfuse.HandleID(o.Handle), } o.Logf("Responding: %v", &resp) o.r.(*bazilfuse.OpenRequest).Respond(&resp) } // Read data from a file previously opened with CreateFile or OpenFile. // // Note that this op is not sent for every call to read(2) by the end user; // some reads may be served by the page cache. See notes on WriteFileOp for // more. type ReadFileOp struct { commonOp // The file inode that we are reading, and the handle previously returned by // CreateFile or OpenFile when opening that inode. Inode InodeID Handle HandleID // The range of the file to read. // // The FUSE documentation requires that exactly the number of bytes be // returned, except in the case of EOF or error (http://goo.gl/ZgfBkF). This // appears to be because it uses file mmapping machinery // (http://goo.gl/SGxnaN) to read a page at a time. It appears to understand // where EOF is by checking the inode size (http://goo.gl/0BkqKD), returned // by a previous call to LookUpInode, GetInodeAttributes, etc. Offset int64 Size int // Set by the file system: the data read. If this is less than the requested // size, it indicates EOF. An error should not be returned in this case. Data []byte } func (o *ReadFileOp) Respond(err error) { defer o.commonOp.opsInFlight.Done() if err != nil { o.commonOp.respondErr(err) return } resp := bazilfuse.ReadResponse{ Data: o.Data, } o.Logf("Responding: %v", &resp) o.r.(*bazilfuse.ReadRequest).Respond(&resp) } // Write data to a file previously opened with CreateFile or OpenFile. // // When the user writes data using write(2), the write goes into the page // cache and the page is marked dirty. Later the kernel may write back the // page via the FUSE VFS layer, causing this op to be sent: // // * The kernel calls address_space_operations::writepage when a dirty page // needs to be written to backing store (cf. http://goo.gl/Ezbewg). Fuse // sets this to fuse_writepage (cf. http://goo.gl/IeNvLT). // // * (http://goo.gl/Eestuy) fuse_writepage calls fuse_writepage_locked. // // * (http://goo.gl/RqYIxY) fuse_writepage_locked makes a write request to // the userspace server. // // Note that the kernel *will* ensure that writes are received and acknowledged // by the file system before sending a FlushFileOp when closing the file // descriptor to which they were written: // // * (http://goo.gl/PheZjf) fuse_flush calls write_inode_now, which appears // to start a writeback in the background (it talks about a "flusher // thread"). // // * (http://goo.gl/1IiepM) fuse_flush then calls fuse_sync_writes, which // "[waits] for all pending writepages on the inode to finish". // // * (http://goo.gl/zzvxWv) Only then does fuse_flush finally send the // flush request. // // (See also http://goo.gl/ocdTdM, fuse-devel thread "Fuse guarantees on // concurrent requests".) type WriteFileOp struct { commonOp // The file inode that we are modifying, and the handle previously returned // by CreateFile or OpenFile when opening that inode. Inode InodeID Handle HandleID // The offset at which to write the data below. // // The man page for pwrite(2) implies that aside from changing the file // handle's offset, using pwrite is equivalent to using lseek(2) and then // write(2). The man page for lseek(2) says the following: // // "The lseek() function allows the file offset to be set beyond the end of // the file (but this does not change the size of the file). If data is later // written at this point, subsequent reads of the data in the gap (a "hole") // return null bytes (aq\0aq) until data is actually written into the gap." // // It is therefore reasonable to assume that the kernel is looking for // the following semantics: // // * If the offset is less than or equal to the current size, extend the // file as necessary to fit any data that goes past the end of the file. // // * If the offset is greater than the current size, extend the file // with null bytes until it is not, then do the above. // Offset int64 // The data to write. // // The FUSE documentation requires that exactly the number of bytes supplied // be written, except on error (http://goo.gl/KUpwwn). This appears to be // because it uses file mmapping machinery (http://goo.gl/SGxnaN) to write a // page at a time. Data []byte } func (o *WriteFileOp) Respond(err error) { defer o.commonOp.opsInFlight.Done() if err != nil { o.commonOp.respondErr(err) return } resp := bazilfuse.WriteResponse{ Size: len(o.Data), } o.Logf("Responding: %v", &resp) o.r.(*bazilfuse.WriteRequest).Respond(&resp) } // Synchronize the current contents of an open file to storage. // // vfs.txt documents this as being called for by the fsync(2) system call // (cf. http://goo.gl/j9X8nB). Code walk for that case: // // * (http://goo.gl/IQkWZa) sys_fsync calls do_fsync, calls vfs_fsync, calls // vfs_fsync_range. // // * (http://goo.gl/5L2SMy) vfs_fsync_range calls f_op->fsync. // // Note that this is also sent by fdatasync(2) (cf. http://goo.gl/01R7rF), and // may be sent for msync(2) with the MS_SYNC flag (see the notes on // FlushFileOp). // // See also: FlushFileOp, which may perform a similar function when closing a // file (but which is not used in "real" file systems). type SyncFileOp struct { commonOp // The file and handle being sync'd. Inode InodeID Handle HandleID } func (o *SyncFileOp) Respond(err error) { defer o.commonOp.opsInFlight.Done() if err != nil { o.commonOp.respondErr(err) return } o.Logf("Responding OK to SyncFileOp") o.r.(*bazilfuse.FsyncRequest).Respond() } // Flush the current state of an open file to storage upon closing a file // descriptor. // // vfs.txt documents this as being sent for each close(2) system call (cf. // http://goo.gl/FSkbrq). Code walk for that case: // // * (http://goo.gl/e3lv0e) sys_close calls __close_fd, calls filp_close. // * (http://goo.gl/nI8fxD) filp_close calls f_op->flush (fuse_flush). // // But note that this is also sent in other contexts where a file descriptor is // closed, such as dup2(2) (cf. http://goo.gl/NQDvFS). In the case of close(2), // a flush error is returned to the user. For dup2(2), it is not. // // One potentially significant case where this may not be sent is mmap'd files, // where the behavior is complicated: // // * munmap(2) does not cause flushes (cf. http://goo.gl/j8B9g0). // // * On OS X, if a user modifies a mapped file via the mapping before // closing the file with close(2), the WriteFileOps for the modifications // may not be received before the FlushFileOp for the close(2) (cf. // http://goo.gl/kVmNcx). // // * However, even on OS X you can arrange for writes via a mapping to be // flushed by calling msync(2) followed by close(2). On OS X msync(2) // will cause a WriteFileOps to go through and close(2) will cause a // FlushFile as usual (cf. http://goo.gl/kVmNcx). On Linux, msync(2) does // nothing unless you set the MS_SYNC flag, in which case it causes a // SyncFileOp to be sent (cf. http://goo.gl/P3mErk). // // In summary: if you make data durable in both FlushFile and SyncFile, then // your users can get safe behavior from mapped files on both operating systems // by calling msync(2) with MS_SYNC, followed by munmap(2), followed by // close(2). On Linux, the msync(2) is optional (cf. http://goo.gl/EIhAxv and // the notes on WriteFileOp). // // Because of cases like dup2(2), FlushFileOps are not necessarily one to one // with OpenFileOps. They should not be used for reference counting, and the // handle must remain valid even after the flush op is received (use // ReleaseFileHandleOp for disposing of it). // // Typical "real" file systems do not implement this, presumably relying on // the kernel to write out the page cache to the block device eventually. // They can get away with this because a later open(2) will see the same // data. A file system that writes to remote storage however probably wants // to at least schedule a real flush, and maybe do it immediately in order to // return any errors that occur. type FlushFileOp struct { commonOp // The file and handle being flushed. Inode InodeID Handle HandleID } func (o *FlushFileOp) Respond(err error) { defer o.commonOp.opsInFlight.Done() if err != nil { o.commonOp.respondErr(err) return } o.Logf("Responding OK to FlushFileOp") o.r.(*bazilfuse.FlushRequest).Respond() } // Release a previously-minted file handle. The kernel calls this when there // are no more references to an open file: all file descriptors are closed // and all memory mappings are unmapped. // // The kernel guarantees that the handle ID will not be used in further calls // to the file system (unless it is reissued by the file system). // // Errors from this op are ignored by the kernel (cf. http://goo.gl/RL38Do). type ReleaseFileHandleOp struct { commonOp // The handle ID to be released. The kernel guarantees that this ID will not // be used in further calls to the file system (unless it is reissued by the // file system). Handle HandleID } func (o *ReleaseFileHandleOp) Respond(err error) { defer o.commonOp.opsInFlight.Done() if err != nil { o.commonOp.respondErr(err) return } o.Logf("Responding OK to ReleaseFileHandleOp") o.r.(*bazilfuse.ReleaseRequest).Respond() }