Compare commits

...

12 Commits

14 changed files with 601 additions and 156 deletions

View File

@ -151,6 +151,7 @@ func (c *Connection) Init() error {
cacheSymlinks := initOp.Flags&fusekernel.InitCacheSymlinks > 0 cacheSymlinks := initOp.Flags&fusekernel.InitCacheSymlinks > 0
noOpenSupport := initOp.Flags&fusekernel.InitNoOpenSupport > 0 noOpenSupport := initOp.Flags&fusekernel.InitNoOpenSupport > 0
noOpendirSupport := initOp.Flags&fusekernel.InitNoOpendirSupport > 0 noOpendirSupport := initOp.Flags&fusekernel.InitNoOpendirSupport > 0
readdirplusSupport := initOp.Flags&fusekernel.InitDoReaddirplus > 0
// Respond to the init op. // Respond to the init op.
initOp.Library = c.protocol initOp.Library = c.protocol
@ -193,6 +194,11 @@ func (c *Connection) Init() error {
initOp.Flags |= fusekernel.InitNoOpendirSupport initOp.Flags |= fusekernel.InitNoOpendirSupport
} }
// Tell the kernel to do readdirplus (readdir+lookup in one call)
if c.cfg.UseReadDirPlus && readdirplusSupport {
initOp.Flags |= fusekernel.InitDoReaddirplus
}
c.Reply(ctx, nil) c.Reply(ctx, nil)
return nil return nil
} }
@ -369,18 +375,24 @@ func (c *Connection) readMessage() (*buffer.InMessage, error) {
} }
// Write the supplied message to the kernel. // Write the supplied message to the kernel.
func (c *Connection) writeMessage(msg []byte) error { func (c *Connection) writeMessage(outMsg *buffer.OutMessage) error {
var err error
var n int
expectedLen := outMsg.Len()
if outMsg.Sglist != nil {
n, err = writev(int(c.dev.Fd()), outMsg.Sglist)
} else {
// Avoid the retry loop in os.File.Write. // Avoid the retry loop in os.File.Write.
n, err := syscall.Write(int(c.dev.Fd()), msg) n, err = syscall.Write(int(c.dev.Fd()), outMsg.OutHeaderBytes())
if err != nil { }
if err == nil && n != expectedLen {
err = fmt.Errorf("Wrote %d bytes; expected %d", n, expectedLen)
}
if err != nil && c.errorLogger != nil {
c.errorLogger.Printf("writeMessage: %v %v", err, outMsg.OutHeaderBytes())
}
outMsg.Sglist = nil
return err return err
}
if n != len(msg) {
return fmt.Errorf("Wrote %d bytes; expected %d", n, len(msg))
}
return nil
} }
// ReadOp consumes the next op from the kernel process, returning the op and a // ReadOp consumes the next op from the kernel process, returning the op and a
@ -485,8 +497,15 @@ func (c *Connection) Reply(ctx context.Context, opErr error) {
outMsg := state.outMsg outMsg := state.outMsg
fuseID := inMsg.Header().Unique fuseID := inMsg.Header().Unique
suppressReuse := false
if wr, ok := op.(*fuseops.WriteFileOp); ok {
suppressReuse = wr.SuppressReuse
}
// Make sure we destroy the messages when we're done. // Make sure we destroy the messages when we're done.
if !suppressReuse {
defer c.putInMessage(inMsg) defer c.putInMessage(inMsg)
}
defer c.putOutMessage(outMsg) defer c.putOutMessage(outMsg)
// Clean up state for this op. // Clean up state for this op.
@ -510,19 +529,21 @@ func (c *Connection) Reply(ctx context.Context, opErr error) {
noResponse := c.kernelResponse(outMsg, inMsg.Header().Unique, op, opErr) noResponse := c.kernelResponse(outMsg, inMsg.Header().Unique, op, opErr)
if !noResponse { if !noResponse {
var err error c.writeMessage(outMsg)
if outMsg.Sglist != nil {
_, err = writev(int(c.dev.Fd()), outMsg.Sglist)
} else {
err = c.writeMessage(outMsg.OutHeaderBytes())
}
if err != nil && c.errorLogger != nil {
c.errorLogger.Printf("writeMessage: %v %v", err, outMsg.OutHeaderBytes())
}
outMsg.Sglist = nil
} }
} }
// Notify sends an unsolicited notification message to the kernel.
//
// notification must be a pointer to one of the fuseops.NotifyXXX structures;
// any other value makes kernelNotification panic.
//
// To avoid a deadlock, Notify must not be called in the execution path of a
// related filesystem operation, or within any code that could hold a lock
// that could be needed to execute such an operation. As of kernel 4.18, a
// "related operation" is a lookup(), symlink(), mknod(), mkdir(), unlink(),
// rename(), link() or create() request for the parent, and a setattr(),
// unlink(), rmdir(), rename(), setxattr(), removexattr(), readdir() or
// readdirplus() request for the inode itself.
func (c *Connection) Notify(notification interface{}) error {
	msg := c.getOutMessage()
	defer c.putOutMessage(msg)

	// Serialize the notification first; only then is the total length known
	// and can be recorded in the header.
	c.kernelNotification(msg, notification)
	hdr := msg.OutHeader()
	hdr.Len = uint32(msg.Len())

	return c.writeMessage(msg)
}
// Close the connection. Must not be called until operations that were read // Close the connection. Must not be called until operations that were read
// from the connection have been responded to. // from the connection have been responded to.
func (c *Connection) close() error { func (c *Connection) close() error {

View File

@ -89,7 +89,7 @@ func convertInMessage(
} }
if valid&fusekernel.SetattrMode != 0 { if valid&fusekernel.SetattrMode != 0 {
mode := convertFileMode(in.Mode) mode := fuseops.ConvertFileMode(in.Mode)
to.Mode = &mode to.Mode = &mode
} }
@ -170,7 +170,7 @@ func convertInMessage(
// the fact that this is a directory is implicit in the fact that the // the fact that this is a directory is implicit in the fact that the
// opcode is mkdir. But we want the correct mode to go through, so ensure // opcode is mkdir. But we want the correct mode to go through, so ensure
// that os.ModeDir is set. // that os.ModeDir is set.
Mode: convertFileMode(in.Mode) | os.ModeDir, Mode: fuseops.ConvertFileMode(in.Mode) | os.ModeDir,
OpContext: fuseops.OpContext{Pid: inMsg.Header().Pid}, OpContext: fuseops.OpContext{Pid: inMsg.Header().Pid},
} }
@ -190,7 +190,8 @@ func convertInMessage(
o = &fuseops.MkNodeOp{ o = &fuseops.MkNodeOp{
Parent: fuseops.InodeID(inMsg.Header().Nodeid), Parent: fuseops.InodeID(inMsg.Header().Nodeid),
Name: string(name), Name: string(name),
Mode: convertFileMode(in.Mode), Mode: fuseops.ConvertFileMode(in.Mode),
Rdev: in.Rdev,
OpContext: fuseops.OpContext{Pid: inMsg.Header().Pid}, OpContext: fuseops.OpContext{Pid: inMsg.Header().Pid},
} }
@ -210,7 +211,7 @@ func convertInMessage(
o = &fuseops.CreateFileOp{ o = &fuseops.CreateFileOp{
Parent: fuseops.InodeID(inMsg.Header().Nodeid), Parent: fuseops.InodeID(inMsg.Header().Nodeid),
Name: string(name), Name: string(name),
Mode: convertFileMode(in.Mode), Mode: fuseops.ConvertFileMode(in.Mode),
OpContext: fuseops.OpContext{Pid: inMsg.Header().Pid}, OpContext: fuseops.OpContext{Pid: inMsg.Header().Pid},
} }
@ -340,6 +341,8 @@ func convertInMessage(
} }
o = to o = to
case fusekernel.OpReaddirplus:
fallthrough
case fusekernel.OpReaddir: case fusekernel.OpReaddir:
in := (*fusekernel.ReadIn)(inMsg.Consume(fusekernel.ReadInSize(protocol))) in := (*fusekernel.ReadIn)(inMsg.Consume(fusekernel.ReadInSize(protocol)))
if in == nil { if in == nil {
@ -350,6 +353,7 @@ func convertInMessage(
Inode: fuseops.InodeID(inMsg.Header().Nodeid), Inode: fuseops.InodeID(inMsg.Header().Nodeid),
Handle: fuseops.HandleID(in.Fh), Handle: fuseops.HandleID(in.Fh),
Offset: fuseops.DirOffset(in.Offset), Offset: fuseops.DirOffset(in.Offset),
Plus: inMsg.Header().Opcode == fusekernel.OpReaddirplus,
OpContext: fuseops.OpContext{Pid: inMsg.Header().Pid}, OpContext: fuseops.OpContext{Pid: inMsg.Header().Pid},
} }
o = to o = to
@ -606,6 +610,42 @@ func convertInMessage(
OpContext: fuseops.OpContext{Pid: inMsg.Header().Pid}, OpContext: fuseops.OpContext{Pid: inMsg.Header().Pid},
} }
case fusekernel.OpPoll:
type input fusekernel.PollIn
in := (*input)(inMsg.Consume(unsafe.Sizeof(input{})))
if in == nil {
return nil, errors.New("Corrupt OpPoll")
}
o = &fuseops.PollOp{
Inode: fuseops.InodeID(inMsg.Header().Nodeid),
Handle: fuseops.HandleID(in.Fh),
Kh: in.Kh,
Flags: fusekernel.PollFlags(in.Flags),
Events: fusekernel.PollEvents(in.Events),
OpContext: fuseops.OpContext{Pid: inMsg.Header().Pid},
}
case fusekernel.OpNotifyReply:
type input fusekernel.NotifyRetrieveIn
in := (*input)(inMsg.Consume(unsafe.Sizeof(input{})))
if in == nil {
return nil, errors.New("Corrupt OpNotifyReply")
}
buf := inMsg.ConsumeBytes(inMsg.Len())
if len(buf) < int(in.Size) {
return nil, errors.New("Corrupt OpNotifyReply")
}
o = &fuseops.NotifyRetrieveReplyOp{
Inode: fuseops.InodeID(inMsg.Header().Nodeid),
Unique: inMsg.Header().Unique,
Offset: in.Offset,
Length: in.Size,
OpContext: fuseops.OpContext{Pid: inMsg.Header().Pid},
}
default: default:
o = &unknownOp{ o = &unknownOp{
OpCode: inMsg.Header().Opcode, OpCode: inMsg.Header().Opcode,
@ -639,6 +679,9 @@ func (c *Connection) kernelResponse(
case *fuseops.BatchForgetOp: case *fuseops.BatchForgetOp:
return true return true
case *fuseops.NotifyRetrieveReplyOp:
return true
case *interruptOp: case *interruptOp:
return true return true
} }
@ -682,37 +725,37 @@ func (c *Connection) kernelResponseForOp(
case *fuseops.LookUpInodeOp: case *fuseops.LookUpInodeOp:
size := int(fusekernel.EntryOutSize(c.protocol)) size := int(fusekernel.EntryOutSize(c.protocol))
out := (*fusekernel.EntryOut)(m.Grow(size)) out := (*fusekernel.EntryOut)(m.Grow(size))
convertChildInodeEntry(&o.Entry, out) fuseops.ConvertChildInodeEntry(&o.Entry, out)
case *fuseops.GetInodeAttributesOp: case *fuseops.GetInodeAttributesOp:
size := int(fusekernel.AttrOutSize(c.protocol)) size := int(fusekernel.AttrOutSize(c.protocol))
out := (*fusekernel.AttrOut)(m.Grow(size)) out := (*fusekernel.AttrOut)(m.Grow(size))
out.AttrValid, out.AttrValidNsec = convertExpirationTime( out.AttrValid, out.AttrValidNsec = fuseops.ConvertExpirationTime(
o.AttributesExpiration) o.AttributesExpiration)
convertAttributes(o.Inode, &o.Attributes, &out.Attr) fuseops.ConvertAttributes(o.Inode, &o.Attributes, &out.Attr)
case *fuseops.SetInodeAttributesOp: case *fuseops.SetInodeAttributesOp:
size := int(fusekernel.AttrOutSize(c.protocol)) size := int(fusekernel.AttrOutSize(c.protocol))
out := (*fusekernel.AttrOut)(m.Grow(size)) out := (*fusekernel.AttrOut)(m.Grow(size))
out.AttrValid, out.AttrValidNsec = convertExpirationTime( out.AttrValid, out.AttrValidNsec = fuseops.ConvertExpirationTime(
o.AttributesExpiration) o.AttributesExpiration)
convertAttributes(o.Inode, &o.Attributes, &out.Attr) fuseops.ConvertAttributes(o.Inode, &o.Attributes, &out.Attr)
case *fuseops.MkDirOp: case *fuseops.MkDirOp:
size := int(fusekernel.EntryOutSize(c.protocol)) size := int(fusekernel.EntryOutSize(c.protocol))
out := (*fusekernel.EntryOut)(m.Grow(size)) out := (*fusekernel.EntryOut)(m.Grow(size))
convertChildInodeEntry(&o.Entry, out) fuseops.ConvertChildInodeEntry(&o.Entry, out)
case *fuseops.MkNodeOp: case *fuseops.MkNodeOp:
size := int(fusekernel.EntryOutSize(c.protocol)) size := int(fusekernel.EntryOutSize(c.protocol))
out := (*fusekernel.EntryOut)(m.Grow(size)) out := (*fusekernel.EntryOut)(m.Grow(size))
convertChildInodeEntry(&o.Entry, out) fuseops.ConvertChildInodeEntry(&o.Entry, out)
case *fuseops.CreateFileOp: case *fuseops.CreateFileOp:
eSize := int(fusekernel.EntryOutSize(c.protocol)) eSize := int(fusekernel.EntryOutSize(c.protocol))
e := (*fusekernel.EntryOut)(m.Grow(eSize)) e := (*fusekernel.EntryOut)(m.Grow(eSize))
convertChildInodeEntry(&o.Entry, e) fuseops.ConvertChildInodeEntry(&o.Entry, e)
oo := (*fusekernel.OpenOut)(m.Grow(int(unsafe.Sizeof(fusekernel.OpenOut{})))) oo := (*fusekernel.OpenOut)(m.Grow(int(unsafe.Sizeof(fusekernel.OpenOut{}))))
oo.Fh = uint64(o.Handle) oo.Fh = uint64(o.Handle)
@ -720,12 +763,12 @@ func (c *Connection) kernelResponseForOp(
case *fuseops.CreateSymlinkOp: case *fuseops.CreateSymlinkOp:
size := int(fusekernel.EntryOutSize(c.protocol)) size := int(fusekernel.EntryOutSize(c.protocol))
out := (*fusekernel.EntryOut)(m.Grow(size)) out := (*fusekernel.EntryOut)(m.Grow(size))
convertChildInodeEntry(&o.Entry, out) fuseops.ConvertChildInodeEntry(&o.Entry, out)
case *fuseops.CreateLinkOp: case *fuseops.CreateLinkOp:
size := int(fusekernel.EntryOutSize(c.protocol)) size := int(fusekernel.EntryOutSize(c.protocol))
out := (*fusekernel.EntryOut)(m.Grow(size)) out := (*fusekernel.EntryOut)(m.Grow(size))
convertChildInodeEntry(&o.Entry, out) fuseops.ConvertChildInodeEntry(&o.Entry, out)
case *fuseops.RenameOp: case *fuseops.RenameOp:
// Empty response // Empty response
@ -862,6 +905,13 @@ func (c *Connection) kernelResponseForOp(
out.TimeGran = 1 out.TimeGran = 1
out.MaxPages = o.MaxPages out.MaxPages = o.MaxPages
case *fuseops.PollOp:
out := (*fusekernel.PollOut)(m.Grow(int(unsafe.Sizeof(fusekernel.PollOut{}))))
out.Revents = uint32(o.Revents)
case *fuseops.NotifyRetrieveReplyOp:
// Empty response
default: default:
panic(fmt.Sprintf("Unexpected op: %#v", op)) panic(fmt.Sprintf("Unexpected op: %#v", op))
} }
@ -869,124 +919,73 @@ func (c *Connection) kernelResponseForOp(
return return
} }
// kernelNotification serializes a notification into m.
//
// op must be a pointer to one of the fuseops.NotifyXXX structures. The
// header's Unique field is cleared and its Error field is set to the
// notification code; the matching fusekernel.NotifyXXXOut payload (plus any
// trailing name or data) is appended after the header. The header length is
// not filled in here. Any other type of op causes a panic.
func (c *Connection) kernelNotification(
	m *buffer.OutMessage,
	op interface{}) {
	hdr := m.OutHeader()
	hdr.Unique = 0

	switch n := op.(type) {
	case *fuseops.NotifyPollWakeup:
		hdr.Error = fusekernel.NotifyCodePoll
		size := int(unsafe.Sizeof(fusekernel.NotifyPollWakeupOut{}))
		body := (*fusekernel.NotifyPollWakeupOut)(m.Grow(size))
		body.Kh = n.Kh

	case *fuseops.NotifyInvalInode:
		hdr.Error = fusekernel.NotifyCodeInvalInode
		size := int(unsafe.Sizeof(fusekernel.NotifyInvalInodeOut{}))
		body := (*fusekernel.NotifyInvalInodeOut)(m.Grow(size))
		body.Ino = uint64(n.Inode)
		body.Off = n.Offset
		body.Len = n.Length

	case *fuseops.NotifyInvalEntry:
		hdr.Error = fusekernel.NotifyCodeInvalEntry
		size := int(unsafe.Sizeof(fusekernel.NotifyInvalEntryOut{}))
		body := (*fusekernel.NotifyInvalEntryOut)(m.Grow(size))
		body.Parent = uint64(n.Parent)
		body.Namelen = uint32(len(n.Name))
		// The name follows the struct, terminated by a NUL byte that is not
		// counted in Namelen.
		m.AppendString(n.Name)
		m.AppendString("\x00")

	case *fuseops.NotifyDelete:
		hdr.Error = fusekernel.NotifyCodeDelete
		size := int(unsafe.Sizeof(fusekernel.NotifyDeleteOut{}))
		body := (*fusekernel.NotifyDeleteOut)(m.Grow(size))
		body.Parent = uint64(n.Parent)
		body.Child = uint64(n.Child)
		body.Namelen = uint32(len(n.Name))
		// Same trailing NUL-terminated name as for NotifyInvalEntry.
		m.AppendString(n.Name)
		m.AppendString("\x00")

	case *fuseops.NotifyStore:
		hdr.Error = fusekernel.NotifyCodeStore
		size := int(unsafe.Sizeof(fusekernel.NotifyStoreOut{}))
		body := (*fusekernel.NotifyStoreOut)(m.Grow(size))
		body.Nodeid = uint64(n.Inode)
		body.Offset = n.Offset
		body.Size = n.Length
		// Append all supplied buffers, then trim the message so that exactly
		// Length bytes of data follow the struct.
		m.Append(n.Data...)
		m.ShrinkTo(buffer.OutMessageHeaderSize + size + int(n.Length))

	case *fuseops.NotifyRetrieve:
		hdr.Error = fusekernel.NotifyCodeRetrieve
		size := int(unsafe.Sizeof(fusekernel.NotifyRetrieveOut{}))
		body := (*fusekernel.NotifyRetrieveOut)(m.Grow(size))
		body.Unique = n.Unique
		body.Nodeid = uint64(n.Inode)
		body.Offset = n.Offset
		body.Size = n.Length

	default:
		panic(fmt.Sprintf("Unexpected notification: %#v", op))
	}
}
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// General conversions // General conversions
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// convertTime splits t into whole seconds since the Unix epoch and the
// nanosecond part within that second, the pair of values stored for each
// timestamp in the kernel attribute struct.
//
// The values are taken from t.Unix() and t.Nanosecond() rather than derived
// from t.UnixNano(): the latter is undefined for instants outside roughly the
// years 1678..2262 (which includes the zero time.Time), and for instants
// before the epoch it yields a negative remainder that wraps around when
// converted to uint32. With this form nsec is always in [0, 999999999] and a
// pre-epoch instant yields the two's-complement encoding of its (negative)
// second count.
func convertTime(t time.Time) (secs uint64, nsec uint32) {
	return uint64(t.Unix()), uint32(t.Nanosecond())
}
// convertAttributes fills out, the kernel attribute struct, from the inode ID
// and the file system's view of the inode's attributes.
func convertAttributes(
	inodeID fuseops.InodeID,
	in *fuseops.InodeAttributes,
	out *fusekernel.Attr) {
	// Identity, size and ownership are copied across unchanged.
	out.Ino = uint64(inodeID)
	out.Size = in.Size
	out.Nlink = in.Nlink
	out.Uid = in.Uid
	out.Gid = in.Gid

	// Block count: the size expressed in 512-byte units, rounded up.
	out.Blocks = (in.Size + 511) / 512

	// Timestamps.
	out.Atime, out.AtimeNsec = convertTime(in.Atime)
	out.Mtime, out.MtimeNsec = convertTime(in.Mtime)
	out.Ctime, out.CtimeNsec = convertTime(in.Ctime)
	crSecs, crNsec := convertTime(in.Crtime)
	out.SetCrtime(crSecs, crNsec)

	// Permission bits map one-to-one.
	mode := uint32(in.Mode) & 0777

	// File type: the first matching Go type bit wins; anything without a
	// type bit is reported as a regular file.
	if in.Mode&os.ModeDir != 0 {
		mode |= syscall.S_IFDIR
	} else if in.Mode&os.ModeDevice != 0 {
		if in.Mode&os.ModeCharDevice != 0 {
			mode |= syscall.S_IFCHR
		} else {
			mode |= syscall.S_IFBLK
		}
	} else if in.Mode&os.ModeNamedPipe != 0 {
		mode |= syscall.S_IFIFO
	} else if in.Mode&os.ModeSymlink != 0 {
		mode |= syscall.S_IFLNK
	} else if in.Mode&os.ModeSocket != 0 {
		mode |= syscall.S_IFSOCK
	} else {
		mode |= syscall.S_IFREG
	}

	// setuid / setgid / sticky.
	special := [...]struct {
		goBit   os.FileMode
		unixBit uint32
	}{
		{os.ModeSetuid, syscall.S_ISUID},
		{os.ModeSetgid, syscall.S_ISGID},
		{os.ModeSticky, syscall.S_ISVTX},
	}
	for _, s := range special {
		if in.Mode&s.goBit != 0 {
			mode |= s.unixBit
		}
	}

	out.Mode = mode
}
// convertExpirationTime turns an absolute cache expiration time into the
// time remaining from now, split into whole seconds and leftover nanoseconds
// as consumed by the fuse kernel module.
//
// Fuse carries these durations as unsigned counts (64-bit seconds, 32-bit
// nanoseconds), so an expiration that is not in the future is reported as
// zero. No upper cap is needed: a time.Duration cannot exceed 2^63 ns, far
// less than 2^64 seconds.
func convertExpirationTime(t time.Time) (secs uint64, nsecs uint32) {
	remaining := time.Until(t)
	if remaining <= 0 {
		return 0, 0
	}

	secs = uint64(remaining / time.Second)
	nsecs = uint32(remaining % time.Second)
	return secs, nsecs
}
// convertChildInodeEntry fills the kernel entry struct out from the file
// system's child inode entry: node ID, generation, the entry and attribute
// cache lifetimes (as time remaining from now), and the attributes themselves.
func convertChildInodeEntry(
	in *fuseops.ChildInodeEntry,
	out *fusekernel.EntryOut) {
	entrySecs, entryNsec := convertExpirationTime(in.EntryExpiration)
	attrSecs, attrNsec := convertExpirationTime(in.AttributesExpiration)

	out.Nodeid = uint64(in.Child)
	out.Generation = uint64(in.Generation)
	out.EntryValid, out.EntryValidNsec = entrySecs, entryNsec
	out.AttrValid, out.AttrValidNsec = attrSecs, attrNsec

	convertAttributes(in.Child, &in.Attributes, &out.Attr)
}
// convertFileMode translates a Unix st_mode style value into the equivalent
// os.FileMode: the nine permission bits are copied, the file type is mapped
// to the matching os.Mode* type bits, and setuid/setgid/sticky are carried
// over. An unrecognized file type is reported as os.ModeDevice.
func convertFileMode(unixMode uint32) os.FileMode {
	// File type.
	var typeBits os.FileMode
	switch unixMode & syscall.S_IFMT {
	case syscall.S_IFREG:
		// Regular files carry no type bit in os.FileMode.
	case syscall.S_IFDIR:
		typeBits = os.ModeDir
	case syscall.S_IFCHR:
		typeBits = os.ModeDevice | os.ModeCharDevice
	case syscall.S_IFBLK:
		typeBits = os.ModeDevice
	case syscall.S_IFIFO:
		typeBits = os.ModeNamedPipe
	case syscall.S_IFLNK:
		typeBits = os.ModeSymlink
	case syscall.S_IFSOCK:
		typeBits = os.ModeSocket
	default:
		// Unknown type: fall back to a generic device.
		typeBits = os.ModeDevice
	}

	result := os.FileMode(unixMode&0777) | typeBits

	// setuid / setgid / sticky.
	special := [...]struct {
		unixBit uint32
		goBit   os.FileMode
	}{
		{syscall.S_ISUID, os.ModeSetuid},
		{syscall.S_ISGID, os.ModeSetgid},
		{syscall.S_ISVTX, os.ModeSticky},
	}
	for _, s := range special {
		if unixMode&s.unixBit != 0 {
			result |= s.goBit
		}
	}

	return result
}
func writeXattrSize(m *buffer.OutMessage, size uint32) { func writeXattrSize(m *buffer.OutMessage, size uint32) {
out := (*fusekernel.GetxattrOut)(m.Grow(int(unsafe.Sizeof(fusekernel.GetxattrOut{})))) out := (*fusekernel.GetxattrOut)(m.Grow(int(unsafe.Sizeof(fusekernel.GetxattrOut{}))))
out.Size = size out.Size = size

84
fuseops/conv.go Normal file
View File

@ -0,0 +1,84 @@
// Copyright 2023 Vitaliy Filippov
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fuseops
import (
"time"
"syscall"
"github.com/jacobsa/fuse/internal/fusekernel"
)
////////////////////////////////////////////////////////////////////////
// General conversions
////////////////////////////////////////////////////////////////////////
// ConvertTime splits t into whole seconds since the Unix epoch and the
// nanosecond part within that second, the pair of values stored for each
// timestamp in the kernel attribute struct.
//
// The values are taken from t.Unix() and t.Nanosecond() rather than derived
// from t.UnixNano(): the latter is undefined for instants outside roughly the
// years 1678..2262 (which includes the zero time.Time), and for instants
// before the epoch it yields a negative remainder that wraps around when
// converted to uint32. With this form nsec is always in [0, 999999999] and a
// pre-epoch instant yields the two's-complement encoding of its (negative)
// second count.
func ConvertTime(t time.Time) (secs uint64, nsec uint32) {
	return uint64(t.Unix()), uint32(t.Nanosecond())
}
// ConvertAttributes fills out, the kernel attribute struct, from the inode ID
// and the file system's view of the inode's attributes.
//
// Rdev is reported only for character and block devices and is zero for every
// other file type.
func ConvertAttributes(
	inodeID InodeID,
	in *InodeAttributes,
	out *fusekernel.Attr) {
	out.Ino = uint64(inodeID)
	out.Size = in.Size
	out.Atime, out.AtimeNsec = ConvertTime(in.Atime)
	out.Mtime, out.MtimeNsec = ConvertTime(in.Mtime)
	out.Ctime, out.CtimeNsec = ConvertTime(in.Ctime)
	out.SetCrtime(ConvertTime(in.Crtime))
	out.Nlink = in.Nlink
	out.Uid = in.Uid
	out.Gid = in.Gid
	// Round up to the nearest 512-byte boundary.
	out.Blocks = (in.Size + 512 - 1) / 512

	// Set the mode.
	out.Mode = ConvertGolangMode(in.Mode)

	// The file type is an enumerated value inside the S_IFMT field, not a set
	// of independent flags: S_IFCHR|S_IFBLK shares bits with S_IFDIR, S_IFLNK
	// and S_IFSOCK, so the type has to be compared for equality after masking
	// with S_IFMT. Rdev is written unconditionally so that a non-device entry
	// never keeps whatever the destination memory held before.
	var rdev uint32
	switch out.Mode & syscall.S_IFMT {
	case syscall.S_IFCHR, syscall.S_IFBLK:
		rdev = in.Rdev
	}
	out.Rdev = rdev
}
// ConvertExpirationTime turns an absolute cache expiration time into the
// time remaining from now, split into whole seconds and leftover nanoseconds
// as consumed by the fuse kernel module.
//
// Fuse carries these durations as unsigned counts (64-bit seconds, 32-bit
// nanoseconds), so an expiration that is not in the future is reported as
// zero. No upper cap is needed: a time.Duration cannot exceed 2^63 ns, far
// less than 2^64 seconds.
func ConvertExpirationTime(t time.Time) (secs uint64, nsecs uint32) {
	remaining := time.Until(t)
	if remaining <= 0 {
		return 0, 0
	}

	secs = uint64(remaining / time.Second)
	nsecs = uint32(remaining % time.Second)
	return secs, nsecs
}
// ConvertChildInodeEntry fills the kernel entry struct out from the file
// system's child inode entry: node ID, generation, the entry and attribute
// cache lifetimes (as time remaining from now), and the attributes themselves.
func ConvertChildInodeEntry(
	in *ChildInodeEntry,
	out *fusekernel.EntryOut) {
	entrySecs, entryNsec := ConvertExpirationTime(in.EntryExpiration)
	attrSecs, attrNsec := ConvertExpirationTime(in.AttributesExpiration)

	out.Nodeid = uint64(in.Child)
	out.Generation = uint64(in.Generation)
	out.EntryValid, out.EntryValidNsec = entrySecs, entryNsec
	out.AttrValid, out.AttrValidNsec = attrSecs, attrNsec

	ConvertAttributes(in.Child, &in.Attributes, &out.Attr)
}

70
fuseops/filemode.go Normal file
View File

@ -0,0 +1,70 @@
package fuseops
import (
"os"
"syscall"
)
// ConvertFileMode translates a Unix st_mode style value into the equivalent
// os.FileMode: the nine permission bits are copied, the file type is mapped
// to the matching os.Mode* type bits, and setuid/setgid/sticky are carried
// over. An unrecognized file type contributes no type bits.
func ConvertFileMode(unixMode uint32) os.FileMode {
	// File type. Regular files and unknown types carry no type bit.
	var typeBits os.FileMode
	switch unixMode & syscall.S_IFMT {
	case syscall.S_IFDIR:
		typeBits = os.ModeDir
	case syscall.S_IFCHR:
		typeBits = os.ModeDevice | os.ModeCharDevice
	case syscall.S_IFBLK:
		typeBits = os.ModeDevice
	case syscall.S_IFIFO:
		typeBits = os.ModeNamedPipe
	case syscall.S_IFLNK:
		typeBits = os.ModeSymlink
	case syscall.S_IFSOCK:
		typeBits = os.ModeSocket
	}

	result := os.FileMode(unixMode&0777) | typeBits

	// setuid / setgid / sticky.
	special := [...]struct {
		unixBit uint32
		goBit   os.FileMode
	}{
		{syscall.S_ISUID, os.ModeSetuid},
		{syscall.S_ISGID, os.ModeSetgid},
		{syscall.S_ISVTX, os.ModeSticky},
	}
	for _, s := range special {
		if unixMode&s.unixBit != 0 {
			result |= s.goBit
		}
	}

	return result
}
// ConvertGolangMode translates an os.FileMode into a Unix st_mode style
// value: the nine permission bits are copied, the first matching type bit
// (directory, device, named pipe, symlink, socket) selects the S_IF* file
// type with regular file as the fallback, and setuid/setgid/sticky are
// carried over.
func ConvertGolangMode(inMode os.FileMode) uint32 {
	// File type.
	var kind uint32
	if inMode&os.ModeDir != 0 {
		kind = syscall.S_IFDIR
	} else if inMode&os.ModeDevice != 0 {
		// A device is a block device unless it is flagged as a char device.
		kind = syscall.S_IFBLK
		if inMode&os.ModeCharDevice != 0 {
			kind = syscall.S_IFCHR
		}
	} else if inMode&os.ModeNamedPipe != 0 {
		kind = syscall.S_IFIFO
	} else if inMode&os.ModeSymlink != 0 {
		kind = syscall.S_IFLNK
	} else if inMode&os.ModeSocket != 0 {
		kind = syscall.S_IFSOCK
	} else {
		kind = syscall.S_IFREG
	}

	result := uint32(inMode)&0777 | kind

	// setuid / setgid / sticky.
	special := [...]struct {
		goBit   os.FileMode
		unixBit uint32
	}{
		{os.ModeSetuid, syscall.S_ISUID},
		{os.ModeSetgid, syscall.S_ISGID},
		{os.ModeSticky, syscall.S_ISVTX},
	}
	for _, s := range special {
		if inMode&s.goBit != 0 {
			result |= s.unixBit
		}
	}

	return result
}

View File

@ -301,6 +301,9 @@ type MkNodeOp struct {
Name string Name string
Mode os.FileMode Mode os.FileMode
// The device number (only valid if created file is a device)
Rdev uint32
// Set by the file system: information about the inode that was created. // Set by the file system: information about the inode that was created.
// //
// The lookup count for the inode is implicitly incremented. See notes on // The lookup count for the inode is implicitly incremented. See notes on
@ -559,12 +562,18 @@ type ReadDirOp struct {
// offset, and return array offsets into that cached listing. // offset, and return array offsets into that cached listing.
Offset DirOffset Offset DirOffset
// Whether this operation is a READDIRPLUS
//
// If true, then the FS must return inode attributes and expiration time
// along with each directory entry and increment its reference count.
Plus bool
// The destination buffer, whose length gives the size of the read. // The destination buffer, whose length gives the size of the read.
// //
// The output data should consist of a sequence of FUSE directory entries in // The output data should consist of a sequence of FUSE directory entries in
// the format generated by fuse_add_direntry (http://goo.gl/qCcHCV), which is // the format generated by fuse_add_direntry (http://goo.gl/qCcHCV), which is
// consumed by parse_dirfile (http://goo.gl/2WUmD2). Use fuseutil.WriteDirent // consumed by parse_dirfile (http://goo.gl/2WUmD2). Use fuseutil.WriteDirent
// to generate this data. // or fuseutil.WriteDirentPlus to generate this data.
// //
// Each entry returned exposes a directory offset to the user that may later // Each entry returned exposes a directory offset to the user that may later
// show up in ReadDirRequest.Offset. See notes on that field for more // show up in ReadDirRequest.Offset. See notes on that field for more
@ -750,6 +759,17 @@ type WriteFileOp struct {
// because it uses file mmapping machinery (http://goo.gl/SGxnaN) to write a // because it uses file mmapping machinery (http://goo.gl/SGxnaN) to write a
// page at a time. // page at a time.
Data []byte Data []byte
// Set by the file system: "no reuse" flag.
//
// By default, the Data buffer is reused by the library, so the file system
// must copy the data if it wants to use it later.
//
// However, if the file system sets this flag to true, the library doesn't
// reuse this buffer, so the file system can safely store and use Data slice
// without copying memory.
SuppressReuse bool
OpContext OpContext OpContext OpContext
} }
@ -962,3 +982,127 @@ type FallocateOp struct {
Mode uint32 Mode uint32
OpContext OpContext OpContext OpContext
} }
// PollOp is a request for notifications when the file system user calls
// poll/select or similar operations on a file.
//
// The file system reports the currently pending events in Revents; that value
// is what gets written into the poll response sent back to the kernel.
type PollOp struct {
// The inode and handle the user wants to poll
Inode InodeID
Handle HandleID
// Kh is the "kernel handle". The reason behind it is that it's allocated
// by the kernel on file allocation and guaranteed to be unique as opposed
// to regular file handles (HandleID) generated by the userland server
// (by us). Kh has to be used in NotifyPollWakeupOut replies.
Kh uint64
// Poll flags
Flags fusekernel.PollFlags
// Requested events
Events fusekernel.PollEvents
// Set by the file system: the actual events that have happened
// since the last poll
Revents fusekernel.PollEvents
// Context of the request (carries the PID taken from the request header)
OpContext OpContext
}
// NotifyPollWakeup notifies consumers waiting for poll/epoll that events are
// incoming for the specified kernel handle. The kernel will send a PollOp
// request to get the event mask after receiving this notification.
type NotifyPollWakeup struct {
// The kernel handle, as received in PollOp.Kh
Kh uint64
}
// NotifyInvalInode is a notification to invalidate the cache for an inode.
//
// If the filesystem has writeback caching enabled, invalidating an inode
// will first trigger a writeback of all dirty pages. The call will block
// until all writeback requests have completed and the inode has been
// invalidated. It will, however, not wait for completion of pending writeback
// requests that have been issued before.
type NotifyInvalInode struct {
// The inode whose cache is to be invalidated
Inode InodeID
// The range to invalidate, passed to the kernel as-is.
// NOTE(review): libfuse documents a negative offset as "attributes only"
// and a zero length as "everything from the offset" - confirm before
// relying on it.
Offset int64
Length int64
}
// NotifyInvalEntry is a notification to invalidate parent attributes and the
// dentry matching parent/name.
//
// To avoid a deadlock this request must not be sent in the execution path
// of a related filesystem operation or within any code that could hold a lock
// that could be needed to execute such an operation. As of kernel 4.18, a
// "related operation" is a lookup(), symlink(), mknod(), mkdir(), unlink(),
// rename(), link() or create() request for the parent, and a setattr(),
// unlink(), rmdir(), rename(), setxattr(), removexattr(), readdir() or
// readdirplus() request for the inode itself.
//
// When called correctly, it will never block.
type NotifyInvalEntry struct {
// The directory containing the entry
Parent InodeID
// The name of the entry within Parent
Name string
}
// NotifyDelete behaves like NotifyInvalEntry with the following additional
// effect (at least as of Linux kernel 4.8):
//
// If the provided child inode matches the inode that is currently associated
// with the cached dentry, and if there are any inotify watches registered for
// the dentry, then the watchers are informed that the dentry has been deleted.
//
// To avoid a deadlock this request must not be sent while executing a
// related filesystem operation or while holding a lock that could be needed to
// execute such an operation.
type NotifyDelete struct {
// The directory containing the entry
Parent InodeID
// The inode the entry referred to
Child InodeID
// The name of the entry within Parent
Name string
}
// NotifyStore stores data to the kernel buffers.
//
// Synchronously store data in the kernel buffers belonging to the given inode.
// The stored data is marked up-to-date (no read will be performed against it,
// unless it's invalidated or evicted from the cache).
//
// If the stored data overflows the current file size, then the size is extended,
// similarly to a write(2) on the filesystem.
//
// If this request returns an error, then the store wasn't fully completed, but
// it may have been partially completed.
type NotifyStore struct {
// The inode whose kernel buffers are written
Inode InodeID
// The offset within the file at which the data is stored
Offset uint64
// The number of bytes of Data to store; the outgoing message is trimmed
// so that exactly this many data bytes follow the notification struct
Length uint32
// The data to store, as a list of buffers that are sent one after another
Data [][]byte
}
// NotifyRetrieve retrieves data from the kernel buffers belonging to the
// given inode.
//
// If successful then the kernel will send a NotifyRetrieveReplyOp as a reply.
// Only present pages are returned in the retrieve reply. Retrieving stops when it
// finds a non-present page and only data prior to that is returned.
//
// If this request returns an error, then the retrieve will not be completed and
// no reply will be sent.
//
// This request doesn't change the dirty state of pages in the kernel buffer. For
// dirty pages the write() method will be called regardless of having been retrieved
// previously.
type NotifyRetrieve struct {
// The inode whose kernel buffers are read
Inode InodeID
// An identifier chosen by the caller for this request.
// NOTE(review): presumably the kernel echoes it back as
// NotifyRetrieveReplyOp.Unique so the reply can be matched - confirm.
Unique uint64
// The offset within the file to start retrieving from
Offset uint64
// The number of bytes to retrieve
Length uint32
}
// NotifyRetrieveReplyOp is the op the kernel sends in reply to a
// NotifyRetrieve notification. No response is sent back to the kernel for it.
//
// The op describes the retrieved range only; it has no field carrying the
// retrieved bytes themselves.
//
// Matches the size of WriteIn
// NOTE(review): presumably this refers to the kernel's retrieve-reply input
// struct having the same size as WriteIn - confirm.
type NotifyRetrieveReplyOp struct {
// The inode the data was retrieved from
Inode InodeID
// The unique ID taken from the header of the kernel's reply message
Unique uint64
// The offset and size of the retrieved range, as reported by the kernel
Offset uint64
Length uint32
// Context of the request (carries the PID taken from the request header)
OpContext OpContext
}

View File

@ -87,6 +87,9 @@ type InodeAttributes struct {
// //
Mode os.FileMode Mode os.FileMode
// The device number. Only valid if the file is a device
Rdev uint32
// Time information. See `man 2 stat` for full details. // Time information. See `man 2 stat` for full details.
Atime time.Time // Time of last access Atime time.Time // Time of last access
Mtime time.Time // Time of last modification Mtime time.Time // Time of last modification

View File

@ -19,6 +19,7 @@ import (
"unsafe" "unsafe"
"github.com/jacobsa/fuse/fuseops" "github.com/jacobsa/fuse/fuseops"
"github.com/jacobsa/fuse/internal/fusekernel"
) )
type DirentType uint32 type DirentType uint32
@ -50,10 +51,18 @@ type Dirent struct {
Type DirentType Type DirentType
} }
// Write the supplied directory entry intto the given buffer in the format // Write the supplied directory entry into the given buffer in the format
// expected in fuseops.ReadFileOp.Data, returning the number of bytes written. // expected in fuseops.ReadDirOp.Data, returning the number of bytes written.
// Return zero if the entry would not fit. // Return zero if the entry would not fit.
func WriteDirent(buf []byte, d Dirent) (n int) { func WriteDirent(buf []byte, d Dirent) (n int) {
return WriteDirentPlus(buf, nil, d)
}
// Write the supplied directory entry and, optionally, inode entry into the
// given buffer in the format expected in fuseops.ReadDirOp.Data with enabled
// READDIRPLUS capability, returning the number of bytes written.
// Returns zero if the entry would not fit.
func WriteDirentPlus(buf []byte, e *fuseops.ChildInodeEntry, d Dirent) (n int) {
// We want to write bytes with the layout of fuse_dirent // We want to write bytes with the layout of fuse_dirent
// (http://goo.gl/BmFxob) in host order. The struct must be aligned according // (http://goo.gl/BmFxob) in host order. The struct must be aligned according
// to FUSE_DIRENT_ALIGN (http://goo.gl/UziWvH), which dictates 8-byte // to FUSE_DIRENT_ALIGN (http://goo.gl/UziWvH), which dictates 8-byte
@ -78,10 +87,21 @@ func WriteDirent(buf []byte, d Dirent) (n int) {
// Do we have enough room? // Do we have enough room?
totalLen := direntSize + len(d.Name) + padLen totalLen := direntSize + len(d.Name) + padLen
if e != nil {
// READDIRPLUS was added in protocol 7.21, entry attributes were added in 7.9
// So here EntryOut is always full-length
totalLen += int(unsafe.Sizeof(fusekernel.EntryOut{}))
}
if totalLen > len(buf) { if totalLen > len(buf) {
return n return n
} }
if e != nil {
out := (*fusekernel.EntryOut)(unsafe.Pointer(&buf[n]))
fuseops.ConvertChildInodeEntry(e, out)
n += int(unsafe.Sizeof(fusekernel.EntryOut{}))
}
// Write the header. // Write the header.
de := fuse_dirent{ de := fuse_dirent{
ino: uint64(d.Inode), ino: uint64(d.Inode),

View File

@ -63,6 +63,9 @@ type FileSystem interface {
ListXattr(context.Context, *fuseops.ListXattrOp) error ListXattr(context.Context, *fuseops.ListXattrOp) error
SetXattr(context.Context, *fuseops.SetXattrOp) error SetXattr(context.Context, *fuseops.SetXattrOp) error
Fallocate(context.Context, *fuseops.FallocateOp) error Fallocate(context.Context, *fuseops.FallocateOp) error
Poll(context.Context, *fuseops.PollOp) error
SetConnection(*fuse.Connection)
// Regard all inodes (including the root inode) as having their lookup counts // Regard all inodes (including the root inode) as having their lookup counts
// decremented to zero, and clean up any resources associated with the file // decremented to zero, and clean up any resources associated with the file
@ -95,6 +98,8 @@ type fileSystemServer struct {
} }
func (s *fileSystemServer) ServeOps(c *fuse.Connection) { func (s *fileSystemServer) ServeOps(c *fuse.Connection) {
s.fs.SetConnection(c)
// When we are done, we clean up by waiting for all in-flight ops then // When we are done, we clean up by waiting for all in-flight ops then
// destroying the file system. // destroying the file system.
defer func() { defer func() {
@ -236,6 +241,9 @@ func (s *fileSystemServer) handleOp(
case *fuseops.FallocateOp: case *fuseops.FallocateOp:
err = s.fs.Fallocate(ctx, typed) err = s.fs.Fallocate(ctx, typed)
case *fuseops.PollOp:
err = s.fs.Poll(ctx, typed)
} }
c.Reply(ctx, err) c.Reply(ctx, err)

View File

@ -204,5 +204,14 @@ func (fs *NotImplementedFileSystem) Fallocate(
return fuse.ENOSYS return fuse.ENOSYS
} }
// Poll is the default handler for fuseops.PollOp. It performs no work and
// always returns fuse.ENOSYS.
func (fs *NotImplementedFileSystem) Poll(
ctx context.Context,
op *fuseops.PollOp) error {
return fuse.ENOSYS
}
// SetConnection is a no-op: the supplied connection is ignored and nothing
// is stored.
func (fs *NotImplementedFileSystem) SetConnection(*fuse.Connection) {
}
func (fs *NotImplementedFileSystem) Destroy() { func (fs *NotImplementedFileSystem) Destroy() {
} }

View File

@ -17,4 +17,4 @@ package buffer
// The maximum fuse write request size that InMessage can accommodate. // The maximum fuse write request size that InMessage can accommodate.
// //
// As of kernel 4.20 Linux accepts writes up to 256 pages or 1MiB // As of kernel 4.20 Linux accepts writes up to 256 pages or 1MiB
const MaxWriteSize = 1 << 20 const MaxWriteSize = 1 << 17

View File

@ -168,7 +168,7 @@ const (
// OpenAccessModeMask is a bitmask that separates the access mode // OpenAccessModeMask is a bitmask that separates the access mode
// from the other flags in OpenFlags. // from the other flags in OpenFlags.
const OpenAccessModeMask OpenFlags = syscall.O_ACCMODE const OpenAccessModeMask OpenFlags = OpenReadOnly | OpenWriteOnly | OpenReadWrite
// OpenFlags are the O_FOO flags passed to open/create/etc calls. For // OpenFlags are the O_FOO flags passed to open/create/etc calls. For
// example, os.O_WRONLY | os.O_APPEND. // example, os.O_WRONLY | os.O_APPEND.
@ -346,6 +346,34 @@ var releaseFlagNames = []flagName{
{uint32(ReleaseFlush), "ReleaseFlush"}, {uint32(ReleaseFlush), "ReleaseFlush"},
} }
// PollFlags holds the flag bits used in the Poll exchange.
type PollFlags uint32

// PollScheduleNotify is described in the kernel source as: ask for
// notification if there's someone waiting for it. The client may ignore the
// flag and always notify.
const PollScheduleNotify PollFlags = 1
// PollEvents is a bitmask of poll event bits used in the Poll exchange.
// NOTE(review): the values look like the Linux POLL* event bits from
// poll(2) — confirm against the kernel headers before relying on that.
type PollEvents uint32

// Each event occupies a single bit. Bit 11 is intentionally left unassigned,
// so the shifts are spelled out rather than generated with iota.
const (
	PollInEvent     PollEvents = 1 << 0
	PollPriEvent    PollEvents = 1 << 1
	PollOutEvent    PollEvents = 1 << 2
	PollErrEvent    PollEvents = 1 << 3
	PollHupEvent    PollEvents = 1 << 4
	PollNvalEvent   PollEvents = 1 << 5
	PollRdNormEvent PollEvents = 1 << 6
	PollRdBandEvent PollEvents = 1 << 7
	PollWrNormEvent PollEvents = 1 << 8
	PollWrBandEvent PollEvents = 1 << 9
	PollMsgEvent    PollEvents = 1 << 10
	PollRemoveEvent PollEvents = 1 << 12
	PollRdHupEvent  PollEvents = 1 << 13
)
// Opcodes // Opcodes
const ( const (
OpLookup = 1 OpLookup = 1
@ -386,8 +414,10 @@ const (
OpDestroy = 38 OpDestroy = 38
OpIoctl = 39 // Linux? OpIoctl = 39 // Linux?
OpPoll = 40 // Linux? OpPoll = 40 // Linux?
OpNotifyReply = 41
OpBatchForget = 42 OpBatchForget = 42
OpFallocate = 43 OpFallocate = 43
OpReaddirplus = 44
// OS X // OS X
OpSetvolname = 61 OpSetvolname = 61
@ -552,6 +582,18 @@ func CreateInSize(p Protocol) uintptr {
} }
} }
// PollIn is the request payload of a poll operation.
// NOTE(review): presumably mirrors the kernel's struct fuse_poll_in, in which
// case field order and widths must not be changed — confirm against the
// kernel header.
type PollIn struct {
Fh uint64 // presumably the handle of the open file being polled — confirm
Kh uint64 // presumably the kernel's poll handle, echoed back in NotifyPollWakeupOut.Kh — confirm
Flags uint32 // presumably PollFlags bits — confirm
Events uint32 // presumably the PollEvents bits the caller is waiting for — confirm
}
// PollOut is the reply payload of a poll operation.
// NOTE(review): presumably mirrors the kernel's struct fuse_poll_out —
// confirm against the kernel header.
type PollOut struct {
Revents uint32 // presumably the PollEvents bits that are currently ready — confirm
padding uint32 // unexported filler; brings the struct to 8 bytes
}
type ReleaseIn struct { type ReleaseIn struct {
Fh uint64 Fh uint64
Flags uint32 Flags uint32
@ -787,8 +829,15 @@ const (
NotifyCodePoll int32 = 1 NotifyCodePoll int32 = 1
NotifyCodeInvalInode int32 = 2 NotifyCodeInvalInode int32 = 2
NotifyCodeInvalEntry int32 = 3 NotifyCodeInvalEntry int32 = 3
NotifyCodeStore int32 = 4
NotifyCodeRetrieve int32 = 5
NotifyCodeDelete int32 = 6
) )
// NotifyPollWakeupOut is the payload of a poll wakeup notification.
// NOTE(review): presumably sent with NotifyCodePoll and laid out like the
// kernel's struct fuse_notify_poll_wakeup_out — confirm.
type NotifyPollWakeupOut struct {
Kh uint64 // presumably the poll handle previously received in PollIn.Kh — confirm
}
type NotifyInvalInodeOut struct { type NotifyInvalInodeOut struct {
Ino uint64 Ino uint64
Off int64 Off int64
@ -800,3 +849,35 @@ type NotifyInvalEntryOut struct {
Namelen uint32 Namelen uint32
padding uint32 padding uint32
} }
// NotifyDeleteOut is the fixed-size header of a delete notification.
// NOTE(review): presumably sent with NotifyCodeDelete and laid out like the
// kernel's struct fuse_notify_delete_out, with the entry name following the
// header — confirm.
type NotifyDeleteOut struct {
Parent uint64 // presumably the inode ID of the parent directory — confirm
Child uint64 // presumably the inode ID of the deleted child — confirm
Namelen uint32 // presumably the byte length of the entry name — confirm
padding uint32 // unexported filler; brings the struct to 24 bytes
}
// NotifyStoreOut is the fixed-size header of a store notification.
// NOTE(review): presumably sent with NotifyCodeStore and laid out like the
// kernel's struct fuse_notify_store_out, with the data to store following the
// header — confirm.
type NotifyStoreOut struct {
Nodeid uint64 // presumably the inode ID whose cached data is updated — confirm
Offset uint64 // presumably the byte offset within the file — confirm
Size uint32 // presumably the number of data bytes — confirm
padding uint32 // unexported filler; brings the struct to 24 bytes
}
// NotifyRetrieveOut is the payload of a retrieve notification.
// NOTE(review): presumably sent with NotifyCodeRetrieve and laid out like the
// kernel's struct fuse_notify_retrieve_out — confirm.
type NotifyRetrieveOut struct {
Unique uint64 // presumably an ID used to match the kernel's later reply — confirm
Nodeid uint64 // presumably the inode ID whose cached data is requested — confirm
Offset uint64 // presumably the byte offset within the file — confirm
Size uint32 // presumably the number of bytes requested — confirm
padding uint32 // unexported filler; brings the struct to 32 bytes
}
// NotifyRetrieveIn is the 40-byte header of the message that answers a
// retrieve notification.
// NOTE(review): presumably arrives with OpNotifyReply and mirrors the kernel's
// struct fuse_notify_retrieve_in — confirm.
//
// Matches the size of WriteIn
type NotifyRetrieveIn struct {
dummy1 uint64 // unexported placeholder; only occupies space
Offset uint64 // presumably the byte offset of the returned data — confirm
Size uint32 // presumably the number of data bytes returned — confirm
dummy2 uint32 // unexported placeholder; only occupies space
dummy3 uint64 // unexported placeholder; only occupies space
dummy4 uint64 // unexported placeholder; only occupies space
}

View File

@ -1,3 +1,5 @@
// +build linux windows
package fusekernel package fusekernel
import "time" import "time"

View File

@ -1 +0,0 @@
package fusekernel

View File

@ -151,6 +151,11 @@ type MountConfig struct {
// OpenDir calls at all (Linux >= 5.1): // OpenDir calls at all (Linux >= 5.1):
EnableNoOpendirSupport bool EnableNoOpendirSupport bool
// Tell the kernel to use READDIRPLUS.
// Note that the implementation may still fall back to READDIR if the running
// kernel doesn't have support for READDIRPLUS.
UseReadDirPlus bool
// Disable FUSE default permissions. // Disable FUSE default permissions.
// This is useful for situations where the backing data store (e.g., S3) doesn't // This is useful for situations where the backing data store (e.g., S3) doesn't
// actually utilise any form of qualifiable UNIX permissions. // actually utilise any form of qualifiable UNIX permissions.