Eliminated an allocation and a copy for each ReadFileOp.

This adds up to a significant performance gain for gcsfuse, in its sequential
read benchmark.

The same change was made for ReadDirOp.
geesefs-0-30-9
Aaron Jacobs 2015-07-29 11:19:03 +10:00
commit e157c6bc8d
9 changed files with 217 additions and 171 deletions

View File

@ -86,9 +86,10 @@ type Connection struct {
// State that is maintained for each in-flight op. This is stuffed into the // State that is maintained for each in-flight op. This is stuffed into the
// context that the user uses to reply to the op. // context that the user uses to reply to the op.
type opState struct { type opState struct {
inMsg *buffer.InMessage inMsg *buffer.InMessage
op interface{} outMsg *buffer.OutMessage
opID uint32 // For logging op interface{}
opID uint32 // For logging
} }
// Create a connection wrapping the supplied file descriptor connected to the // Create a connection wrapping the supplied file descriptor connected to the
@ -370,15 +371,17 @@ func (c *Connection) ReadOp() (ctx context.Context, op interface{}, err error) {
// Keep going until we find a request we know how to convert. // Keep going until we find a request we know how to convert.
for { for {
// Read the next message from the kernel. // Read the next message from the kernel.
var m *buffer.InMessage var inMsg *buffer.InMessage
m, err = c.readMessage() inMsg, err = c.readMessage()
if err != nil { if err != nil {
return return
} }
// Convert the message to an op. // Convert the message to an op.
op, err = convertInMessage(m, c.protocol) outMsg := c.getOutMessage()
op, err = convertInMessage(inMsg, outMsg, c.protocol)
if err != nil { if err != nil {
c.putOutMessage(outMsg)
err = fmt.Errorf("convertInMessage: %v", err) err = fmt.Errorf("convertInMessage: %v", err)
return return
} }
@ -396,8 +399,8 @@ func (c *Connection) ReadOp() (ctx context.Context, op interface{}, err error) {
} }
// Set up a context that remembers information about this op. // Set up a context that remembers information about this op.
ctx = c.beginOp(m.Header().Opcode, m.Header().Unique) ctx = c.beginOp(inMsg.Header().Opcode, inMsg.Header().Unique)
ctx = context.WithValue(ctx, contextKey, opState{m, op, opID}) ctx = context.WithValue(ctx, contextKey, opState{inMsg, outMsg, op, opID})
// Special case: responding to statfs is required to make mounting work on // Special case: responding to statfs is required to make mounting work on
// OS X. We don't currently expose the capability for the file system to // OS X. We don't currently expose the capability for the file system to
@ -426,14 +429,16 @@ func (c *Connection) Reply(ctx context.Context, opErr error) {
} }
op := state.op op := state.op
m := state.inMsg inMsg := state.inMsg
outMsg := state.outMsg
opID := state.opID opID := state.opID
// Make sure we destroy the message when we're done. // Make sure we destroy the messages when we're done.
defer c.putInMessage(m) defer c.putInMessage(inMsg)
defer c.putOutMessage(outMsg)
// Clean up state for this op. // Clean up state for this op.
c.finishOp(m.Header().Opcode, m.Header().Unique) c.finishOp(inMsg.Header().Opcode, inMsg.Header().Unique)
// Debug logging // Debug logging
if c.debugLogger != nil { if c.debugLogger != nil {
@ -450,11 +455,10 @@ func (c *Connection) Reply(ctx context.Context, opErr error) {
} }
// Send the reply to the kernel, if one is required. // Send the reply to the kernel, if one is required.
outMsg := c.kernelResponse(m.Header().Unique, op, opErr) noResponse := c.kernelResponse(outMsg, inMsg.Header().Unique, op, opErr)
if outMsg != nil {
err := c.writeMessage(outMsg.Bytes())
c.putOutMessage(outMsg)
if !noResponse {
err := c.writeMessage(outMsg.Bytes())
if err != nil && c.errorLogger != nil { if err != nil && c.errorLogger != nil {
c.errorLogger.Printf("writeMessage: %v", err) c.errorLogger.Printf("writeMessage: %v", err)
} }

View File

@ -19,6 +19,7 @@ import (
"errors" "errors"
"fmt" "fmt"
"os" "os"
"reflect"
"syscall" "syscall"
"time" "time"
"unsafe" "unsafe"
@ -37,11 +38,12 @@ import (
// //
// The caller is responsible for arranging for the message to be destroyed. // The caller is responsible for arranging for the message to be destroyed.
func convertInMessage( func convertInMessage(
m *buffer.InMessage, inMsg *buffer.InMessage,
outMsg *buffer.OutMessage,
protocol fusekernel.Protocol) (o interface{}, err error) { protocol fusekernel.Protocol) (o interface{}, err error) {
switch m.Header().Opcode { switch inMsg.Header().Opcode {
case fusekernel.OpLookup: case fusekernel.OpLookup:
buf := m.ConsumeBytes(m.Len()) buf := inMsg.ConsumeBytes(inMsg.Len())
n := len(buf) n := len(buf)
if n == 0 || buf[n-1] != '\x00' { if n == 0 || buf[n-1] != '\x00' {
err = errors.New("Corrupt OpLookup") err = errors.New("Corrupt OpLookup")
@ -49,25 +51,25 @@ func convertInMessage(
} }
o = &fuseops.LookUpInodeOp{ o = &fuseops.LookUpInodeOp{
Parent: fuseops.InodeID(m.Header().Nodeid), Parent: fuseops.InodeID(inMsg.Header().Nodeid),
Name: string(buf[:n-1]), Name: string(buf[:n-1]),
} }
case fusekernel.OpGetattr: case fusekernel.OpGetattr:
o = &fuseops.GetInodeAttributesOp{ o = &fuseops.GetInodeAttributesOp{
Inode: fuseops.InodeID(m.Header().Nodeid), Inode: fuseops.InodeID(inMsg.Header().Nodeid),
} }
case fusekernel.OpSetattr: case fusekernel.OpSetattr:
type input fusekernel.SetattrIn type input fusekernel.SetattrIn
in := (*input)(m.Consume(unsafe.Sizeof(input{}))) in := (*input)(inMsg.Consume(unsafe.Sizeof(input{})))
if in == nil { if in == nil {
err = errors.New("Corrupt OpSetattr") err = errors.New("Corrupt OpSetattr")
return return
} }
to := &fuseops.SetInodeAttributesOp{ to := &fuseops.SetInodeAttributesOp{
Inode: fuseops.InodeID(m.Header().Nodeid), Inode: fuseops.InodeID(inMsg.Header().Nodeid),
} }
o = to o = to
@ -93,25 +95,25 @@ func convertInMessage(
case fusekernel.OpForget: case fusekernel.OpForget:
type input fusekernel.ForgetIn type input fusekernel.ForgetIn
in := (*input)(m.Consume(unsafe.Sizeof(input{}))) in := (*input)(inMsg.Consume(unsafe.Sizeof(input{})))
if in == nil { if in == nil {
err = errors.New("Corrupt OpForget") err = errors.New("Corrupt OpForget")
return return
} }
o = &fuseops.ForgetInodeOp{ o = &fuseops.ForgetInodeOp{
Inode: fuseops.InodeID(m.Header().Nodeid), Inode: fuseops.InodeID(inMsg.Header().Nodeid),
N: in.Nlookup, N: in.Nlookup,
} }
case fusekernel.OpMkdir: case fusekernel.OpMkdir:
in := (*fusekernel.MkdirIn)(m.Consume(fusekernel.MkdirInSize(protocol))) in := (*fusekernel.MkdirIn)(inMsg.Consume(fusekernel.MkdirInSize(protocol)))
if in == nil { if in == nil {
err = errors.New("Corrupt OpMkdir") err = errors.New("Corrupt OpMkdir")
return return
} }
name := m.ConsumeBytes(m.Len()) name := inMsg.ConsumeBytes(inMsg.Len())
i := bytes.IndexByte(name, '\x00') i := bytes.IndexByte(name, '\x00')
if i < 0 { if i < 0 {
err = errors.New("Corrupt OpMkdir") err = errors.New("Corrupt OpMkdir")
@ -120,7 +122,7 @@ func convertInMessage(
name = name[:i] name = name[:i]
o = &fuseops.MkDirOp{ o = &fuseops.MkDirOp{
Parent: fuseops.InodeID(m.Header().Nodeid), Parent: fuseops.InodeID(inMsg.Header().Nodeid),
Name: string(name), Name: string(name),
// On Linux, vfs_mkdir calls through to the inode with at most // On Linux, vfs_mkdir calls through to the inode with at most
@ -133,13 +135,13 @@ func convertInMessage(
} }
case fusekernel.OpCreate: case fusekernel.OpCreate:
in := (*fusekernel.CreateIn)(m.Consume(fusekernel.CreateInSize(protocol))) in := (*fusekernel.CreateIn)(inMsg.Consume(fusekernel.CreateInSize(protocol)))
if in == nil { if in == nil {
err = errors.New("Corrupt OpCreate") err = errors.New("Corrupt OpCreate")
return return
} }
name := m.ConsumeBytes(m.Len()) name := inMsg.ConsumeBytes(inMsg.Len())
i := bytes.IndexByte(name, '\x00') i := bytes.IndexByte(name, '\x00')
if i < 0 { if i < 0 {
err = errors.New("Corrupt OpCreate") err = errors.New("Corrupt OpCreate")
@ -148,14 +150,14 @@ func convertInMessage(
name = name[:i] name = name[:i]
o = &fuseops.CreateFileOp{ o = &fuseops.CreateFileOp{
Parent: fuseops.InodeID(m.Header().Nodeid), Parent: fuseops.InodeID(inMsg.Header().Nodeid),
Name: string(name), Name: string(name),
Mode: convertFileMode(in.Mode), Mode: convertFileMode(in.Mode),
} }
case fusekernel.OpSymlink: case fusekernel.OpSymlink:
// The message is "newName\0target\0". // The message is "newName\0target\0".
names := m.ConsumeBytes(m.Len()) names := inMsg.ConsumeBytes(inMsg.Len())
if len(names) == 0 || names[len(names)-1] != 0 { if len(names) == 0 || names[len(names)-1] != 0 {
err = errors.New("Corrupt OpSymlink") err = errors.New("Corrupt OpSymlink")
return return
@ -168,20 +170,20 @@ func convertInMessage(
newName, target := names[0:i], names[i+1:len(names)-1] newName, target := names[0:i], names[i+1:len(names)-1]
o = &fuseops.CreateSymlinkOp{ o = &fuseops.CreateSymlinkOp{
Parent: fuseops.InodeID(m.Header().Nodeid), Parent: fuseops.InodeID(inMsg.Header().Nodeid),
Name: string(newName), Name: string(newName),
Target: string(target), Target: string(target),
} }
case fusekernel.OpRename: case fusekernel.OpRename:
type input fusekernel.RenameIn type input fusekernel.RenameIn
in := (*input)(m.Consume(unsafe.Sizeof(input{}))) in := (*input)(inMsg.Consume(unsafe.Sizeof(input{})))
if in == nil { if in == nil {
err = errors.New("Corrupt OpRename") err = errors.New("Corrupt OpRename")
return return
} }
names := m.ConsumeBytes(m.Len()) names := inMsg.ConsumeBytes(inMsg.Len())
// names should be "old\x00new\x00" // names should be "old\x00new\x00"
if len(names) < 4 { if len(names) < 4 {
err = errors.New("Corrupt OpRename") err = errors.New("Corrupt OpRename")
@ -199,14 +201,14 @@ func convertInMessage(
oldName, newName := names[:i], names[i+1:len(names)-1] oldName, newName := names[:i], names[i+1:len(names)-1]
o = &fuseops.RenameOp{ o = &fuseops.RenameOp{
OldParent: fuseops.InodeID(m.Header().Nodeid), OldParent: fuseops.InodeID(inMsg.Header().Nodeid),
OldName: string(oldName), OldName: string(oldName),
NewParent: fuseops.InodeID(in.Newdir), NewParent: fuseops.InodeID(in.Newdir),
NewName: string(newName), NewName: string(newName),
} }
case fusekernel.OpUnlink: case fusekernel.OpUnlink:
buf := m.ConsumeBytes(m.Len()) buf := inMsg.ConsumeBytes(inMsg.Len())
n := len(buf) n := len(buf)
if n == 0 || buf[n-1] != '\x00' { if n == 0 || buf[n-1] != '\x00' {
err = errors.New("Corrupt OpUnlink") err = errors.New("Corrupt OpUnlink")
@ -214,12 +216,12 @@ func convertInMessage(
} }
o = &fuseops.UnlinkOp{ o = &fuseops.UnlinkOp{
Parent: fuseops.InodeID(m.Header().Nodeid), Parent: fuseops.InodeID(inMsg.Header().Nodeid),
Name: string(buf[:n-1]), Name: string(buf[:n-1]),
} }
case fusekernel.OpRmdir: case fusekernel.OpRmdir:
buf := m.ConsumeBytes(m.Len()) buf := inMsg.ConsumeBytes(inMsg.Len())
n := len(buf) n := len(buf)
if n == 0 || buf[n-1] != '\x00' { if n == 0 || buf[n-1] != '\x00' {
err = errors.New("Corrupt OpRmdir") err = errors.New("Corrupt OpRmdir")
@ -227,51 +229,75 @@ func convertInMessage(
} }
o = &fuseops.RmDirOp{ o = &fuseops.RmDirOp{
Parent: fuseops.InodeID(m.Header().Nodeid), Parent: fuseops.InodeID(inMsg.Header().Nodeid),
Name: string(buf[:n-1]), Name: string(buf[:n-1]),
} }
case fusekernel.OpOpen: case fusekernel.OpOpen:
o = &fuseops.OpenFileOp{ o = &fuseops.OpenFileOp{
Inode: fuseops.InodeID(m.Header().Nodeid), Inode: fuseops.InodeID(inMsg.Header().Nodeid),
} }
case fusekernel.OpOpendir: case fusekernel.OpOpendir:
o = &fuseops.OpenDirOp{ o = &fuseops.OpenDirOp{
Inode: fuseops.InodeID(m.Header().Nodeid), Inode: fuseops.InodeID(inMsg.Header().Nodeid),
} }
case fusekernel.OpRead: case fusekernel.OpRead:
in := (*fusekernel.ReadIn)(m.Consume(fusekernel.ReadInSize(protocol))) in := (*fusekernel.ReadIn)(inMsg.Consume(fusekernel.ReadInSize(protocol)))
if in == nil { if in == nil {
err = errors.New("Corrupt OpRead") err = errors.New("Corrupt OpRead")
return return
} }
o = &fuseops.ReadFileOp{ to := &fuseops.ReadFileOp{
Inode: fuseops.InodeID(m.Header().Nodeid), Inode: fuseops.InodeID(inMsg.Header().Nodeid),
Handle: fuseops.HandleID(in.Fh), Handle: fuseops.HandleID(in.Fh),
Offset: int64(in.Offset), Offset: int64(in.Offset),
Size: int(in.Size), }
o = to
readSize := int(in.Size)
p := outMsg.GrowNoZero(uintptr(readSize))
if p == nil {
err = fmt.Errorf("Can't grow for %d-byte read", readSize)
return
} }
sh := (*reflect.SliceHeader)(unsafe.Pointer(&to.Dst))
sh.Data = uintptr(p)
sh.Len = readSize
sh.Cap = readSize
case fusekernel.OpReaddir: case fusekernel.OpReaddir:
in := (*fusekernel.ReadIn)(m.Consume(fusekernel.ReadInSize(protocol))) in := (*fusekernel.ReadIn)(inMsg.Consume(fusekernel.ReadInSize(protocol)))
if in == nil { if in == nil {
err = errors.New("Corrupt OpReaddir") err = errors.New("Corrupt OpReaddir")
return return
} }
o = &fuseops.ReadDirOp{ to := &fuseops.ReadDirOp{
Inode: fuseops.InodeID(m.Header().Nodeid), Inode: fuseops.InodeID(inMsg.Header().Nodeid),
Handle: fuseops.HandleID(in.Fh), Handle: fuseops.HandleID(in.Fh),
Offset: fuseops.DirOffset(in.Offset), Offset: fuseops.DirOffset(in.Offset),
Size: int(in.Size),
} }
o = to
readSize := int(in.Size)
p := outMsg.GrowNoZero(uintptr(readSize))
if p == nil {
err = fmt.Errorf("Can't grow for %d-byte read", readSize)
return
}
sh := (*reflect.SliceHeader)(unsafe.Pointer(&to.Dst))
sh.Data = uintptr(p)
sh.Len = readSize
sh.Cap = readSize
case fusekernel.OpRelease: case fusekernel.OpRelease:
type input fusekernel.ReleaseIn type input fusekernel.ReleaseIn
in := (*input)(m.Consume(unsafe.Sizeof(input{}))) in := (*input)(inMsg.Consume(unsafe.Sizeof(input{})))
if in == nil { if in == nil {
err = errors.New("Corrupt OpRelease") err = errors.New("Corrupt OpRelease")
return return
@ -283,7 +309,7 @@ func convertInMessage(
case fusekernel.OpReleasedir: case fusekernel.OpReleasedir:
type input fusekernel.ReleaseIn type input fusekernel.ReleaseIn
in := (*input)(m.Consume(unsafe.Sizeof(input{}))) in := (*input)(inMsg.Consume(unsafe.Sizeof(input{})))
if in == nil { if in == nil {
err = errors.New("Corrupt OpReleasedir") err = errors.New("Corrupt OpReleasedir")
return return
@ -294,20 +320,20 @@ func convertInMessage(
} }
case fusekernel.OpWrite: case fusekernel.OpWrite:
in := (*fusekernel.WriteIn)(m.Consume(fusekernel.WriteInSize(protocol))) in := (*fusekernel.WriteIn)(inMsg.Consume(fusekernel.WriteInSize(protocol)))
if in == nil { if in == nil {
err = errors.New("Corrupt OpWrite") err = errors.New("Corrupt OpWrite")
return return
} }
buf := m.ConsumeBytes(m.Len()) buf := inMsg.ConsumeBytes(inMsg.Len())
if len(buf) < int(in.Size) { if len(buf) < int(in.Size) {
err = errors.New("Corrupt OpWrite") err = errors.New("Corrupt OpWrite")
return return
} }
o = &fuseops.WriteFileOp{ o = &fuseops.WriteFileOp{
Inode: fuseops.InodeID(m.Header().Nodeid), Inode: fuseops.InodeID(inMsg.Header().Nodeid),
Handle: fuseops.HandleID(in.Fh), Handle: fuseops.HandleID(in.Fh),
Data: buf, Data: buf,
Offset: int64(in.Offset), Offset: int64(in.Offset),
@ -315,33 +341,33 @@ func convertInMessage(
case fusekernel.OpFsync: case fusekernel.OpFsync:
type input fusekernel.FsyncIn type input fusekernel.FsyncIn
in := (*input)(m.Consume(unsafe.Sizeof(input{}))) in := (*input)(inMsg.Consume(unsafe.Sizeof(input{})))
if in == nil { if in == nil {
err = errors.New("Corrupt OpFsync") err = errors.New("Corrupt OpFsync")
return return
} }
o = &fuseops.SyncFileOp{ o = &fuseops.SyncFileOp{
Inode: fuseops.InodeID(m.Header().Nodeid), Inode: fuseops.InodeID(inMsg.Header().Nodeid),
Handle: fuseops.HandleID(in.Fh), Handle: fuseops.HandleID(in.Fh),
} }
case fusekernel.OpFlush: case fusekernel.OpFlush:
type input fusekernel.FlushIn type input fusekernel.FlushIn
in := (*input)(m.Consume(unsafe.Sizeof(input{}))) in := (*input)(inMsg.Consume(unsafe.Sizeof(input{})))
if in == nil { if in == nil {
err = errors.New("Corrupt OpFlush") err = errors.New("Corrupt OpFlush")
return return
} }
o = &fuseops.FlushFileOp{ o = &fuseops.FlushFileOp{
Inode: fuseops.InodeID(m.Header().Nodeid), Inode: fuseops.InodeID(inMsg.Header().Nodeid),
Handle: fuseops.HandleID(in.Fh), Handle: fuseops.HandleID(in.Fh),
} }
case fusekernel.OpReadlink: case fusekernel.OpReadlink:
o = &fuseops.ReadSymlinkOp{ o = &fuseops.ReadSymlinkOp{
Inode: fuseops.InodeID(m.Header().Nodeid), Inode: fuseops.InodeID(inMsg.Header().Nodeid),
} }
case fusekernel.OpStatfs: case fusekernel.OpStatfs:
@ -349,7 +375,7 @@ func convertInMessage(
case fusekernel.OpInterrupt: case fusekernel.OpInterrupt:
type input fusekernel.InterruptIn type input fusekernel.InterruptIn
in := (*input)(m.Consume(unsafe.Sizeof(input{}))) in := (*input)(inMsg.Consume(unsafe.Sizeof(input{})))
if in == nil { if in == nil {
err = errors.New("Corrupt OpInterrupt") err = errors.New("Corrupt OpInterrupt")
return return
@ -361,7 +387,7 @@ func convertInMessage(
case fusekernel.OpInit: case fusekernel.OpInit:
type input fusekernel.InitIn type input fusekernel.InitIn
in := (*input)(m.Consume(unsafe.Sizeof(input{}))) in := (*input)(inMsg.Consume(unsafe.Sizeof(input{})))
if in == nil { if in == nil {
err = errors.New("Corrupt OpInit") err = errors.New("Corrupt OpInit")
return return
@ -375,8 +401,8 @@ func convertInMessage(
default: default:
o = &unknownOp{ o = &unknownOp{
opCode: m.Header().Opcode, opCode: inMsg.Header().Opcode,
inode: fuseops.InodeID(m.Header().Nodeid), inode: fuseops.InodeID(inMsg.Header().Nodeid),
} }
} }
@ -387,51 +413,45 @@ func convertInMessage(
// Outgoing messages // Outgoing messages
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// Return the response that should be sent to the kernel, or nil if the op // Fill in the response that should be sent to the kernel, or set noResponse if
// requires no response. // the op requires no response.
func (c *Connection) kernelResponse( func (c *Connection) kernelResponse(
m *buffer.OutMessage,
fuseID uint64, fuseID uint64,
op interface{}, op interface{},
opErr error) (m *buffer.OutMessage) { opErr error) (noResponse bool) {
// If the user replied with an error, create a response containing just the h := m.OutHeader()
// result header with the error filled in. Otherwise create an appropriate h.Unique = fuseID
// response.
// Did the user return an error? Otherwise, fill in the rest of the response.
if opErr != nil { if opErr != nil {
m = c.getOutMessage()
if errno, ok := opErr.(syscall.Errno); ok { if errno, ok := opErr.(syscall.Errno); ok {
m.OutHeader().Error = -int32(errno) m.OutHeader().Error = -int32(errno)
} else { } else {
m.OutHeader().Error = -int32(syscall.EIO) m.OutHeader().Error = -int32(syscall.EIO)
} }
} else { } else {
m = c.kernelResponseForOp(op) noResponse = c.kernelResponseForOp(m, op)
}
// Fill in the rest of the header, if a response is required.
if m != nil {
h := m.OutHeader()
h.Unique = fuseID
h.Len = uint32(m.Len())
} }
h.Len = uint32(m.Len())
return return
} }
// Like kernelResponse, but assumes the user replied with a nil error to the // Like kernelResponse, but assumes the user replied with a nil error to the
// op. Returns a nil response if no response is required. // op.
func (c *Connection) kernelResponseForOp( func (c *Connection) kernelResponseForOp(
op interface{}) (m *buffer.OutMessage) { m *buffer.OutMessage,
op interface{}) (noResponse bool) {
// Create the appropriate output message // Create the appropriate output message
switch o := op.(type) { switch o := op.(type) {
case *fuseops.LookUpInodeOp: case *fuseops.LookUpInodeOp:
size := fusekernel.EntryOutSize(c.protocol) size := fusekernel.EntryOutSize(c.protocol)
m = c.getOutMessage()
out := (*fusekernel.EntryOut)(m.Grow(size)) out := (*fusekernel.EntryOut)(m.Grow(size))
convertChildInodeEntry(&o.Entry, out) convertChildInodeEntry(&o.Entry, out)
case *fuseops.GetInodeAttributesOp: case *fuseops.GetInodeAttributesOp:
size := fusekernel.AttrOutSize(c.protocol) size := fusekernel.AttrOutSize(c.protocol)
m = c.getOutMessage()
out := (*fusekernel.AttrOut)(m.Grow(size)) out := (*fusekernel.AttrOut)(m.Grow(size))
out.AttrValid, out.AttrValidNsec = convertExpirationTime( out.AttrValid, out.AttrValidNsec = convertExpirationTime(
o.AttributesExpiration) o.AttributesExpiration)
@ -439,24 +459,21 @@ func (c *Connection) kernelResponseForOp(
case *fuseops.SetInodeAttributesOp: case *fuseops.SetInodeAttributesOp:
size := fusekernel.AttrOutSize(c.protocol) size := fusekernel.AttrOutSize(c.protocol)
m = c.getOutMessage()
out := (*fusekernel.AttrOut)(m.Grow(size)) out := (*fusekernel.AttrOut)(m.Grow(size))
out.AttrValid, out.AttrValidNsec = convertExpirationTime( out.AttrValid, out.AttrValidNsec = convertExpirationTime(
o.AttributesExpiration) o.AttributesExpiration)
convertAttributes(o.Inode, &o.Attributes, &out.Attr) convertAttributes(o.Inode, &o.Attributes, &out.Attr)
case *fuseops.ForgetInodeOp: case *fuseops.ForgetInodeOp:
// No response. noResponse = true
case *fuseops.MkDirOp: case *fuseops.MkDirOp:
size := fusekernel.EntryOutSize(c.protocol) size := fusekernel.EntryOutSize(c.protocol)
m = c.getOutMessage()
out := (*fusekernel.EntryOut)(m.Grow(size)) out := (*fusekernel.EntryOut)(m.Grow(size))
convertChildInodeEntry(&o.Entry, out) convertChildInodeEntry(&o.Entry, out)
case *fuseops.CreateFileOp: case *fuseops.CreateFileOp:
eSize := fusekernel.EntryOutSize(c.protocol) eSize := fusekernel.EntryOutSize(c.protocol)
m = c.getOutMessage()
e := (*fusekernel.EntryOut)(m.Grow(eSize)) e := (*fusekernel.EntryOut)(m.Grow(eSize))
convertChildInodeEntry(&o.Entry, e) convertChildInodeEntry(&o.Entry, e)
@ -466,67 +483,64 @@ func (c *Connection) kernelResponseForOp(
case *fuseops.CreateSymlinkOp: case *fuseops.CreateSymlinkOp:
size := fusekernel.EntryOutSize(c.protocol) size := fusekernel.EntryOutSize(c.protocol)
m = c.getOutMessage()
out := (*fusekernel.EntryOut)(m.Grow(size)) out := (*fusekernel.EntryOut)(m.Grow(size))
convertChildInodeEntry(&o.Entry, out) convertChildInodeEntry(&o.Entry, out)
case *fuseops.RenameOp: case *fuseops.RenameOp:
m = c.getOutMessage() // Empty response
case *fuseops.RmDirOp: case *fuseops.RmDirOp:
m = c.getOutMessage() // Empty response
case *fuseops.UnlinkOp: case *fuseops.UnlinkOp:
m = c.getOutMessage() // Empty response
case *fuseops.OpenDirOp: case *fuseops.OpenDirOp:
m = c.getOutMessage()
out := (*fusekernel.OpenOut)(m.Grow(unsafe.Sizeof(fusekernel.OpenOut{}))) out := (*fusekernel.OpenOut)(m.Grow(unsafe.Sizeof(fusekernel.OpenOut{})))
out.Fh = uint64(o.Handle) out.Fh = uint64(o.Handle)
case *fuseops.ReadDirOp: case *fuseops.ReadDirOp:
m = c.getOutMessage() // convertInMessage already set up the destination buffer to be at the end
m.Append(o.Data) // of the out message. We need only shrink to the right size based on how
// much the user read.
m.Shrink(uintptr(m.Len() - (int(buffer.OutMessageInitialSize) + o.BytesRead)))
case *fuseops.ReleaseDirHandleOp: case *fuseops.ReleaseDirHandleOp:
m = c.getOutMessage() // Empty response
case *fuseops.OpenFileOp: case *fuseops.OpenFileOp:
m = c.getOutMessage()
out := (*fusekernel.OpenOut)(m.Grow(unsafe.Sizeof(fusekernel.OpenOut{}))) out := (*fusekernel.OpenOut)(m.Grow(unsafe.Sizeof(fusekernel.OpenOut{})))
out.Fh = uint64(o.Handle) out.Fh = uint64(o.Handle)
case *fuseops.ReadFileOp: case *fuseops.ReadFileOp:
m = c.getOutMessage() // convertInMessage already set up the destination buffer to be at the end
m.Append(o.Data) // of the out message. We need only shrink to the right size based on how
// much the user read.
m.Shrink(uintptr(m.Len() - (int(buffer.OutMessageInitialSize) + o.BytesRead)))
case *fuseops.WriteFileOp: case *fuseops.WriteFileOp:
m = c.getOutMessage()
out := (*fusekernel.WriteOut)(m.Grow(unsafe.Sizeof(fusekernel.WriteOut{}))) out := (*fusekernel.WriteOut)(m.Grow(unsafe.Sizeof(fusekernel.WriteOut{})))
out.Size = uint32(len(o.Data)) out.Size = uint32(len(o.Data))
case *fuseops.SyncFileOp: case *fuseops.SyncFileOp:
m = c.getOutMessage() // Empty response
case *fuseops.FlushFileOp: case *fuseops.FlushFileOp:
m = c.getOutMessage() // Empty response
case *fuseops.ReleaseFileHandleOp: case *fuseops.ReleaseFileHandleOp:
m = c.getOutMessage() // Empty response
case *fuseops.ReadSymlinkOp: case *fuseops.ReadSymlinkOp:
m = c.getOutMessage()
m.AppendString(o.Target) m.AppendString(o.Target)
case *statFSOp: case *statFSOp:
m = c.getOutMessage()
m.Grow(unsafe.Sizeof(fusekernel.StatfsOut{})) m.Grow(unsafe.Sizeof(fusekernel.StatfsOut{}))
case *interruptOp: case *interruptOp:
// No response. noResponse = true
case *initOp: case *initOp:
m = c.getOutMessage()
out := (*fusekernel.InitOut)(m.Grow(unsafe.Sizeof(fusekernel.InitOut{}))) out := (*fusekernel.InitOut)(m.Grow(unsafe.Sizeof(fusekernel.InitOut{})))
out.Major = o.Library.Major out.Major = o.Library.Major

View File

@ -384,25 +384,29 @@ type ReadDirOp struct {
// offset, and return array offsets into that cached listing. // offset, and return array offsets into that cached listing.
Offset DirOffset Offset DirOffset
// The maximum number of bytes to return in ReadDirResponse.Data. A smaller // The destination buffer, whose length gives the size of the read.
// number is acceptable.
Size int
// Set by the file system: a buffer consisting of a sequence of FUSE
// directory entries in the format generated by fuse_add_direntry
// (http://goo.gl/qCcHCV), which is consumed by parse_dirfile
// (http://goo.gl/2WUmD2). Use fuseutil.AppendDirent to generate this data.
// //
// The buffer must not exceed the length specified in ReadDirRequest.Size. It // The output data should consist of a sequence of FUSE directory entries in
// is okay for the final entry to be truncated; parse_dirfile copes with this // the format generated by fuse_add_direntry (http://goo.gl/qCcHCV), which is
// by ignoring the partial record. // consumed by parse_dirfile (http://goo.gl/2WUmD2). Use fuseutil.WriteDirent
// to generate this data.
// //
// Each entry returned exposes a directory offset to the user that may later // Each entry returned exposes a directory offset to the user that may later
// show up in ReadDirRequest.Offset. See notes on that field for more // show up in ReadDirRequest.Offset. See notes on that field for more
// information. // information.
Dst []byte
// Set by the file system: the number of bytes read into Dst.
// //
// An empty buffer indicates the end of the directory has been reached. // It is okay for this to be less than len(Dst) if there are not enough
Data []byte // entries available or the final entry would not fit.
//
// Zero means that the end of the directory has been reached. This is
// unambiguous because NAME_MAX (https://goo.gl/ZxzKaE) plus the size of
// fuse_dirent (https://goo.gl/WO8s3F) plus the 8-byte alignment of
// FUSE_DIRENT_ALIGN (http://goo.gl/UziWvH) is less than the read size of
// PAGE_SIZE used by fuse_readdir (cf. https://goo.gl/VajtS2).
BytesRead int
} }
// Release a previously-minted directory handle. The kernel sends this when // Release a previously-minted directory handle. The kernel sends this when
@ -455,20 +459,21 @@ type ReadFileOp struct {
Inode InodeID Inode InodeID
Handle HandleID Handle HandleID
// The range of the file to read. // The offset within the file at which to read.
Offset int64
// The destination buffer, whose length gives the size of the read.
Dst []byte
// Set by the file system: the number of bytes read.
// //
// The FUSE documentation requires that exactly the number of bytes be // The FUSE documentation requires that exactly the requested number of bytes
// returned, except in the case of EOF or error (http://goo.gl/ZgfBkF). This // be returned, except in the case of EOF or error (http://goo.gl/ZgfBkF).
// appears to be because it uses file mmapping machinery // This appears to be because it uses file mmapping machinery
// (http://goo.gl/SGxnaN) to read a page at a time. It appears to understand // (http://goo.gl/SGxnaN) to read a page at a time. It appears to understand
// where EOF is by checking the inode size (http://goo.gl/0BkqKD), returned // where EOF is by checking the inode size (http://goo.gl/0BkqKD), returned
// by a previous call to LookUpInode, GetInodeAttributes, etc. // by a previous call to LookUpInode, GetInodeAttributes, etc.
Offset int64 BytesRead int
Size int
// Set by the file system: the data read. If this is less than the requested
// size, it indicates EOF. An error should not be returned in this case.
Data []byte
} }
// Write data to a file previously opened with CreateFile or OpenFile. // Write data to a file previously opened with CreateFile or OpenFile.

View File

@ -35,7 +35,7 @@ const (
) )
// A struct representing an entry within a directory file, describing a child. // A struct representing an entry within a directory file, describing a child.
// See notes on fuseops.ReadDirOp and on AppendDirent for details. // See notes on fuseops.ReadDirOp and on WriteDirent for details.
type Dirent struct { type Dirent struct {
// The (opaque) offset within the directory file of the entry following this // The (opaque) offset within the directory file of the entry following this
// one. See notes on fuseops.ReadDirOp.Offset for details. // one. See notes on fuseops.ReadDirOp.Offset for details.
@ -50,10 +50,11 @@ type Dirent struct {
Type DirentType Type DirentType
} }
// Append the supplied directory entry to the given buffer in the format // Write the supplied directory entry into the given buffer in the format
// expected in fuseops.ReadFileOp.Data, returning the resulting buffer. // expected in fuseops.ReadDirOp.Dst, returning the number of bytes written.
func AppendDirent(input []byte, d Dirent) (output []byte) { // Return zero if the entry would not fit.
// We want to append bytes with the layout of fuse_dirent func WriteDirent(buf []byte, d Dirent) (n int) {
// We want to write bytes with the layout of fuse_dirent
// (http://goo.gl/BmFxob) in host order. The struct must be aligned according // (http://goo.gl/BmFxob) in host order. The struct must be aligned according
// to FUSE_DIRENT_ALIGN (http://goo.gl/UziWvH), which dictates 8-byte // to FUSE_DIRENT_ALIGN (http://goo.gl/UziWvH), which dictates 8-byte
// alignment. // alignment.
@ -65,10 +66,23 @@ func AppendDirent(input []byte, d Dirent) (output []byte) {
name [0]byte name [0]byte
} }
const alignment = 8 const direntAlignment = 8
const nameOffset = 8 + 8 + 4 + 4 const direntSize = 8 + 8 + 4 + 4
// Write the header into the buffer. // Compute the number of bytes of padding we'll need to maintain alignment
// for the next entry.
var padLen int
if len(d.Name)%direntAlignment != 0 {
padLen = direntAlignment - (len(d.Name) % direntAlignment)
}
// Do we have enough room?
totalLen := direntSize + len(d.Name) + padLen
if totalLen > len(buf) {
return
}
// Write the header.
de := fuse_dirent{ de := fuse_dirent{
ino: uint64(d.Inode), ino: uint64(d.Inode),
off: uint64(d.Offset), off: uint64(d.Offset),
@ -76,17 +90,15 @@ func AppendDirent(input []byte, d Dirent) (output []byte) {
type_: uint32(d.Type), type_: uint32(d.Type),
} }
output = append(input, (*[nameOffset]byte)(unsafe.Pointer(&de))[:]...) n += copy(buf[n:], (*[direntSize]byte)(unsafe.Pointer(&de))[:])
// Write the name afterward. // Write the name afterward.
output = append(output, d.Name...) n += copy(buf[n:], d.Name)
// Add any necessary padding. // Add any necessary padding.
if len(d.Name)%alignment != 0 { if padLen != 0 {
padLen := alignment - (len(d.Name) % alignment) var padding [direntAlignment]byte
n += copy(buf[n:], padding[:padLen])
var padding [alignment]byte
output = append(output, padding[:padLen]...)
} }
return return

View File

@ -25,6 +25,9 @@ import (
const outHeaderSize = unsafe.Sizeof(fusekernel.OutHeader{}) const outHeaderSize = unsafe.Sizeof(fusekernel.OutHeader{})
// OutMessage structs begin life with Len() == OutMessageInitialSize.
const OutMessageInitialSize = outHeaderSize
// We size out messages to be large enough to hold a header for the response // We size out messages to be large enough to hold a header for the response
// plus the largest read that may come in. // plus the largest read that may come in.
const outMessageSize = outHeaderSize + MaxReadSize const outMessageSize = outHeaderSize + MaxReadSize
@ -53,8 +56,8 @@ func init() {
// Reset the message so that it is ready to be used again. Afterward, the // Reset the message so that it is ready to be used again. Afterward, the
// contents are solely a zeroed header. // contents are solely a zeroed header.
func (m *OutMessage) Reset() { func (m *OutMessage) Reset() {
m.offset = outHeaderSize m.offset = OutMessageInitialSize
memclr(unsafe.Pointer(&m.storage), outHeaderSize) memclr(unsafe.Pointer(&m.storage), OutMessageInitialSize)
} }
// Return a pointer to the header at the start of the message. // Return a pointer to the header at the start of the message.
@ -87,6 +90,15 @@ func (b *OutMessage) GrowNoZero(size uintptr) (p unsafe.Pointer) {
return return
} }
// Shrink discards the final n bytes of the message by moving the write offset
// back. It panics when n is larger than b.offset - OutMessageInitialSize.
func (b *OutMessage) Shrink(n uintptr) {
	// Only bytes past the initial size may be thrown away.
	if removable := b.offset - OutMessageInitialSize; n > removable {
		panic(fmt.Sprintf("Shrink(%d) out of range for offset %d", n, b.offset))
	}

	b.offset -= n
}
// Equivalent to growing by the length of p, then copying p over the new // Equivalent to growing by the length of p, then copying p over the new
// segment. Panics if there is not enough room available. // segment. Panics if there is not enough room available.
func (b *OutMessage) Append(src []byte) { func (b *OutMessage) Append(src []byte) {

View File

@ -180,8 +180,7 @@ func (fs *flushFS) ReadFile(
} }
// Read what we can. // Read what we can.
op.Data = make([]byte, op.Size) op.BytesRead = copy(op.Dst, fs.fooContents[op.Offset:])
copy(op.Data, fs.fooContents[op.Offset:])
return return
} }
@ -298,13 +297,15 @@ func (fs *flushFS) ReadDir(
// Fill in the listing. // Fill in the listing.
for _, de := range dirents { for _, de := range dirents {
op.Data = fuseutil.AppendDirent(op.Data, de) n := fuseutil.WriteDirent(op.Dst[op.BytesRead:], de)
}
// We don't support doing this in anything more than one shot. // We don't support doing this in anything more than one shot.
if len(op.Data) > op.Size { if n == 0 {
err = fmt.Errorf("Couldn't fit listing in %v bytes", op.Size) err = fmt.Errorf("Couldn't fit listing in %v bytes", len(op.Dst))
return return
}
op.BytesRead += n
} }
return return

View File

@ -228,11 +228,12 @@ func (fs *helloFS) ReadDir(
// Resume at the specified offset into the array. // Resume at the specified offset into the array.
for _, e := range entries { for _, e := range entries {
op.Data = fuseutil.AppendDirent(op.Data, e) n := fuseutil.WriteDirent(op.Dst[op.BytesRead:], e)
if len(op.Data) > op.Size { if n == 0 {
op.Data = op.Data[:op.Size]
break break
} }
op.BytesRead += n
} }
return return
@ -251,9 +252,7 @@ func (fs *helloFS) ReadFile(
// Let io.ReaderAt deal with the semantics. // Let io.ReaderAt deal with the semantics.
reader := strings.NewReader("Hello, world!") reader := strings.NewReader("Hello, world!")
op.Data = make([]byte, op.Size) op.BytesRead, err = reader.ReadAt(op.Dst, op.Offset)
n, err := reader.ReadAt(op.Data, op.Offset)
op.Data = op.Data[:n]
// Special case: FUSE doesn't expect us to return io.EOF. // Special case: FUSE doesn't expect us to return io.EOF.
if err == io.EOF { if err == io.EOF {

View File

@ -278,7 +278,7 @@ func (in *inode) RemoveChild(name string) {
// Serve a ReadDir request. // Serve a ReadDir request.
// //
// REQUIRES: in.isDir() // REQUIRES: in.isDir()
func (in *inode) ReadDir(offset int, size int) (data []byte) { func (in *inode) ReadDir(p []byte, offset int) (n int) {
if !in.isDir() { if !in.isDir() {
panic("ReadDir called on non-directory.") panic("ReadDir called on non-directory.")
} }
@ -291,13 +291,12 @@ func (in *inode) ReadDir(offset int, size int) (data []byte) {
continue continue
} }
data = fuseutil.AppendDirent(data, in.entries[i]) tmp := fuseutil.WriteDirent(p[n:], in.entries[i])
if tmp == 0 {
// Trim and stop early if we've exceeded the requested size.
if len(data) > size {
data = data[:size]
break break
} }
n += tmp
} }
return return

View File

@ -428,7 +428,9 @@ func (fs *memFS) Rename(
existingID, _, ok := newParent.LookUpChild(op.NewName) existingID, _, ok := newParent.LookUpChild(op.NewName)
if ok { if ok {
existing := fs.getInodeOrDie(existingID) existing := fs.getInodeOrDie(existingID)
if existing.isDir() && len(existing.ReadDir(0, 1024)) > 0 {
var buf [4096]byte
if existing.isDir() && existing.ReadDir(buf[:], 0) > 0 {
err = fuse.ENOTEMPTY err = fuse.ENOTEMPTY
return return
} }
@ -538,7 +540,7 @@ func (fs *memFS) ReadDir(
inode := fs.getInodeOrDie(op.Inode) inode := fs.getInodeOrDie(op.Inode)
// Serve the request. // Serve the request.
op.Data = inode.ReadDir(int(op.Offset), op.Size) op.BytesRead = inode.ReadDir(op.Dst, int(op.Offset))
return return
} }
@ -571,9 +573,7 @@ func (fs *memFS) ReadFile(
inode := fs.getInodeOrDie(op.Inode) inode := fs.getInodeOrDie(op.Inode)
// Serve the request. // Serve the request.
op.Data = make([]byte, op.Size) op.BytesRead, err = inode.ReadAt(op.Dst, op.Offset)
n, err := inode.ReadAt(op.Data, op.Offset)
op.Data = op.Data[:n]
// Don't return EOF errors; we just indicate EOF to fuse using a short read. // Don't return EOF errors; we just indicate EOF to fuse using a short read.
if err == io.EOF { if err == io.EOF {