From 2f38adeb3d1f2a0859a7f114e9c987832eb88361 Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Sun, 24 Dec 2023 12:58:50 +0300 Subject: [PATCH] Restart dead VDUSE daemons at regular intervals --- csi/src/nodeserver.go | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/csi/src/nodeserver.go b/csi/src/nodeserver.go index 6f9987e5..7694a861 100644 --- a/csi/src/nodeserver.go +++ b/csi/src/nodeserver.go @@ -14,6 +14,7 @@ import ( "strconv" "strings" "syscall" + "time" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" @@ -32,6 +33,7 @@ type NodeServer struct useVduse bool stateDir string mounter mount.Interface + restartInterval time.Duration } type DeviceState struct @@ -65,6 +67,16 @@ func NewNodeServer(driver *Driver) *NodeServer if (ns.useVduse) { ns.restoreVduseDaemons() + dur, err := time.ParseDuration(os.Getenv("RESTART_INTERVAL")) + if (err != nil) + { + dur = 10 * time.Second + } + ns.restartInterval = dur + if (ns.restartInterval != time.Duration(0)) + { + go ns.restarter() + } } return ns } @@ -364,6 +376,21 @@ func (ns *NodeServer) unmapVduseById(vdpaId string) } } +func (ns *NodeServer) restarter() +{ + // Restart dead VDUSE daemons at regular intervals + // Otherwise volume I/O may hang in case of a qemu-storage-daemon crash + // Moreover, it may lead to a kernel panic if the kernel is configured to + // panic on hung tasks + ticker := time.NewTicker(ns.restartInterval) + defer ticker.Stop() + for + { + <-ticker.C + ns.restoreVduseDaemons() + } +} + func (ns *NodeServer) restoreVduseDaemons() { pattern := ns.stateDir+"vitastor-vduse-*.json"