From 1611e6cf4e7163f6102b37010a8b7e7120f468b5 Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 18 Nov 2021 15:46:18 -0500 Subject: [PATCH] python/machine: handle "fast" QEMU terminations In the case that the QEMU process actually launches -- but then dies so quickly that we can't establish a QMP connection to it -- QEMUMachine currently calls _post_shutdown() assuming that it never launched the VM process. This isn't true, though: it "merely" may have failed to establish a QMP connection and the process is in the middle of its own exit path. If we don't wait for the subprocess, the caller may get a bogus `None` return for .exitcode(). This behavior was observed from device-crash-test; after the switch to Async QMP, the timings were changed such that it was now seemingly possible to witness the failure of "vm.launch()" *prior* to the exitcode becoming available. The semantic of the `_launched` property is changed in this patch. Instead of representing the condition "launch() executed successfully", it will now represent "has forked a child process successfully". This way, wait() when called in the exit path won't become a no-op. Signed-off-by: John Snow Reviewed-by: Willian Rampazzo Message-id: 20211118204620.1897674-6-jsnow@redhat.com Signed-off-by: John Snow --- python/qemu/machine/machine.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py index f92e73de40..67ab06ca2b 100644 --- a/python/qemu/machine/machine.py +++ b/python/qemu/machine/machine.py @@ -349,9 +349,6 @@ class QEMUMachine: Called to cleanup the VM instance after the process has exited. May also be called after a failed launch. """ - # Comprehensive reset for the failed launch case: - self._early_cleanup() - try: self._close_qmp_connection() except Exception as err: # pylint: disable=broad-except @@ -400,9 +397,16 @@ class QEMUMachine: try: self._launch() - self._launched = True except: - self._post_shutdown() + # We may have launched the process but it may + # have exited before we could connect via QMP. + # Assume the VM didn't launch or is exiting. + # If we don't wait for the process, exitcode() may still be + # 'None' by the time control is ceded back to the caller. + if self._launched: + self.wait() + else: + self._post_shutdown() LOG.debug('Error launching VM') if self._qemu_full_args: @@ -426,6 +430,7 @@ class QEMUMachine: stderr=subprocess.STDOUT, shell=False, close_fds=False) + self._launched = True self._post_launch() def _close_qmp_connection(self) -> None: @@ -457,8 +462,8 @@ class QEMUMachine: """ Perform any cleanup that needs to happen before the VM exits. - May be invoked by both soft and hard shutdown in failover scenarios. - Called additionally by _post_shutdown for comprehensive cleanup. + This method may be called twice upon shutdown, once each by soft + and hard shutdown in failover scenarios. """ # If we keep the console socket open, we may deadlock waiting # for QEMU to exit, while QEMU is waiting for the socket to