diff --git a/Makefile b/Makefile index bfc4b2c8e9..676a4a54f4 100644 --- a/Makefile +++ b/Makefile @@ -283,6 +283,13 @@ include $(SRC_PATH)/tests/vm/Makefile.include print-help-run = printf " %-30s - %s\\n" "$1" "$2" print-help = @$(call print-help-run,$1,$2) +.PHONY: update-linux-vdso +update-linux-vdso: + @for m in $(SRC_PATH)/linux-user/*/Makefile.vdso; do \ + $(MAKE) $(SUBDIR_MAKEFLAGS) -C $$(dirname $$m) -f Makefile.vdso \ + SRC_PATH=$(SRC_PATH) BUILD_DIR=$(BUILD_DIR); \ + done + .PHONY: help help: @echo 'Generic targets:' @@ -303,6 +310,9 @@ endif $(call print-help,distclean,Remove all generated files) $(call print-help,dist,Build a distributable tarball) @echo '' + @echo 'Linux-user targets:' + $(call print-help,update-linux-vdso,Build linux-user vdso images) + @echo '' @echo 'Test targets:' $(call print-help,check,Run all tests (check-help for details)) $(call print-help,bench,Run all benchmarks) diff --git a/linux-user/aarch64/Makefile.vdso b/linux-user/aarch64/Makefile.vdso new file mode 100644 index 0000000000..599958116b --- /dev/null +++ b/linux-user/aarch64/Makefile.vdso @@ -0,0 +1,15 @@ +include $(BUILD_DIR)/tests/tcg/aarch64-linux-user/config-target.mak + +SUBDIR = $(SRC_PATH)/linux-user/aarch64 +VPATH += $(SUBDIR) + +all: $(SUBDIR)/vdso-be.so $(SUBDIR)/vdso-le.so + +LDFLAGS = -nostdlib -shared -Wl,-h,linux-vdso.so.1 -Wl,--build-id=sha1 \ + -Wl,--hash-style=both -Wl,-T,$(SUBDIR)/vdso.ld + +$(SUBDIR)/vdso-be.so: vdso.S vdso.ld + $(CC) -o $@ $(LDFLAGS) -mbig-endian $< + +$(SUBDIR)/vdso-le.so: vdso.S vdso.ld + $(CC) -o $@ $(LDFLAGS) -mlittle-endian $< diff --git a/linux-user/aarch64/meson.build b/linux-user/aarch64/meson.build new file mode 100644 index 0000000000..248c578d15 --- /dev/null +++ b/linux-user/aarch64/meson.build @@ -0,0 +1,11 @@ +# TARGET_BIG_ENDIAN is defined to 'n' for little-endian; which means it +# is always true as far as source_set.apply() is concerned. Always build +# both header files and include the right one via #if. + +vdso_be_inc = gen_vdso.process('vdso-be.so', + extra_args: ['-r', '__kernel_rt_sigreturn']) + +vdso_le_inc = gen_vdso.process('vdso-le.so', + extra_args: ['-r', '__kernel_rt_sigreturn']) + +linux_user_ss.add(when: 'TARGET_AARCH64', if_true: [vdso_be_inc, vdso_le_inc]) diff --git a/linux-user/aarch64/vdso-be.so b/linux-user/aarch64/vdso-be.so new file mode 100755 index 0000000000..6084f3d1a7 Binary files /dev/null and b/linux-user/aarch64/vdso-be.so differ diff --git a/linux-user/aarch64/vdso-le.so b/linux-user/aarch64/vdso-le.so new file mode 100755 index 0000000000..947d534ec1 Binary files /dev/null and b/linux-user/aarch64/vdso-le.so differ diff --git a/linux-user/aarch64/vdso.S b/linux-user/aarch64/vdso.S new file mode 100644 index 0000000000..34d3a9ebd2 --- /dev/null +++ b/linux-user/aarch64/vdso.S @@ -0,0 +1,71 @@ +/* + * aarch64 linux replacement vdso. + * + * Copyright 2023 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include + +/* ??? These are in include/elf.h, which is not ready for inclusion in asm. */ +#define NT_GNU_PROPERTY_TYPE_0 5 +#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 +#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0) +#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1) + +#define GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT \ + (GNU_PROPERTY_AARCH64_FEATURE_1_BTI | GNU_PROPERTY_AARCH64_FEATURE_1_PAC) + + .section .note.gnu.property + .align 3 + .long 2f - 1f + .long 6f - 3f + .long NT_GNU_PROPERTY_TYPE_0 +1: .string "GNU" +2: .align 3 +3: .long GNU_PROPERTY_AARCH64_FEATURE_1_AND + .long 5f - 4f +4: .long GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT +5: .align 3 +6: + + .text + +.macro endf name + .globl \name + .type \name, @function + .size \name, . - \name +.endm + +.macro vdso_syscall name, nr +\name: + bti c + mov x8, #\nr + svc #0 + ret +endf \name +.endm + + .cfi_startproc + +vdso_syscall __kernel_gettimeofday, __NR_gettimeofday +vdso_syscall __kernel_clock_gettime, __NR_clock_gettime +vdso_syscall __kernel_clock_getres, __NR_clock_getres + + .cfi_endproc + + +/* + * TODO: The kernel makes a big deal of turning off the .cfi directives, + * because they cause libgcc to crash, but that's because they're wrong. + * + * For now, elide the unwind info for __kernel_rt_sigreturn and rely on + * the libgcc fallback routine as we have always done. This requires + * that the code sequence used be exact. + */ +__kernel_rt_sigreturn: + /* No BTI C insn here -- we arrive via RET. */ + mov x8, #__NR_rt_sigreturn + svc #0 +endf __kernel_rt_sigreturn diff --git a/linux-user/aarch64/vdso.ld b/linux-user/aarch64/vdso.ld new file mode 100644 index 0000000000..4c12f33352 --- /dev/null +++ b/linux-user/aarch64/vdso.ld @@ -0,0 +1,72 @@ +/* + * Linker script for linux aarch64 replacement vdso. + * + * Copyright 2021 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +VERSION { + LINUX_2.6.39 { + global: + __kernel_rt_sigreturn; + __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + + local: *; + }; +} + + +PHDRS { + phdr PT_PHDR FLAGS(4) PHDRS; + load PT_LOAD FLAGS(7) FILEHDR PHDRS; + dynamic PT_DYNAMIC FLAGS(4); + eh_frame_hdr PT_GNU_EH_FRAME; + note PT_NOTE FLAGS(4); +} + +SECTIONS { + /* + * We can't prelink to any address without knowing something about + * the virtual memory space of the host, since that leaks over into + * the available memory space of the guest. + */ + . = SIZEOF_HEADERS; + + /* + * The following, including the FILEHDRS and PHDRS, are modified + * when we relocate the binary. We want them to be initially + * writable for the relocation; we'll force them read-only after. + */ + .note : { *(.note*) } :load :note + .dynamic : { *(.dynamic) } :load :dynamic + .dynsym : { *(.dynsym) } :load + /* + * There ought not be any real read-write data. + * But since we manipulated the segment layout, + * we have to put these sections somewhere. + */ + .data : { + *(.data*) + *(.sdata*) + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.bss*) + *(.dynbss*) + *(.gnu.linkonce.b.*) + } + + .rodata : { *(.rodata*) } + .hash : { *(.hash) } + .gnu.hash : { *(.gnu.hash) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .eh_frame_hdr : { *(.eh_frame_hdr) } :load :eh_frame_hdr + .eh_frame : { *(.eh_frame) } :load + + .text : { *(.text*) } :load =0xd503201f +} diff --git a/linux-user/arm/Makefile.vdso b/linux-user/arm/Makefile.vdso new file mode 100644 index 0000000000..2d098a5748 --- /dev/null +++ b/linux-user/arm/Makefile.vdso @@ -0,0 +1,17 @@ +include $(BUILD_DIR)/tests/tcg/arm-linux-user/config-target.mak + +SUBDIR = $(SRC_PATH)/linux-user/arm +VPATH += $(SUBDIR) + +all: $(SUBDIR)/vdso-be.so $(SUBDIR)/vdso-le.so + +# Adding -use-blx disables unneeded interworking without actually using blx. +LDFLAGS = -nostdlib -shared -Wl,-use-blx \ + -Wl,-h,linux-vdso.so.1 -Wl,--build-id=sha1 \ + -Wl,--hash-style=both -Wl,-T,$(SUBDIR)/vdso.ld + +$(SUBDIR)/vdso-be.so: vdso.S vdso.ld vdso-asmoffset.h + $(CC) -o $@ $(LDFLAGS) -mbig-endian $< + +$(SUBDIR)/vdso-le.so: vdso.S vdso.ld vdso-asmoffset.h + $(CC) -o $@ $(LDFLAGS) -mlittle-endian $< diff --git a/linux-user/arm/meson.build b/linux-user/arm/meson.build index 5a93c925cf..c4bb9af5b8 100644 --- a/linux-user/arm/meson.build +++ b/linux-user/arm/meson.build @@ -5,3 +5,15 @@ syscall_nr_generators += { arguments: [ meson.current_source_dir() / 'syscallhdr.sh', '@INPUT@', '@OUTPUT@', '@EXTRA_ARGS@' ], output: '@BASENAME@_nr.h') } + +# TARGET_BIG_ENDIAN is defined to 'n' for little-endian; which means it +# is always true as far as source_set.apply() is concerned. Always build +# both header files and include the right one via #if. + +vdso_be_inc = gen_vdso.process('vdso-be.so', + extra_args: ['-s', 'sigreturn_codes']) + +vdso_le_inc = gen_vdso.process('vdso-le.so', + extra_args: ['-s', 'sigreturn_codes']) + +linux_user_ss.add(when: 'TARGET_ARM', if_true: [vdso_be_inc, vdso_le_inc]) diff --git a/linux-user/arm/signal.c b/linux-user/arm/signal.c index 4020601c54..f77f692c63 100644 --- a/linux-user/arm/signal.c +++ b/linux-user/arm/signal.c @@ -22,6 +22,7 @@ #include "signal-common.h" #include "linux-user/trace.h" #include "target/arm/cpu-features.h" +#include "vdso-asmoffset.h" struct target_sigcontext { abi_ulong trap_no; @@ -103,6 +104,11 @@ struct rt_sigframe struct sigframe sig; }; +QEMU_BUILD_BUG_ON(offsetof(struct sigframe, retcode[3]) + != SIGFRAME_RC3_OFFSET); +QEMU_BUILD_BUG_ON(offsetof(struct rt_sigframe, sig.retcode[3]) + != RT_SIGFRAME_RC3_OFFSET); + static abi_ptr sigreturn_fdpic_tramp; /* @@ -161,6 +167,9 @@ get_sigframe(struct target_sigaction *ka, CPUARMState *regs, int framesize) return (sp - framesize) & ~7; } +static void write_arm_sigreturn(uint32_t *rc, int syscall); +static void write_arm_fdpic_sigreturn(uint32_t *rc, int ofs); + static int setup_return(CPUARMState *env, struct target_sigaction *ka, int usig, struct sigframe *frame, abi_ulong sp_addr) @@ -168,9 +177,9 @@ setup_return(CPUARMState *env, struct target_sigaction *ka, int usig, abi_ulong handler = 0; abi_ulong handler_fdpic_GOT = 0; abi_ulong retcode; - int thumb, retcode_idx; - int is_fdpic = info_is_fdpic(((TaskState *)thread_cpu->opaque)->info); - bool copy_retcode; + bool is_fdpic = info_is_fdpic(((TaskState *)thread_cpu->opaque)->info); + bool is_rt = ka->sa_flags & TARGET_SA_SIGINFO; + bool thumb; if (is_fdpic) { /* In FDPIC mode, ka->_sa_handler points to a function @@ -185,9 +194,7 @@ setup_return(CPUARMState *env, struct target_sigaction *ka, int usig, } else { handler = ka->_sa_handler; } - thumb = handler & 1; - retcode_idx = thumb + (ka->sa_flags & TARGET_SA_SIGINFO ? 2 : 0); uint32_t cpsr = cpsr_read(env); @@ -203,24 +210,32 @@ setup_return(CPUARMState *env, struct target_sigaction *ka, int usig, cpsr &= ~CPSR_E; } - if (ka->sa_flags & TARGET_SA_RESTORER) { - if (is_fdpic) { - __put_user((abi_ulong)ka->sa_restorer, &frame->retcode[3]); - retcode = (sigreturn_fdpic_tramp + - retcode_idx * RETCODE_BYTES + thumb); - copy_retcode = true; - } else { - retcode = ka->sa_restorer; - copy_retcode = false; - } + /* Our vdso default_sigreturn label is a table of entry points. */ + retcode = default_sigreturn + (is_fdpic * 2 + is_rt) * 8; + + /* + * Put the sigreturn code on the stack no matter which return + * mechanism we use in order to remain ABI compliant. + * Because this is about ABI, always use the A32 instructions, + * despite the fact that our actual vdso trampoline is T16. + */ + if (is_fdpic) { + write_arm_fdpic_sigreturn(frame->retcode, + is_rt ? RT_SIGFRAME_RC3_OFFSET + : SIGFRAME_RC3_OFFSET); } else { - retcode = default_sigreturn + retcode_idx * RETCODE_BYTES + thumb; - copy_retcode = true; + write_arm_sigreturn(frame->retcode, + is_rt ? TARGET_NR_rt_sigreturn + : TARGET_NR_sigreturn); } - /* Copy the code to the stack slot for ABI compatibility. */ - if (copy_retcode) { - memcpy(frame->retcode, g2h_untagged(retcode & ~1), RETCODE_BYTES); + if (ka->sa_flags & TARGET_SA_RESTORER) { + if (is_fdpic) { + /* Place the function descriptor in slot 3. */ + __put_user((abi_ulong)ka->sa_restorer, &frame->retcode[3]); + } else { + retcode = ka->sa_restorer; + } } env->regs[0] = usig; diff --git a/linux-user/arm/vdso-asmoffset.h b/linux-user/arm/vdso-asmoffset.h new file mode 100644 index 0000000000..252a95c46e --- /dev/null +++ b/linux-user/arm/vdso-asmoffset.h @@ -0,0 +1,3 @@ +/* offsetof(struct sigframe, retcode[3]) */ +#define SIGFRAME_RC3_OFFSET 756 +#define RT_SIGFRAME_RC3_OFFSET 884 diff --git a/linux-user/arm/vdso-be.so b/linux-user/arm/vdso-be.so new file mode 100755 index 0000000000..69cafbb956 Binary files /dev/null and b/linux-user/arm/vdso-be.so differ diff --git a/linux-user/arm/vdso-le.so b/linux-user/arm/vdso-le.so new file mode 100755 index 0000000000..ad05a12518 Binary files /dev/null and b/linux-user/arm/vdso-le.so differ diff --git a/linux-user/arm/vdso.S b/linux-user/arm/vdso.S new file mode 100644 index 0000000000..b3bb6491dc --- /dev/null +++ b/linux-user/arm/vdso.S @@ -0,0 +1,174 @@ +/* + * arm linux replacement vdso. + * + * Copyright 2023 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include +#include "vdso-asmoffset.h" + +/* + * All supported cpus have T16 instructions: at least arm4t. + * + * We support user-user with m-profile cpus as an extension, because it + * is useful for testing gcc, which requires we avoid A32 instructions. + */ + .thumb + .arch armv4t + .eabi_attribute Tag_FP_arch, 0 + .eabi_attribute Tag_ARM_ISA_use, 0 + + .text + +.macro raw_syscall n + .ifne \n < 0x100 + mov r7, #\n + .elseif \n < 0x1ff + mov r7, #0xff + add r7, #(\n - 0xff) + .else + .err + .endif + swi #0 +.endm + +.macro fdpic_thunk ofs + ldr r3, [sp, #\ofs] + ldmia r2, {r2, r3} + mov r9, r3 + bx r2 +.endm + +.macro endf name + .globl \name + .type \name, %function + .size \name, . - \name +.endm + +/* + * We must save/restore r7 for the EABI syscall number. + * While we're doing that, we might as well save LR to get a free return, + * and a branch that is interworking back to ARMv5. + */ + +.macro SYSCALL name, nr +\name: + .cfi_startproc + push {r7, lr} + .cfi_adjust_cfa_offset 8 + .cfi_offset r7, -8 + .cfi_offset lr, -4 + raw_syscall \nr + pop {r7, pc} + .cfi_endproc +endf \name +.endm + +SYSCALL __vdso_clock_gettime, __NR_clock_gettime +SYSCALL __vdso_clock_gettime64, __NR_clock_gettime64 +SYSCALL __vdso_clock_getres, __NR_clock_getres +SYSCALL __vdso_gettimeofday, __NR_gettimeofday + + +/* + * We, like the real kernel, use a table of sigreturn trampolines. + * Unlike the real kernel, we do not attempt to pack this into as + * few bytes as possible -- simply use 8 bytes per slot. + * + * Within each slot, use the exact same code sequence as the kernel, + * lest we trip up someone doing code inspection. + */ + +.macro slot n + .balign 8 + .org sigreturn_codes + 8 * \n +.endm + +.macro cfi_fdpic_r9 ofs + /* + * fd = *(r13 + ofs) + * r9 = *(fd + 4) + * + * DW_CFA_expression r9, length (7), + * DW_OP_breg13, ofs, DW_OP_deref, + * DW_OP_plus_uconst, 4, DW_OP_deref + */ + .cfi_escape 0x10, 9, 7, 0x7d, (\ofs & 0x7f) + 0x80, (\ofs >> 7), 0x06, 0x23, 4, 0x06 +.endm + +.macro cfi_fdpic_pc ofs + /* + * fd = *(r13 + ofs) + * pc = *fd + * + * DW_CFA_expression lr (14), length (5), + * DW_OP_breg13, ofs, DW_OP_deref, DW_OP_deref + */ + .cfi_escape 0x10, 14, 5, 0x7d, (\ofs & 0x7f) + 0x80, (\ofs >> 7), 0x06, 0x06 +.endm + +/* + * Start the unwind info at least one instruction before the signal + * trampoline, because the unwinder will assume we are returning + * after a call site. + */ + .cfi_startproc simple + .cfi_signal_frame + .cfi_return_column 15 + + .cfi_def_cfa sp, 32 + 64 + .cfi_offset r0, -16 * 4 + .cfi_offset r1, -15 * 4 + .cfi_offset r2, -14 * 4 + .cfi_offset r3, -13 * 4 + .cfi_offset r4, -12 * 4 + .cfi_offset r5, -11 * 4 + .cfi_offset r6, -10 * 4 + .cfi_offset r7, -9 * 4 + .cfi_offset r8, -8 * 4 + .cfi_offset r9, -7 * 4 + .cfi_offset r10, -6 * 4 + .cfi_offset r11, -5 * 4 + .cfi_offset r12, -4 * 4 + .cfi_offset r13, -3 * 4 + .cfi_offset r14, -2 * 4 + .cfi_offset r15, -1 * 4 + + nop + + .balign 16 +sigreturn_codes: + /* [EO]ABI sigreturn */ + slot 0 + raw_syscall __NR_sigreturn + + .cfi_def_cfa_offset 160 + 64 + + /* [EO]ABI rt_sigreturn */ + slot 1 + raw_syscall __NR_rt_sigreturn + + .cfi_endproc + + /* FDPIC sigreturn */ + .cfi_startproc + cfi_fdpic_pc SIGFRAME_RC3_OFFSET + cfi_fdpic_r9 SIGFRAME_RC3_OFFSET + + slot 2 + fdpic_thunk SIGFRAME_RC3_OFFSET + .cfi_endproc + + /* FDPIC rt_sigreturn */ + .cfi_startproc + cfi_fdpic_pc RT_SIGFRAME_RC3_OFFSET + cfi_fdpic_r9 RT_SIGFRAME_RC3_OFFSET + + slot 3 + fdpic_thunk RT_SIGFRAME_RC3_OFFSET + .cfi_endproc + + .balign 16 +endf sigreturn_codes diff --git a/linux-user/arm/vdso.ld b/linux-user/arm/vdso.ld new file mode 100644 index 0000000000..3b00adf27a --- /dev/null +++ b/linux-user/arm/vdso.ld @@ -0,0 +1,67 @@ +/* + * Linker script for linux arm replacement vdso. + * + * Copyright 2023 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +VERSION { + LINUX_2.6 { + global: + __vdso_clock_gettime; + __vdso_gettimeofday; + __vdso_clock_getres; + __vdso_clock_gettime64; + + local: *; + }; +} + + +PHDRS { + phdr PT_PHDR FLAGS(4) PHDRS; + load PT_LOAD FLAGS(7) FILEHDR PHDRS; /* FLAGS=RWX */ + dynamic PT_DYNAMIC FLAGS(4); + eh_frame_hdr PT_GNU_EH_FRAME; + note PT_NOTE FLAGS(4); +} + +SECTIONS { + . = SIZEOF_HEADERS; + + /* + * The following, including the FILEHDRS and PHDRS, are modified + * when we relocate the binary. We want them to be initially + * writable for the relocation; we'll force them read-only after. + */ + .note : { *(.note*) } :load :note + .dynamic : { *(.dynamic) } :load :dynamic + .dynsym : { *(.dynsym) } :load + /* + * There ought not be any real read-write data. + * But since we manipulated the segment layout, + * we have to put these sections somewhere. + */ + .data : { + *(.data*) + *(.sdata*) + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.bss*) + *(.dynbss*) + *(.gnu.linkonce.b.*) + } + + .rodata : { *(.rodata*) } + .hash : { *(.hash) } + .gnu.hash : { *(.gnu.hash) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .eh_frame_hdr : { *(.eh_frame_hdr) } :load :eh_frame_hdr + .eh_frame : { *(.eh_frame) } :load + + .text : { *(.text*) } :load +} diff --git a/linux-user/elfload.c b/linux-user/elfload.c index baa69e5535..8761f9e26b 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -37,6 +37,19 @@ #undef ELF_ARCH #endif +#ifndef TARGET_ARCH_HAS_SIGTRAMP_PAGE +#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 0 +#endif + +typedef struct { + const uint8_t *image; + const uint32_t *relocs; + unsigned image_size; + unsigned reloc_count; + unsigned sigreturn_ofs; + unsigned rt_sigreturn_ofs; +} VdsoImageInfo; + #define ELF_OSABI ELFOSABI_SYSV /* from personality.h */ @@ -296,12 +309,27 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *en (*regs)[15] = tswapreg(env->regs[R_ESP]); (*regs)[16] = tswapreg(env->segs[R_SS].selector & 0xffff); } -#endif + +/* + * i386 is the only target which supplies AT_SYSINFO for the vdso. + * All others only supply AT_SYSINFO_EHDR. + */ +#define DLINFO_ARCH_ITEMS (vdso_info != NULL) +#define ARCH_DLINFO \ + do { \ + if (vdso_info) { \ + NEW_AUX_ENT(AT_SYSINFO, vdso_info->entry); \ + } \ + } while (0) + +#endif /* TARGET_X86_64 */ + +#define VDSO_HEADER "vdso.c.inc" #define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE 4096 -#endif +#endif /* TARGET_I386 */ #ifdef TARGET_ARM @@ -921,6 +949,13 @@ const char *elf_hwcap2_str(uint32_t bit) #undef GET_FEATURE_ID #endif /* not TARGET_AARCH64 */ + +#if TARGET_BIG_ENDIAN +# define VDSO_HEADER "vdso-be.c.inc" +#else +# define VDSO_HEADER "vdso-le.c.inc" +#endif + #endif /* TARGET_ARM */ #ifdef TARGET_SPARC @@ -1156,6 +1191,14 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUPPCState *en #define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE 4096 +#ifndef TARGET_PPC64 +# define VDSO_HEADER "vdso-32.c.inc" +#elif TARGET_BIG_ENDIAN +# define VDSO_HEADER "vdso-64.c.inc" +#else +# define VDSO_HEADER "vdso-64le.c.inc" +#endif + #endif #ifdef TARGET_LOONGARCH64 @@ -1166,6 +1209,8 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUPPCState *en #define elf_check_arch(x) ((x) == EM_LOONGARCH) +#define VDSO_HEADER "vdso.c.inc" + static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) { @@ -1853,6 +1898,8 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, #define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE 4096 +#define VDSO_HEADER "vdso.c.inc" + #endif /* TARGET_S390X */ #ifdef TARGET_RISCV @@ -1861,8 +1908,10 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, #ifdef TARGET_RISCV32 #define ELF_CLASS ELFCLASS32 +#define VDSO_HEADER "vdso-32.c.inc" #else #define ELF_CLASS ELFCLASS64 +#define VDSO_HEADER "vdso-64.c.inc" #endif #define ELF_HWCAP get_elf_hwcap() @@ -1898,6 +1947,8 @@ static inline void init_thread(struct target_pt_regs *regs, #define STACK_GROWS_DOWN 0 #define STACK_ALIGNMENT 64 +#define VDSO_HEADER "vdso.c.inc" + static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) { @@ -2205,7 +2256,8 @@ static inline void bswap_mips_abiflags(Mips_elf_abiflags_v0 *abiflags) { } #ifdef USE_ELF_CORE_DUMP static int elf_core_dump(int, const CPUArchState *); #endif /* USE_ELF_CORE_DUMP */ -static void load_symbols(struct elfhdr *hdr, int fd, abi_ulong load_bias); +static void load_symbols(struct elfhdr *hdr, const ImageSource *src, + abi_ulong load_bias); /* Verify the portions of EHDR within E_IDENT for the target. This can be performed before bswapping the entire header. */ @@ -2474,7 +2526,8 @@ static abi_ulong loader_build_fdpic_loadmap(struct image_info *info, abi_ulong s static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, struct elfhdr *exec, struct image_info *info, - struct image_info *interp_info) + struct image_info *interp_info, + struct image_info *vdso_info) { abi_ulong sp; abi_ulong u_argc, u_argv, u_envp, u_auxv; @@ -2562,10 +2615,15 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, } size = (DLINFO_ITEMS + 1) * 2; - if (k_base_platform) + if (k_base_platform) { size += 2; - if (k_platform) + } + if (k_platform) { size += 2; + } + if (vdso_info) { + size += 2; + } #ifdef DLINFO_ARCH_ITEMS size += DLINFO_ARCH_ITEMS * 2; #endif @@ -2647,6 +2705,9 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, if (u_platform) { NEW_AUX_ENT(AT_PLATFORM, u_platform); } + if (vdso_info) { + NEW_AUX_ENT(AT_SYSINFO_EHDR, vdso_info->load_addr); + } NEW_AUX_ENT (AT_NULL, 0); #undef NEW_AUX_ENT @@ -3106,10 +3167,9 @@ static bool parse_elf_property(const uint32_t *data, int *off, int datasz, } /* Process NT_GNU_PROPERTY_TYPE_0. */ -static bool parse_elf_properties(int image_fd, +static bool parse_elf_properties(const ImageSource *src, struct image_info *info, const struct elf_phdr *phdr, - char bprm_buf[BPRM_BUF_SIZE], Error **errp) { union { @@ -3137,14 +3197,8 @@ static bool parse_elf_properties(int image_fd, return false; } - if (phdr->p_offset + n <= BPRM_BUF_SIZE) { - memcpy(¬e, bprm_buf + phdr->p_offset, n); - } else { - ssize_t len = pread(image_fd, ¬e, n, phdr->p_offset); - if (len != n) { - error_setg_errno(errp, errno, "Error reading file header"); - return false; - } + if (!imgsrc_read(¬e, phdr->p_offset, n, src, errp)) { + return false; } /* @@ -3190,29 +3244,34 @@ static bool parse_elf_properties(int image_fd, } } -/* Load an ELF image into the address space. +/** + * load_elf_image: Load an ELF image into the address space. + * @image_name: the filename of the image, to use in error messages. + * @src: the ImageSource from which to read. + * @info: info collected from the loaded image. + * @ehdr: the ELF header, not yet bswapped. + * @pinterp_name: record any PT_INTERP string found. + * + * On return: @info values will be filled in, as necessary or available. + */ - IMAGE_NAME is the filename of the image, to use in error messages. - IMAGE_FD is the open file descriptor for the image. - - BPRM_BUF is a copy of the beginning of the file; this of course - contains the elf file header at offset 0. It is assumed that this - buffer is sufficiently aligned to present no problems to the host - in accessing data at aligned offsets within the buffer. - - On return: INFO values will be filled in, as necessary or available. */ - -static void load_elf_image(const char *image_name, int image_fd, - struct image_info *info, char **pinterp_name, - char bprm_buf[BPRM_BUF_SIZE]) +static void load_elf_image(const char *image_name, const ImageSource *src, + struct image_info *info, struct elfhdr *ehdr, + char **pinterp_name) { - struct elfhdr *ehdr = (struct elfhdr *)bprm_buf; - struct elf_phdr *phdr; + g_autofree struct elf_phdr *phdr = NULL; abi_ulong load_addr, load_bias, loaddr, hiaddr, error; - int i, retval, prot_exec; + int i, prot_exec; Error *err = NULL; - /* First of all, some simple consistency checks */ + /* + * First of all, some simple consistency checks. + * Note that we rely on the bswapped ehdr staying in bprm_buf, + * for later use by load_elf_binary and create_elf_tables. + */ + if (!imgsrc_read(ehdr, 0, sizeof(*ehdr), src, &err)) { + goto exit_errmsg; + } if (!elf_check_ident(ehdr)) { error_setg(&err, "Invalid ELF image for this architecture"); goto exit_errmsg; @@ -3223,15 +3282,11 @@ static void load_elf_image(const char *image_name, int image_fd, goto exit_errmsg; } - i = ehdr->e_phnum * sizeof(struct elf_phdr); - if (ehdr->e_phoff + i <= BPRM_BUF_SIZE) { - phdr = (struct elf_phdr *)(bprm_buf + ehdr->e_phoff); - } else { - phdr = (struct elf_phdr *) alloca(i); - retval = pread(image_fd, phdr, i, ehdr->e_phoff); - if (retval != i) { - goto exit_read; - } + phdr = imgsrc_read_alloc(ehdr->e_phoff, + ehdr->e_phnum * sizeof(struct elf_phdr), + src, &err); + if (phdr == NULL) { + goto exit_errmsg; } bswap_phdr(phdr, ehdr->e_phnum); @@ -3268,17 +3323,10 @@ static void load_elf_image(const char *image_name, int image_fd, goto exit_errmsg; } - interp_name = g_malloc(eppnt->p_filesz); - - if (eppnt->p_offset + eppnt->p_filesz <= BPRM_BUF_SIZE) { - memcpy(interp_name, bprm_buf + eppnt->p_offset, - eppnt->p_filesz); - } else { - retval = pread(image_fd, interp_name, eppnt->p_filesz, - eppnt->p_offset); - if (retval != eppnt->p_filesz) { - goto exit_read; - } + interp_name = imgsrc_read_alloc(eppnt->p_offset, eppnt->p_filesz, + src, &err); + if (interp_name == NULL) { + goto exit_errmsg; } if (interp_name[eppnt->p_filesz - 1] != 0) { error_setg(&err, "Invalid PT_INTERP entry"); @@ -3286,7 +3334,7 @@ static void load_elf_image(const char *image_name, int image_fd, } *pinterp_name = g_steal_pointer(&interp_name); } else if (eppnt->p_type == PT_GNU_PROPERTY) { - if (!parse_elf_properties(image_fd, info, eppnt, bprm_buf, &err)) { + if (!parse_elf_properties(src, info, eppnt, &err)) { goto exit_errmsg; } } else if (eppnt->p_type == PT_GNU_STACK) { @@ -3439,9 +3487,9 @@ static void load_elf_image(const char *image_name, int image_fd, * but no backing file segment. */ if (eppnt->p_filesz != 0) { - error = target_mmap(vaddr_ps, eppnt->p_filesz + vaddr_po, + error = imgsrc_mmap(vaddr_ps, eppnt->p_filesz + vaddr_po, elf_prot, MAP_PRIVATE | MAP_FIXED, - image_fd, eppnt->p_offset - vaddr_po); + src, eppnt->p_offset - vaddr_po); if (error == -1) { goto exit_mmap; } @@ -3473,20 +3521,11 @@ static void load_elf_image(const char *image_name, int image_fd, #ifdef TARGET_MIPS } else if (eppnt->p_type == PT_MIPS_ABIFLAGS) { Mips_elf_abiflags_v0 abiflags; - if (eppnt->p_filesz < sizeof(Mips_elf_abiflags_v0)) { - error_setg(&err, "Invalid PT_MIPS_ABIFLAGS entry"); + + if (!imgsrc_read(&abiflags, eppnt->p_offset, sizeof(abiflags), + src, &err)) { goto exit_errmsg; } - if (eppnt->p_offset + eppnt->p_filesz <= BPRM_BUF_SIZE) { - memcpy(&abiflags, bprm_buf + eppnt->p_offset, - sizeof(Mips_elf_abiflags_v0)); - } else { - retval = pread(image_fd, &abiflags, sizeof(Mips_elf_abiflags_v0), - eppnt->p_offset); - if (retval != sizeof(Mips_elf_abiflags_v0)) { - goto exit_read; - } - } bswap_mips_abiflags(&abiflags); info->fp_abi = abiflags.fp_abi; #endif @@ -3499,23 +3538,16 @@ static void load_elf_image(const char *image_name, int image_fd, } if (qemu_log_enabled()) { - load_symbols(ehdr, image_fd, load_bias); + load_symbols(ehdr, src, load_bias); } - debuginfo_report_elf(image_name, image_fd, load_bias); + debuginfo_report_elf(image_name, src->fd, load_bias); mmap_unlock(); - close(image_fd); + close(src->fd); return; - exit_read: - if (retval >= 0) { - error_setg(&err, "Incomplete read of file header"); - } else { - error_setg_errno(&err, errno, "Error reading file header"); - } - goto exit_errmsg; exit_mmap: error_setg_errno(&err, errno, "Error mapping file"); goto exit_errmsg; @@ -3527,6 +3559,8 @@ static void load_elf_image(const char *image_name, int image_fd, static void load_elf_interp(const char *filename, struct image_info *info, char bprm_buf[BPRM_BUF_SIZE]) { + struct elfhdr ehdr; + ImageSource src; int fd, retval; Error *err = NULL; @@ -3544,11 +3578,57 @@ static void load_elf_interp(const char *filename, struct image_info *info, exit(-1); } - if (retval < BPRM_BUF_SIZE) { - memset(bprm_buf + retval, 0, BPRM_BUF_SIZE - retval); + src.fd = fd; + src.cache = bprm_buf; + src.cache_size = retval; + + load_elf_image(filename, &src, info, &ehdr, NULL); +} + +#ifdef VDSO_HEADER +#include VDSO_HEADER +#define vdso_image_info() &vdso_image_info +#else +#define vdso_image_info() NULL +#endif + +static void load_elf_vdso(struct image_info *info, const VdsoImageInfo *vdso) +{ + ImageSource src; + struct elfhdr ehdr; + abi_ulong load_bias, load_addr; + + src.fd = -1; + src.cache = vdso->image; + src.cache_size = vdso->image_size; + + load_elf_image("", &src, info, &ehdr, NULL); + load_addr = info->load_addr; + load_bias = info->load_bias; + + /* + * We need to relocate the VDSO image. The one built into the kernel + * is built for a fixed address. The one built for QEMU is not, since + * that requires close control of the guest address space. + * We pre-processed the image to locate all of the addresses that need + * to be updated. + */ + for (unsigned i = 0, n = vdso->reloc_count; i < n; i++) { + abi_ulong *addr = g2h_untagged(load_addr + vdso->relocs[i]); + *addr = tswapal(tswapal(*addr) + load_bias); } - load_elf_image(filename, fd, info, NULL, bprm_buf); + /* Install signal trampolines, if present. */ + if (vdso->sigreturn_ofs) { + default_sigreturn = load_addr + vdso->sigreturn_ofs; + } + if (vdso->rt_sigreturn_ofs) { + default_rt_sigreturn = load_addr + vdso->rt_sigreturn_ofs; + } + + /* Remove write from VDSO segment. */ + target_mprotect(info->start_data, info->end_data - info->start_data, + PROT_READ | PROT_EXEC); } static int symfind(const void *s0, const void *s1) @@ -3595,19 +3675,20 @@ static int symcmp(const void *s0, const void *s1) } /* Best attempt to load symbols from this ELF object. */ -static void load_symbols(struct elfhdr *hdr, int fd, abi_ulong load_bias) +static void load_symbols(struct elfhdr *hdr, const ImageSource *src, + abi_ulong load_bias) { int i, shnum, nsyms, sym_idx = 0, str_idx = 0; - uint64_t segsz; - struct elf_shdr *shdr; + g_autofree struct elf_shdr *shdr = NULL; char *strings = NULL; - struct syminfo *s = NULL; - struct elf_sym *new_syms, *syms = NULL; + struct elf_sym *syms = NULL; + struct elf_sym *new_syms; + uint64_t segsz; shnum = hdr->e_shnum; - i = shnum * sizeof(struct elf_shdr); - shdr = (struct elf_shdr *)alloca(i); - if (pread(fd, shdr, i, hdr->e_shoff) != i) { + shdr = imgsrc_read_alloc(hdr->e_shoff, shnum * sizeof(struct elf_shdr), + src, NULL); + if (shdr == NULL) { return; } @@ -3625,31 +3706,33 @@ static void load_symbols(struct elfhdr *hdr, int fd, abi_ulong load_bias) found: /* Now know where the strtab and symtab are. Snarf them. */ - s = g_try_new(struct syminfo, 1); - if (!s) { - goto give_up; - } segsz = shdr[str_idx].sh_size; - s->disas_strtab = strings = g_try_malloc(segsz); - if (!strings || - pread(fd, strings, segsz, shdr[str_idx].sh_offset) != segsz) { + strings = g_try_malloc(segsz); + if (!strings) { + goto give_up; + } + if (!imgsrc_read(strings, shdr[str_idx].sh_offset, segsz, src, NULL)) { goto give_up; } segsz = shdr[sym_idx].sh_size; - syms = g_try_malloc(segsz); - if (!syms || pread(fd, syms, segsz, shdr[sym_idx].sh_offset) != segsz) { - goto give_up; - } - if (segsz / sizeof(struct elf_sym) > INT_MAX) { - /* Implausibly large symbol table: give up rather than ploughing - * on with the number of symbols calculation overflowing + /* + * Implausibly large symbol table: give up rather than ploughing + * on with the number of symbols calculation overflowing. */ goto give_up; } nsyms = segsz / sizeof(struct elf_sym); + syms = g_try_malloc(segsz); + if (!syms) { + goto give_up; + } + if (!imgsrc_read(syms, shdr[sym_idx].sh_offset, segsz, src, NULL)) { + goto give_up; + } + for (i = 0; i < nsyms; ) { bswap_sym(syms + i); /* Throw away entries which we do not need. */ @@ -3674,10 +3757,12 @@ static void load_symbols(struct elfhdr *hdr, int fd, abi_ulong load_bias) goto give_up; } - /* Attempt to free the storage associated with the local symbols - that we threw away. Whether or not this has any effect on the - memory allocation depends on the malloc implementation and how - many symbols we managed to discard. */ + /* + * Attempt to free the storage associated with the local symbols + * that we threw away. Whether or not this has any effect on the + * memory allocation depends on the malloc implementation and how + * many symbols we managed to discard. + */ new_syms = g_try_renew(struct elf_sym, syms, nsyms); if (new_syms == NULL) { goto give_up; @@ -3686,20 +3771,23 @@ static void load_symbols(struct elfhdr *hdr, int fd, abi_ulong load_bias) qsort(syms, nsyms, sizeof(*syms), symcmp); - s->disas_num_syms = nsyms; -#if ELF_CLASS == ELFCLASS32 - s->disas_symtab.elf32 = syms; -#else - s->disas_symtab.elf64 = syms; -#endif - s->lookup_symbol = lookup_symbolxx; - s->next = syminfos; - syminfos = s; + { + struct syminfo *s = g_new(struct syminfo, 1); + s->disas_strtab = strings; + s->disas_num_syms = nsyms; +#if ELF_CLASS == ELFCLASS32 + s->disas_symtab.elf32 = syms; +#else + s->disas_symtab.elf64 = syms; +#endif + s->lookup_symbol = lookup_symbolxx; + s->next = syminfos; + syminfos = s; + } return; -give_up: - g_free(s); + give_up: g_free(strings); g_free(syms); } @@ -3741,8 +3829,14 @@ uint32_t get_elf_eflags(int fd) int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) { - struct image_info interp_info; - struct elfhdr elf_ex; + /* + * We need a copy of the elf header for passing to create_elf_tables. + * We will have overwritten the original when we re-use bprm->buf + * while loading the interpreter. Allocate the storage for this now + * and let elf_load_image do any swapping that may be required. + */ + struct elfhdr ehdr; + struct image_info interp_info, vdso_info; char *elf_interpreter = NULL; char *scratch; @@ -3751,13 +3845,7 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) interp_info.fp_abi = MIPS_ABI_FP_UNKNOWN; #endif - load_elf_image(bprm->filename, bprm->fd, info, - &elf_interpreter, bprm->buf); - - /* ??? We need a copy of the elf header for passing to create_elf_tables. - If we do nothing, we'll have overwritten this when we re-use bprm->buf - when we load the interpreter. */ - elf_ex = *(struct elfhdr *)bprm->buf; + load_elf_image(bprm->filename, &bprm->src, info, &ehdr, &elf_interpreter); /* Do this so that we can load the interpreter, if need be. We will change some of these later */ @@ -3829,10 +3917,14 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) } /* - * TODO: load a vdso, which would also contain the signal trampolines. - * Otherwise, allocate a private page to hold them. + * Load a vdso if available, which will amongst other things contain the + * signal trampolines. Otherwise, allocate a separate page for them. */ - if (TARGET_ARCH_HAS_SIGTRAMP_PAGE) { + const VdsoImageInfo *vdso = vdso_image_info(); + if (vdso) { + load_elf_vdso(&vdso_info, vdso); + info->vdso = vdso_info.load_bias; + } else if (TARGET_ARCH_HAS_SIGTRAMP_PAGE) { abi_long tramp_page = target_mmap(0, TARGET_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); @@ -3844,8 +3936,9 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) target_mprotect(tramp_page, TARGET_PAGE_SIZE, PROT_READ | PROT_EXEC); } - bprm->p = create_elf_tables(bprm->p, bprm->argc, bprm->envc, &elf_ex, - info, (elf_interpreter ? &interp_info : NULL)); + bprm->p = create_elf_tables(bprm->p, bprm->argc, bprm->envc, &ehdr, info, + elf_interpreter ? &interp_info : NULL, + vdso ? &vdso_info : NULL); info->start_stack = bprm->p; /* If we have an interpreter, set that as the program's entry point. diff --git a/linux-user/flatload.c b/linux-user/flatload.c index fdcc4610fa..5b62aa0a2b 100644 --- a/linux-user/flatload.c +++ b/linux-user/flatload.c @@ -463,7 +463,7 @@ static int load_flat_file(struct linux_binprm * bprm, DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n"); textpos = target_mmap(0, text_len, PROT_READ|PROT_EXEC, - MAP_PRIVATE, bprm->fd, 0); + MAP_PRIVATE, bprm->src.fd, 0); if (textpos == -1) { fprintf(stderr, "Unable to mmap process text\n"); return -1; @@ -490,7 +490,7 @@ static int load_flat_file(struct linux_binprm * bprm, } else #endif { - result = target_pread(bprm->fd, datapos, + result = target_pread(bprm->src.fd, datapos, data_len + (relocs * sizeof(abi_ulong)), fpos); } @@ -540,10 +540,10 @@ static int load_flat_file(struct linux_binprm * bprm, else #endif { - result = target_pread(bprm->fd, textpos, + result = target_pread(bprm->src.fd, textpos, text_len, 0); if (result >= 0) { - result = target_pread(bprm->fd, datapos, + result = target_pread(bprm->src.fd, datapos, data_len + (relocs * sizeof(abi_ulong)), ntohl(hdr->data_start)); } diff --git a/linux-user/gen-vdso-elfn.c.inc b/linux-user/gen-vdso-elfn.c.inc new file mode 100644 index 0000000000..95856eb839 --- /dev/null +++ b/linux-user/gen-vdso-elfn.c.inc @@ -0,0 +1,314 @@ +/* + * Post-process a vdso elf image for inclusion into qemu. + * Elf size specialization. + * + * Copyright 2023 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +static void elfN(bswap_ehdr)(ElfN(Ehdr) *ehdr) +{ + bswaps(&ehdr->e_type); /* Object file type */ + bswaps(&ehdr->e_machine); /* Architecture */ + bswaps(&ehdr->e_version); /* Object file version */ + bswaps(&ehdr->e_entry); /* Entry point virtual address */ + bswaps(&ehdr->e_phoff); /* Program header table file offset */ + bswaps(&ehdr->e_shoff); /* Section header table file offset */ + bswaps(&ehdr->e_flags); /* Processor-specific flags */ + bswaps(&ehdr->e_ehsize); /* ELF header size in bytes */ + bswaps(&ehdr->e_phentsize); /* Program header table entry size */ + bswaps(&ehdr->e_phnum); /* Program header table entry count */ + bswaps(&ehdr->e_shentsize); /* Section header table entry size */ + bswaps(&ehdr->e_shnum); /* Section header table entry count */ + bswaps(&ehdr->e_shstrndx); /* Section header string table index */ +} + +static void elfN(bswap_phdr)(ElfN(Phdr) *phdr) +{ + bswaps(&phdr->p_type); /* Segment type */ + bswaps(&phdr->p_flags); /* Segment flags */ + bswaps(&phdr->p_offset); /* Segment file offset */ + bswaps(&phdr->p_vaddr); /* Segment virtual address */ + bswaps(&phdr->p_paddr); /* Segment physical address */ + bswaps(&phdr->p_filesz); /* Segment size in file */ + bswaps(&phdr->p_memsz); /* Segment size in memory */ + bswaps(&phdr->p_align); /* Segment alignment */ +} + +static void elfN(bswap_shdr)(ElfN(Shdr) *shdr) +{ + bswaps(&shdr->sh_name); + bswaps(&shdr->sh_type); + bswaps(&shdr->sh_flags); + bswaps(&shdr->sh_addr); + bswaps(&shdr->sh_offset); + bswaps(&shdr->sh_size); + bswaps(&shdr->sh_link); + bswaps(&shdr->sh_info); + bswaps(&shdr->sh_addralign); + bswaps(&shdr->sh_entsize); +} + +static void elfN(bswap_sym)(ElfN(Sym) *sym) +{ + bswaps(&sym->st_name); + bswaps(&sym->st_value); + bswaps(&sym->st_size); + bswaps(&sym->st_shndx); +} + +static void elfN(bswap_dyn)(ElfN(Dyn) *dyn) +{ + bswaps(&dyn->d_tag); /* Dynamic type tag */ + bswaps(&dyn->d_un.d_ptr); /* Dynamic ptr or val, in union */ +} + +static void elfN(search_symtab)(ElfN(Shdr) *shdr, unsigned sym_idx, + void *buf, bool need_bswap) +{ + unsigned str_idx = shdr[sym_idx].sh_link; + ElfN(Sym) *sym = buf + shdr[sym_idx].sh_offset; + unsigned sym_n = shdr[sym_idx].sh_size / sizeof(*sym); + const char *str = buf + shdr[str_idx].sh_offset; + + for (unsigned i = 0; i < sym_n; ++i) { + const char *name; + + if (need_bswap) { + elfN(bswap_sym)(sym + i); + } + name = str + sym[i].st_name; + + if (sigreturn_sym && strcmp(sigreturn_sym, name) == 0) { + sigreturn_addr = sym[i].st_value; + } + if (rt_sigreturn_sym && strcmp(rt_sigreturn_sym, name) == 0) { + rt_sigreturn_addr = sym[i].st_value; + } + } +} + +static void elfN(process)(FILE *outf, void *buf, bool need_bswap) +{ + ElfN(Ehdr) *ehdr = buf; + ElfN(Phdr) *phdr; + ElfN(Shdr) *shdr; + unsigned phnum, shnum; + unsigned dynamic_ofs = 0; + unsigned dynamic_addr = 0; + unsigned symtab_idx = 0; + unsigned dynsym_idx = 0; + unsigned first_segsz = 0; + int errors = 0; + + if (need_bswap) { + elfN(bswap_ehdr)(ehdr); + } + + phnum = ehdr->e_phnum; + phdr = buf + ehdr->e_phoff; + if (need_bswap) { + for (unsigned i = 0; i < phnum; ++i) { + elfN(bswap_phdr)(phdr + i); + } + } + + shnum = ehdr->e_shnum; + shdr = buf + ehdr->e_shoff; + if (need_bswap) { + for (unsigned i = 0; i < shnum; ++i) { + elfN(bswap_shdr)(shdr + i); + } + } + for (unsigned i = 0; i < shnum; ++i) { + switch (shdr[i].sh_type) { + case SHT_SYMTAB: + symtab_idx = i; + break; + case SHT_DYNSYM: + dynsym_idx = i; + break; + } + } + + /* + * Validate the VDSO is created as we expect: that PT_PHDR, + * PT_DYNAMIC, and PT_NOTE located in a writable data segment. + * PHDR and DYNAMIC require relocation, and NOTE will get the + * linux version number. + */ + for (unsigned i = 0; i < phnum; ++i) { + if (phdr[i].p_type != PT_LOAD) { + continue; + } + if (first_segsz != 0) { + fprintf(stderr, "Multiple LOAD segments\n"); + errors++; + } + if (phdr[i].p_offset != 0) { + fprintf(stderr, "LOAD segment does not cover EHDR\n"); + errors++; + } + if (phdr[i].p_vaddr != 0) { + fprintf(stderr, "LOAD segment not loaded at address 0\n"); + errors++; + } + first_segsz = phdr[i].p_filesz; + if (first_segsz < ehdr->e_phoff + phnum * sizeof(*phdr)) { + fprintf(stderr, "LOAD segment does not cover PHDRs\n"); + errors++; + } + if ((phdr[i].p_flags & (PF_R | PF_W)) != (PF_R | PF_W)) { + fprintf(stderr, "LOAD segment is not read-write\n"); + errors++; + } + } + for (unsigned i = 0; i < phnum; ++i) { + const char *which; + + switch (phdr[i].p_type) { + case PT_PHDR: + which = "PT_PHDR"; + break; + case PT_NOTE: + which = "PT_NOTE"; + break; + case PT_DYNAMIC: + dynamic_ofs = phdr[i].p_offset; + dynamic_addr = phdr[i].p_vaddr; + which = "PT_DYNAMIC"; + break; + default: + continue; + } + if (first_segsz < phdr[i].p_vaddr + phdr[i].p_filesz) { + fprintf(stderr, "LOAD segment does not cover %s\n", which); + errors++; + } + } + if (errors) { + exit(EXIT_FAILURE); + } + + /* Relocate the program headers. */ + for (unsigned i = 0; i < phnum; ++i) { + output_reloc(outf, buf, &phdr[i].p_vaddr); + output_reloc(outf, buf, &phdr[i].p_paddr); + } + + /* Relocate the DYNAMIC entries. */ + if (dynamic_addr) { + ElfN(Dyn) *dyn = buf + dynamic_ofs; + __typeof(dyn->d_tag) tag; + + do { + + if (need_bswap) { + elfN(bswap_dyn)(dyn); + } + tag = dyn->d_tag; + + switch (tag) { + case DT_HASH: + case DT_SYMTAB: + case DT_STRTAB: + case DT_VERDEF: + case DT_VERSYM: + case DT_PLTGOT: + case DT_ADDRRNGLO ... DT_ADDRRNGHI: + /* These entries store an address in the entry. */ + output_reloc(outf, buf, &dyn->d_un.d_val); + break; + + case DT_NULL: + case DT_STRSZ: + case DT_SONAME: + case DT_DEBUG: + case DT_FLAGS: + case DT_FLAGS_1: + case DT_SYMBOLIC: + case DT_BIND_NOW: + case DT_VERDEFNUM: + case DT_VALRNGLO ... DT_VALRNGHI: + /* These entries store an integer in the entry. */ + break; + + case DT_SYMENT: + if (dyn->d_un.d_val != sizeof(ElfN(Sym))) { + fprintf(stderr, "VDSO has incorrect dynamic symbol size\n"); + errors++; + } + break; + + case DT_REL: + case DT_RELSZ: + case DT_RELA: + case DT_RELASZ: + /* + * These entries indicate that the VDSO was built incorrectly. + * It should not have any real relocations. + * ??? The RISC-V toolchain will emit these even when there + * are no relocations. Validate zeros. + */ + if (dyn->d_un.d_val != 0) { + fprintf(stderr, "VDSO has dynamic relocations\n"); + errors++; + } + break; + case DT_RELENT: + case DT_RELAENT: + case DT_TEXTREL: + /* These entries store an integer in the entry. */ + /* Should not be required; see above. */ + break; + + case DT_NEEDED: + case DT_VERNEED: + case DT_PLTREL: + case DT_JMPREL: + case DT_RPATH: + case DT_RUNPATH: + fprintf(stderr, "VDSO has external dependencies\n"); + errors++; + break; + + case PT_LOPROC + 3: + if (ehdr->e_machine == EM_PPC64) { + break; /* DT_PPC64_OPT: integer bitmask */ + } + goto do_default; + + default: + do_default: + /* This is probably something target specific. */ + fprintf(stderr, "VDSO has unknown DYNAMIC entry (%lx)\n", + (unsigned long)tag); + errors++; + break; + } + dyn++; + } while (tag != DT_NULL); + if (errors) { + exit(EXIT_FAILURE); + } + } + + /* Relocate the dynamic symbol table. */ + if (dynsym_idx) { + ElfN(Sym) *sym = buf + shdr[dynsym_idx].sh_offset; + unsigned sym_n = shdr[dynsym_idx].sh_size / sizeof(*sym); + + for (unsigned i = 0; i < sym_n; ++i) { + output_reloc(outf, buf, &sym[i].st_value); + } + } + + /* Search both dynsym and symtab for the signal return symbols. */ + if (dynsym_idx) { + elfN(search_symtab)(shdr, dynsym_idx, buf, need_bswap); + } + if (symtab_idx) { + elfN(search_symtab)(shdr, symtab_idx, buf, need_bswap); + } +} diff --git a/linux-user/gen-vdso.c b/linux-user/gen-vdso.c new file mode 100644 index 0000000000..31e333be80 --- /dev/null +++ b/linux-user/gen-vdso.c @@ -0,0 +1,223 @@ +/* + * Post-process a vdso elf image for inclusion into qemu. + * + * Copyright 2023 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "elf.h" + + +#define bswap_(p) _Generic(*(p), \ + uint16_t: __builtin_bswap16, \ + uint32_t: __builtin_bswap32, \ + uint64_t: __builtin_bswap64, \ + int16_t: __builtin_bswap16, \ + int32_t: __builtin_bswap32, \ + int64_t: __builtin_bswap64) +#define bswaps(p) (*(p) = bswap_(p)(*(p))) + +static void output_reloc(FILE *outf, void *buf, void *loc) +{ + fprintf(outf, " 0x%08tx,\n", loc - buf); +} + +static const char *sigreturn_sym; +static const char *rt_sigreturn_sym; + +static unsigned sigreturn_addr; +static unsigned rt_sigreturn_addr; + +#define N 32 +#define elfN(x) elf32_##x +#define ElfN(x) Elf32_##x +#include "gen-vdso-elfn.c.inc" +#undef N +#undef elfN +#undef ElfN + +#define N 64 +#define elfN(x) elf64_##x +#define ElfN(x) Elf64_##x +#include "gen-vdso-elfn.c.inc" +#undef N +#undef elfN +#undef ElfN + + +int main(int argc, char **argv) +{ + FILE *inf, *outf; + long total_len; + const char *prefix = "vdso"; + const char *inf_name; + const char *outf_name = NULL; + unsigned char *buf; + bool need_bswap; + + while (1) { + int opt = getopt(argc, argv, "o:p:r:s:"); + if (opt < 0) { + break; + } + switch (opt) { + case 'o': + outf_name = optarg; + break; + case 'p': + prefix = optarg; + break; + case 'r': + rt_sigreturn_sym = optarg; + break; + case 's': + sigreturn_sym = optarg; + break; + default: + usage: + fprintf(stderr, "usage: [-p prefix] [-r rt-sigreturn-name] " + "[-s sigreturn-name] -o output-file input-file\n"); + return EXIT_FAILURE; + } + } + + if (optind >= argc || outf_name == NULL) { + goto usage; + } + inf_name = argv[optind]; + + /* + * Open the input and output files. + */ + inf = fopen(inf_name, "rb"); + if (inf == NULL) { + goto perror_inf; + } + outf = fopen(outf_name, "w"); + if (outf == NULL) { + goto perror_outf; + } + + /* + * Read the input file into a buffer. + * We expect the vdso to be small, on the order of one page, + * therefore we do not expect a partial read. + */ + fseek(inf, 0, SEEK_END); + total_len = ftell(inf); + fseek(inf, 0, SEEK_SET); + + buf = malloc(total_len); + if (buf == NULL) { + goto perror_inf; + } + + errno = 0; + if (fread(buf, 1, total_len, inf) != total_len) { + if (errno) { + goto perror_inf; + } + fprintf(stderr, "%s: incomplete read\n", inf_name); + return EXIT_FAILURE; + } + fclose(inf); + + /* + * Write out the vdso image now, before we make local changes. + */ + + fprintf(outf, + "/* Automatically generated from linux-user/gen-vdso.c. */\n" + "\n" + "static const uint8_t %s_image[] = {", + prefix); + for (long i = 0; i < total_len; ++i) { + if (i % 12 == 0) { + fputs("\n ", outf); + } + fprintf(outf, " 0x%02x,", buf[i]); + } + fprintf(outf, "\n};\n\n"); + + /* + * Identify which elf flavor we're processing. + * The first 16 bytes of the file are e_ident. + */ + + if (buf[EI_MAG0] != ELFMAG0 || buf[EI_MAG1] != ELFMAG1 || + buf[EI_MAG2] != ELFMAG2 || buf[EI_MAG3] != ELFMAG3) { + fprintf(stderr, "%s: not an elf file\n", inf_name); + return EXIT_FAILURE; + } + switch (buf[EI_DATA]) { + case ELFDATA2LSB: + need_bswap = BYTE_ORDER != LITTLE_ENDIAN; + break; + case ELFDATA2MSB: + need_bswap = BYTE_ORDER != BIG_ENDIAN; + break; + default: + fprintf(stderr, "%s: invalid elf EI_DATA (%u)\n", + inf_name, buf[EI_DATA]); + return EXIT_FAILURE; + } + + /* + * We need to relocate the VDSO image. The one built into the kernel + * is built for a fixed address. The one we built for QEMU is not, + * since that requires close control of the guest address space. + * + * Output relocation addresses as we go. + */ + + fprintf(outf, "static const unsigned %s_relocs[] = {\n", prefix); + + switch (buf[EI_CLASS]) { + case ELFCLASS32: + elf32_process(outf, buf, need_bswap); + break; + case ELFCLASS64: + elf64_process(outf, buf, need_bswap); + break; + default: + fprintf(stderr, "%s: invalid elf EI_CLASS (%u)\n", + inf_name, buf[EI_CLASS]); + return EXIT_FAILURE; + } + + fprintf(outf, "};\n\n"); /* end vdso_relocs. */ + + fprintf(outf, "static const VdsoImageInfo %s_image_info = {\n", prefix); + fprintf(outf, " .image = %s_image,\n", prefix); + fprintf(outf, " .relocs = %s_relocs,\n", prefix); + fprintf(outf, " .image_size = sizeof(%s_image),\n", prefix); + fprintf(outf, " .reloc_count = ARRAY_SIZE(%s_relocs),\n", prefix); + fprintf(outf, " .sigreturn_ofs = 0x%x,\n", sigreturn_addr); + fprintf(outf, " .rt_sigreturn_ofs = 0x%x,\n", rt_sigreturn_addr); + fprintf(outf, "};\n"); + + /* + * Everything should have gone well. + */ + if (fclose(outf)) { + goto perror_outf; + } + return EXIT_SUCCESS; + + perror_inf: + perror(inf_name); + return EXIT_FAILURE; + + perror_outf: + perror(outf_name); + return EXIT_FAILURE; +} diff --git a/linux-user/hppa/Makefile.vdso b/linux-user/hppa/Makefile.vdso new file mode 100644 index 0000000000..f4537ae716 --- /dev/null +++ b/linux-user/hppa/Makefile.vdso @@ -0,0 +1,11 @@ +include $(BUILD_DIR)/tests/tcg/hppa-linux-user/config-target.mak + +SUBDIR = $(SRC_PATH)/linux-user/hppa +VPATH += $(SUBDIR) + +all: $(SUBDIR)/vdso.so + +$(SUBDIR)/vdso.so: vdso.S vdso.ld vdso-asmoffset.h + $(CC) -o $@ -nostdlib -shared -Wl,-h,linux-vdso32.so.1 \ + -Wl,--build-id=sha1 -Wl,--hash-style=both \ + -Wl,-T,$(SUBDIR)/vdso.ld $< diff --git a/linux-user/hppa/meson.build b/linux-user/hppa/meson.build index 4709508a09..aa2d9a87a6 100644 --- a/linux-user/hppa/meson.build +++ b/linux-user/hppa/meson.build @@ -3,3 +3,8 @@ syscall_nr_generators += { arguments: [ meson.current_source_dir() / 'syscallhdr.sh', '@INPUT@', '@OUTPUT@', '@EXTRA_ARGS@' ], output: '@BASENAME@_nr.h') } + +vdso_inc = gen_vdso.process('vdso.so', + extra_args: [ '-r', '__kernel_sigtramp_rt' ]) + +linux_user_ss.add(when: 'TARGET_HPPA', if_true: vdso_inc) diff --git a/linux-user/hppa/signal.c b/linux-user/hppa/signal.c index ec5f5412d1..17920e9ceb 100644 --- a/linux-user/hppa/signal.c +++ b/linux-user/hppa/signal.c @@ -21,6 +21,7 @@ #include "user-internals.h" #include "signal-common.h" #include "linux-user/trace.h" +#include "vdso-asmoffset.h" struct target_sigcontext { abi_ulong sc_flags; @@ -47,6 +48,19 @@ struct target_rt_sigframe { /* hidden location of upper halves of pa2.0 64-bit gregs */ }; +QEMU_BUILD_BUG_ON(sizeof(struct target_rt_sigframe) != sizeof_rt_sigframe); +QEMU_BUILD_BUG_ON(offsetof(struct target_rt_sigframe, uc.tuc_mcontext) + != offsetof_sigcontext); +QEMU_BUILD_BUG_ON(offsetof(struct target_sigcontext, sc_gr) + != offsetof_sigcontext_gr); +QEMU_BUILD_BUG_ON(offsetof(struct target_sigcontext, sc_fr) + != offsetof_sigcontext_fr); +QEMU_BUILD_BUG_ON(offsetof(struct target_sigcontext, sc_iaoq) + != offsetof_sigcontext_iaoq); +QEMU_BUILD_BUG_ON(offsetof(struct target_sigcontext, sc_sar) + != offsetof_sigcontext_sar); + + static void setup_sigcontext(struct target_sigcontext *sc, CPUArchState *env) { int i; @@ -91,16 +105,6 @@ static void restore_sigcontext(CPUArchState *env, struct target_sigcontext *sc) __get_user(env->cr[CR_SAR], &sc->sc_sar); } -#if TARGET_ABI_BITS == 32 -#define SIGFRAME 64 -#define FUNCTIONCALLFRAME 48 -#else -#define SIGFRAME 128 -#define FUNCTIONCALLFRAME 96 -#endif -#define PARISC_RT_SIGFRAME_SIZE32 \ - ((sizeof(struct target_rt_sigframe) + FUNCTIONCALLFRAME + SIGFRAME) & -SIGFRAME) - void setup_rt_frame(int sig, struct target_sigaction *ka, target_siginfo_t *info, target_sigset_t *set, CPUArchState *env) diff --git a/linux-user/hppa/vdso-asmoffset.h b/linux-user/hppa/vdso-asmoffset.h new file mode 100644 index 0000000000..c8b40c0332 --- /dev/null +++ b/linux-user/hppa/vdso-asmoffset.h @@ -0,0 +1,12 @@ +#define sizeof_rt_sigframe 584 +#define offsetof_sigcontext 160 +#define offsetof_sigcontext_gr 0x4 +#define offsetof_sigcontext_fr 0x88 +#define offsetof_sigcontext_iaoq 0x190 +#define offsetof_sigcontext_sar 0x198 + +/* arch/parisc/include/asm/rt_sigframe.h */ +#define SIGFRAME 64 +#define FUNCTIONCALLFRAME 48 +#define PARISC_RT_SIGFRAME_SIZE32 \ + (((sizeof_rt_sigframe) + FUNCTIONCALLFRAME + SIGFRAME) & -SIGFRAME) diff --git a/linux-user/hppa/vdso.S b/linux-user/hppa/vdso.S new file mode 100644 index 0000000000..5be14d2f70 --- /dev/null +++ b/linux-user/hppa/vdso.S @@ -0,0 +1,165 @@ +/* + * hppa linux kernel vdso replacement. + * + * Copyright 2023 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include +#include "vdso-asmoffset.h" + + .text + + +/* + * arch/parisc/kernel/vdso32/sigtramp.S: + * Gdb expects the trampoline is on the stack and the pc is offset from + * a 64-byte boundary by 0, 4 or 5 instructions. Since the vdso trampoline + * is not on the stack, we need a new variant with different offsets and + * data to tell gdb where to find the signal context on the stack. + * + * Here we put the offset to the context data at the start of the trampoline + * region and offset the first trampoline by 2 instructions. Please do + * not change the trampoline as the code in gdb depends on the following + * instruction sequence exactly. + */ + +/* arch/parisc/kernel/asm-offsets.c */ +#define SIGFRAME_CONTEXT_REGS32 \ + (offsetof_sigcontext - PARISC_RT_SIGFRAME_SIZE32) + + .align 64 + .word SIGFRAME_CONTEXT_REGS32 + +/* + * All that said, we can provide a proper unwind record, which means that + * GDB should not actually need the offset magic. + * + * The return address that arrived here, from the inner frame, is + * not marked as a signal frame and so the unwinder still tries to + * subtract 1 to examine the presumed call insn. Thus we must + * extend the unwind info to a nop before the start. + */ + + .cfi_startproc simple + .cfi_signal_frame + + /* Compare pa32_fallback_frame_state from libgcc. */ + + /* + * Place the CFA at the start of sigcontext for convenience. + * The previous CFA will be restored from the saved stack pointer. + */ + .cfi_def_cfa 30, -PARISC_RT_SIGFRAME_SIZE32 + offsetof_sigcontext + + /* Record save offset of general registers. */ + .cfi_offset 1, offsetof_sigcontext_gr + 1 * 4 + .cfi_offset 2, offsetof_sigcontext_gr + 2 * 4 + .cfi_offset 3, offsetof_sigcontext_gr + 3 * 4 + .cfi_offset 4, offsetof_sigcontext_gr + 4 * 4 + .cfi_offset 5, offsetof_sigcontext_gr + 5 * 4 + .cfi_offset 6, offsetof_sigcontext_gr + 6 * 4 + .cfi_offset 7, offsetof_sigcontext_gr + 7 * 4 + .cfi_offset 8, offsetof_sigcontext_gr + 8 * 4 + .cfi_offset 9, offsetof_sigcontext_gr + 9 * 4 + .cfi_offset 10, offsetof_sigcontext_gr + 10 * 4 + .cfi_offset 11, offsetof_sigcontext_gr + 11 * 4 + .cfi_offset 12, offsetof_sigcontext_gr + 12 * 4 + .cfi_offset 13, offsetof_sigcontext_gr + 13 * 4 + .cfi_offset 14, offsetof_sigcontext_gr + 14 * 4 + .cfi_offset 15, offsetof_sigcontext_gr + 15 * 4 + .cfi_offset 16, offsetof_sigcontext_gr + 16 * 4 + .cfi_offset 17, offsetof_sigcontext_gr + 17 * 4 + .cfi_offset 18, offsetof_sigcontext_gr + 18 * 4 + .cfi_offset 19, offsetof_sigcontext_gr + 19 * 4 + .cfi_offset 20, offsetof_sigcontext_gr + 20 * 4 + .cfi_offset 21, offsetof_sigcontext_gr + 21 * 4 + .cfi_offset 22, offsetof_sigcontext_gr + 22 * 4 + .cfi_offset 23, offsetof_sigcontext_gr + 23 * 4 + .cfi_offset 24, offsetof_sigcontext_gr + 24 * 4 + .cfi_offset 25, offsetof_sigcontext_gr + 25 * 4 + .cfi_offset 26, offsetof_sigcontext_gr + 26 * 4 + .cfi_offset 27, offsetof_sigcontext_gr + 27 * 4 + .cfi_offset 28, offsetof_sigcontext_gr + 28 * 4 + .cfi_offset 29, offsetof_sigcontext_gr + 29 * 4 + .cfi_offset 30, offsetof_sigcontext_gr + 30 * 4 + .cfi_offset 31, offsetof_sigcontext_gr + 31 * 4 + + /* Record save offset of fp registers, left and right halves. */ + .cfi_offset 32, offsetof_sigcontext_fr + 4 * 8 + .cfi_offset 33, offsetof_sigcontext_fr + 4 * 8 + 4 + .cfi_offset 34, offsetof_sigcontext_fr + 5 * 8 + .cfi_offset 35, offsetof_sigcontext_fr + 5 * 8 + 4 + .cfi_offset 36, offsetof_sigcontext_fr + 6 * 8 + .cfi_offset 37, offsetof_sigcontext_fr + 6 * 8 + 4 + .cfi_offset 38, offsetof_sigcontext_fr + 7 * 8 + .cfi_offset 39, offsetof_sigcontext_fr + 7 * 8 + 4 + .cfi_offset 40, offsetof_sigcontext_fr + 8 * 8 + .cfi_offset 41, offsetof_sigcontext_fr + 8 * 8 + 4 + .cfi_offset 42, offsetof_sigcontext_fr + 9 * 8 + .cfi_offset 43, offsetof_sigcontext_fr + 9 * 8 + 4 + .cfi_offset 44, offsetof_sigcontext_fr + 10 * 8 + .cfi_offset 45, offsetof_sigcontext_fr + 10 * 8 + 4 + .cfi_offset 46, offsetof_sigcontext_fr + 11 * 8 + .cfi_offset 47, offsetof_sigcontext_fr + 11 * 8 + 4 + .cfi_offset 48, offsetof_sigcontext_fr + 12 * 8 + .cfi_offset 49, offsetof_sigcontext_fr + 12 * 8 + 4 + .cfi_offset 50, offsetof_sigcontext_fr + 13 * 8 + .cfi_offset 51, offsetof_sigcontext_fr + 13 * 8 + 4 + .cfi_offset 52, offsetof_sigcontext_fr + 14 * 8 + .cfi_offset 53, offsetof_sigcontext_fr + 14 * 8 + 4 + .cfi_offset 54, offsetof_sigcontext_fr + 15 * 8 + .cfi_offset 55, offsetof_sigcontext_fr + 15 * 8 + 4 + .cfi_offset 56, offsetof_sigcontext_fr + 16 * 8 + .cfi_offset 57, offsetof_sigcontext_fr + 16 * 8 + 4 + .cfi_offset 58, offsetof_sigcontext_fr + 17 * 8 + .cfi_offset 59, offsetof_sigcontext_fr + 17 * 8 + 4 + .cfi_offset 60, offsetof_sigcontext_fr + 18 * 8 + .cfi_offset 61, offsetof_sigcontext_fr + 18 * 8 + 4 + .cfi_offset 62, offsetof_sigcontext_fr + 19 * 8 + .cfi_offset 63, offsetof_sigcontext_fr + 19 * 8 + 4 + .cfi_offset 64, offsetof_sigcontext_fr + 20 * 8 + .cfi_offset 65, offsetof_sigcontext_fr + 20 * 8 + 4 + .cfi_offset 66, offsetof_sigcontext_fr + 21 * 8 + .cfi_offset 67, offsetof_sigcontext_fr + 21 * 8 + 4 + .cfi_offset 68, offsetof_sigcontext_fr + 22 * 8 + .cfi_offset 69, offsetof_sigcontext_fr + 22 * 8 + 4 + .cfi_offset 70, offsetof_sigcontext_fr + 23 * 8 + .cfi_offset 71, offsetof_sigcontext_fr + 23 * 8 + 4 + .cfi_offset 72, offsetof_sigcontext_fr + 24 * 8 + .cfi_offset 73, offsetof_sigcontext_fr + 24 * 8 + 4 + .cfi_offset 74, offsetof_sigcontext_fr + 25 * 8 + .cfi_offset 75, offsetof_sigcontext_fr + 25 * 8 + 4 + .cfi_offset 76, offsetof_sigcontext_fr + 26 * 8 + .cfi_offset 77, offsetof_sigcontext_fr + 26 * 8 + 4 + .cfi_offset 78, offsetof_sigcontext_fr + 27 * 8 + .cfi_offset 79, offsetof_sigcontext_fr + 27 * 8 + 4 + .cfi_offset 80, offsetof_sigcontext_fr + 28 * 8 + .cfi_offset 81, offsetof_sigcontext_fr + 28 * 8 + 4 + .cfi_offset 82, offsetof_sigcontext_fr + 29 * 8 + .cfi_offset 83, offsetof_sigcontext_fr + 29 * 8 + 4 + .cfi_offset 84, offsetof_sigcontext_fr + 30 * 8 + .cfi_offset 85, offsetof_sigcontext_fr + 30 * 8 + 4 + .cfi_offset 86, offsetof_sigcontext_fr + 31 * 8 + .cfi_offset 87, offsetof_sigcontext_fr + 31 * 8 + 4 + + /* Record save offset of %sar */ + .cfi_offset 88, offsetof_sigcontext_sar + + /* Record save offset of return address, iaoq[0]. */ + .cfi_return_column 89 + .cfi_offset 89, offsetof_sigcontext_iaoq + + nop + +__kernel_sigtramp_rt: + ldi 0, %r25 + ldi __NR_rt_sigreturn, %r20 + be,l 0x100(%sr2, %r0), %sr0, %r31 + nop + + .cfi_endproc + .size __kernel_sigtramp_rt, . - __kernel_sigtramp_rt + .type __kernel_sigtramp_rt, @function + .globl __kernel_sigtramp_rt diff --git a/linux-user/hppa/vdso.ld b/linux-user/hppa/vdso.ld new file mode 100644 index 0000000000..b17ad974f3 --- /dev/null +++ b/linux-user/hppa/vdso.ld @@ -0,0 +1,77 @@ +/* + * Linker script for linux hppa vdso. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +VERSION { + /* + * The kernel's vdso32.lds.S attempts to export + * __kernel_sigtramp_rt32 + * __kernel_restart_syscall32 + * except that those symbols don't exist. The actual symbols are + * __kernel_sigtramp_rt + * __kernel_restart_syscall + * which means that nothing is exported at all. + * QEMU handles syscall restart internally, so we don't + * need to implement __kernel_restart_syscall at all. + */ + LINUX_5.18 { + local: *; + }; +} + + +PHDRS { + phdr PT_PHDR FLAGS(4) PHDRS; + load PT_LOAD FLAGS(7) FILEHDR PHDRS; + dynamic PT_DYNAMIC FLAGS(4); + note PT_NOTE FLAGS(4); + eh_frame_hdr PT_GNU_EH_FRAME; +} + +SECTIONS { + . = SIZEOF_HEADERS; + + /* The following, including the FILEHDRS and PHDRS, are modified + when we relocate the binary. We want them to be initially + writable for the relocation; we'll force them read-only after. */ + .note : { *(.note*) } :load :note + .dynamic : { *(.dynamic) } :load :dynamic + .dynsym : { *(.dynsym) } :load + .data : { + /* There ought not be any real read-write data. + But since we manipulated the segment layout, + we have to put these sections somewhere. */ + *(.data*) + *(.sdata*) + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.bss*) + *(.dynbss*) + *(.gnu.linkonce.b.*) + } + + .rodata : { *(.rodata) } + .hash : { *(.hash) } + .gnu.hash : { *(.gnu.hash) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .eh_frame_hdr : { *(.eh_frame_hdr) } :load :eh_frame_hdr + .eh_frame : { *(.eh_frame) } :load + + .text : { *(.text*) } :load +} diff --git a/linux-user/hppa/vdso.so b/linux-user/hppa/vdso.so new file mode 100755 index 0000000000..e1ddd70c37 Binary files /dev/null and b/linux-user/hppa/vdso.so differ diff --git a/linux-user/i386/Makefile.vdso b/linux-user/i386/Makefile.vdso new file mode 100644 index 0000000000..95bc616f6d --- /dev/null +++ b/linux-user/i386/Makefile.vdso @@ -0,0 +1,11 @@ +include $(BUILD_DIR)/tests/tcg/i386-linux-user/config-target.mak + +SUBDIR = $(SRC_PATH)/linux-user/i386 +VPATH += $(SUBDIR) + +all: $(SUBDIR)/vdso.so + +$(SUBDIR)/vdso.so: vdso.S vdso.ld vdso-asmoffset.h + $(CC) -o $@ -m32 -nostdlib -shared -Wl,-h,linux-gate.so.1 \ + -Wl,--build-id=sha1 -Wl,--hash-style=both \ + -Wl,-T,$(SUBDIR)/vdso.ld $< diff --git a/linux-user/i386/meson.build b/linux-user/i386/meson.build index ee523019a5..d42fc6cbc9 100644 --- a/linux-user/i386/meson.build +++ b/linux-user/i386/meson.build @@ -3,3 +3,10 @@ syscall_nr_generators += { arguments: [ meson.current_source_dir() / 'syscallhdr.sh', '@INPUT@', '@OUTPUT@', '@EXTRA_ARGS@' ], output: '@BASENAME@_nr.h') } + +vdso_inc = gen_vdso.process('vdso.so', extra_args: [ + '-s', '__kernel_sigreturn', + '-r', '__kernel_rt_sigreturn' + ]) + +linux_user_ss.add(when: 'TARGET_I386', if_true: vdso_inc) diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c index 60fa07d6f9..bc5d45302e 100644 --- a/linux-user/i386/signal.c +++ b/linux-user/i386/signal.c @@ -214,6 +214,17 @@ struct rt_sigframe { }; #define TARGET_RT_SIGFRAME_FXSAVE_OFFSET ( \ offsetof(struct rt_sigframe, fpstate) + TARGET_FPSTATE_FXSAVE_OFFSET) + +/* + * Verify that vdso-asmoffset.h constants match. + */ +#include "i386/vdso-asmoffset.h" + +QEMU_BUILD_BUG_ON(offsetof(struct sigframe, sc.eip) + != SIGFRAME_SIGCONTEXT_eip); +QEMU_BUILD_BUG_ON(offsetof(struct rt_sigframe, uc.tuc_mcontext.eip) + != RT_SIGFRAME_SIGCONTEXT_eip); + #else struct rt_sigframe { diff --git a/linux-user/i386/vdso-asmoffset.h b/linux-user/i386/vdso-asmoffset.h new file mode 100644 index 0000000000..4e5ee0dd49 --- /dev/null +++ b/linux-user/i386/vdso-asmoffset.h @@ -0,0 +1,6 @@ +/* + * offsetof(struct sigframe, sc.eip) + * offsetof(struct rt_sigframe, uc.tuc_mcontext.eip) + */ +#define SIGFRAME_SIGCONTEXT_eip 64 +#define RT_SIGFRAME_SIGCONTEXT_eip 220 diff --git a/linux-user/i386/vdso.S b/linux-user/i386/vdso.S new file mode 100644 index 0000000000..e7a1f333a1 --- /dev/null +++ b/linux-user/i386/vdso.S @@ -0,0 +1,143 @@ +/* + * i386 linux replacement vdso. + * + * Copyright 2023 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include +#include "vdso-asmoffset.h" + +.macro endf name + .globl \name + .type \name, @function + .size \name, . - \name +.endm + +.macro vdso_syscall1 name, nr +\name: + .cfi_startproc + mov %ebx, %edx + .cfi_register %ebx, %edx + mov 4(%esp), %ebx + mov $\nr, %eax + int $0x80 + mov %edx, %ebx + ret + .cfi_endproc +endf \name +.endm + +.macro vdso_syscall2 name, nr +\name: + .cfi_startproc + mov %ebx, %edx + .cfi_register %ebx, %edx + mov 4(%esp), %ebx + mov 8(%esp), %ecx + mov $\nr, %eax + int $0x80 + mov %edx, %ebx + ret + .cfi_endproc +endf \name +.endm + +.macro vdso_syscall3 name, nr +\name: + .cfi_startproc + push %ebx + .cfi_adjust_cfa_offset 4 + .cfi_rel_offset %ebx, 0 + mov 8(%esp), %ebx + mov 12(%esp), %ecx + mov 16(%esp), %edx + mov $\nr, %eax + int $0x80 + pop %ebx + .cfi_adjust_cfa_offset -4 + .cfi_restore %ebx + ret + .cfi_endproc +endf \name +.endm + +__kernel_vsyscall: + .cfi_startproc + int $0x80 + ret + .cfi_endproc +endf __kernel_vsyscall + +vdso_syscall2 __vdso_clock_gettime, __NR_clock_gettime +vdso_syscall2 __vdso_clock_gettime64, __NR_clock_gettime64 +vdso_syscall2 __vdso_clock_getres, __NR_clock_getres +vdso_syscall2 __vdso_gettimeofday, __NR_gettimeofday +vdso_syscall1 __vdso_time, __NR_time +vdso_syscall3 __vdso_getcpu, __NR_gettimeofday + +/* + * Signal return handlers. + */ + + .cfi_startproc simple + .cfi_signal_frame + +/* + * For convenience, put the cfa just above eip in sigcontext, and count + * offsets backward from there. Re-compute the cfa in the two contexts + * we have for signal unwinding. This is far simpler than the + * DW_CFA_expression form that the kernel uses, and is equally correct. + */ + + .cfi_def_cfa %esp, SIGFRAME_SIGCONTEXT_eip + 4 + + .cfi_offset %eip, -4 + /* err, -8 */ + /* trapno, -12 */ + .cfi_offset %eax, -16 + .cfi_offset %ecx, -20 + .cfi_offset %edx, -24 + .cfi_offset %ebx, -28 + .cfi_offset %esp, -32 + .cfi_offset %ebp, -36 + .cfi_offset %esi, -40 + .cfi_offset %edi, -44 + +/* + * While this frame is marked as a signal frame, that only applies to how + * the return address is handled for the outer frame. The return address + * that arrived here, from the inner frame, is not marked as a signal frame + * and so the unwinder still tries to subtract 1 to examine the presumed + * call insn. Thus we must extend the unwind info to a nop before the start. + */ + nop + +__kernel_sigreturn: + popl %eax /* pop sig */ + .cfi_adjust_cfa_offset -4 + movl $__NR_sigreturn, %eax + int $0x80 +endf __kernel_sigreturn + + .cfi_def_cfa_offset RT_SIGFRAME_SIGCONTEXT_eip + 4 + nop + +__kernel_rt_sigreturn: + movl $__NR_rt_sigreturn, %eax + int $0x80 +endf __kernel_rt_sigreturn + + .cfi_endproc + +/* + * TODO: Add elf notes. E.g. + * + * #include + * ELFNOTE_START(Linux, 0, "a") + * .long LINUX_VERSION_CODE + * ELFNOTE_END + * + * but what version number would we set for QEMU? + */ diff --git a/linux-user/i386/vdso.ld b/linux-user/i386/vdso.ld new file mode 100644 index 0000000000..326b7a8f98 --- /dev/null +++ b/linux-user/i386/vdso.ld @@ -0,0 +1,76 @@ +/* + * Linker script for linux i386 replacement vdso. + * + * Copyright 2023 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +ENTRY(__kernel_vsyscall) + +VERSION { + LINUX_2.6 { + global: + __vdso_clock_gettime; + __vdso_gettimeofday; + __vdso_time; + __vdso_clock_getres; + __vdso_clock_gettime64; + __vdso_getcpu; + }; + + LINUX_2.5 { + global: + __kernel_vsyscall; + __kernel_sigreturn; + __kernel_rt_sigreturn; + local: *; + }; +} + +PHDRS { + phdr PT_PHDR FLAGS(4) PHDRS; + load PT_LOAD FLAGS(7) FILEHDR PHDRS; /* FLAGS=RWX */ + dynamic PT_DYNAMIC FLAGS(4); + eh_frame_hdr PT_GNU_EH_FRAME; + note PT_NOTE FLAGS(4); +} + +SECTIONS { + . = SIZEOF_HEADERS; + + /* + * The following, including the FILEHDRS and PHDRS, are modified + * when we relocate the binary. We want them to be initially + * writable for the relocation; we'll force them read-only after. + */ + .note : { *(.note*) } :load :note + .dynamic : { *(.dynamic) } :load :dynamic + .dynsym : { *(.dynsym) } :load + .data : { + /* + * There ought not be any real read-write data. + * But since we manipulated the segment layout, + * we have to put these sections somewhere. + */ + *(.data*) + *(.sdata*) + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.bss*) + *(.dynbss*) + *(.gnu.linkonce.b.*) + } + + .rodata : { *(.rodata*) } + .hash : { *(.hash) } + .gnu.hash : { *(.gnu.hash) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .eh_frame_hdr : { *(.eh_frame_hdr) } :load :eh_frame_hdr + .eh_frame : { *(.eh_frame) } :load + + .text : { *(.text*) } :load =0x90909090 +} diff --git a/linux-user/i386/vdso.so b/linux-user/i386/vdso.so new file mode 100755 index 0000000000..bdece5dfcf Binary files /dev/null and b/linux-user/i386/vdso.so differ diff --git a/linux-user/linuxload.c b/linux-user/linuxload.c index 745cce70ab..4a794f8cea 100644 --- a/linux-user/linuxload.c +++ b/linux-user/linuxload.c @@ -3,7 +3,9 @@ #include "qemu/osdep.h" #include "qemu.h" #include "user-internals.h" +#include "user-mmap.h" #include "loader.h" +#include "qapi/error.h" #define NGROUPS 32 @@ -37,7 +39,7 @@ static int prepare_binprm(struct linux_binprm *bprm) int mode; int retval; - if (fstat(bprm->fd, &st) < 0) { + if (fstat(bprm->src.fd, &st) < 0) { return -errno; } @@ -67,7 +69,7 @@ static int prepare_binprm(struct linux_binprm *bprm) bprm->e_gid = st.st_gid; } - retval = read(bprm->fd, bprm->buf, BPRM_BUF_SIZE); + retval = read(bprm->src.fd, bprm->buf, BPRM_BUF_SIZE); if (retval < 0) { perror("prepare_binprm"); exit(-1); @@ -76,6 +78,10 @@ static int prepare_binprm(struct linux_binprm *bprm) /* Make sure the rest of the loader won't read garbage. */ memset(bprm->buf + retval, 0, BPRM_BUF_SIZE - retval); } + + bprm->src.cache = bprm->buf; + bprm->src.cache_size = retval; + return retval; } @@ -138,7 +144,7 @@ int loader_exec(int fdexec, const char *filename, char **argv, char **envp, { int retval; - bprm->fd = fdexec; + bprm->src.fd = fdexec; bprm->filename = (char *)filename; bprm->argc = count(argv); bprm->argv = argv; @@ -147,29 +153,112 @@ int loader_exec(int fdexec, const char *filename, char **argv, char **envp, retval = prepare_binprm(bprm); - if (retval >= 0) { - if (bprm->buf[0] == 0x7f - && bprm->buf[1] == 'E' - && bprm->buf[2] == 'L' - && bprm->buf[3] == 'F') { - retval = load_elf_binary(bprm, infop); -#if defined(TARGET_HAS_BFLT) - } else if (bprm->buf[0] == 'b' - && bprm->buf[1] == 'F' - && bprm->buf[2] == 'L' - && bprm->buf[3] == 'T') { - retval = load_flt_binary(bprm, infop); -#endif - } else { - return -ENOEXEC; - } + if (retval < 4) { + return -ENOEXEC; } - - if (retval >= 0) { - /* success. Initialize important registers */ - do_init_thread(regs, infop); + if (bprm->buf[0] == 0x7f + && bprm->buf[1] == 'E' + && bprm->buf[2] == 'L' + && bprm->buf[3] == 'F') { + retval = load_elf_binary(bprm, infop); +#if defined(TARGET_HAS_BFLT) + } else if (bprm->buf[0] == 'b' + && bprm->buf[1] == 'F' + && bprm->buf[2] == 'L' + && bprm->buf[3] == 'T') { + retval = load_flt_binary(bprm, infop); +#endif + } else { + return -ENOEXEC; + } + if (retval < 0) { return retval; } - return retval; + /* Success. Initialize important registers. */ + do_init_thread(regs, infop); + return 0; +} + +bool imgsrc_read(void *dst, off_t offset, size_t len, + const ImageSource *img, Error **errp) +{ + ssize_t ret; + + if (offset + len <= img->cache_size) { + memcpy(dst, img->cache + offset, len); + return true; + } + + if (img->fd < 0) { + error_setg(errp, "read past end of buffer"); + return false; + } + + ret = pread(img->fd, dst, len, offset); + if (ret == len) { + return true; + } + if (ret < 0) { + error_setg_errno(errp, errno, "Error reading file header"); + } else { + error_setg(errp, "Incomplete read of file header"); + } + return false; +} + +void *imgsrc_read_alloc(off_t offset, size_t len, + const ImageSource *img, Error **errp) +{ + void *alloc = g_malloc(len); + bool ok = imgsrc_read(alloc, offset, len, img, errp); + + if (!ok) { + g_free(alloc); + alloc = NULL; + } + return alloc; +} + +abi_long imgsrc_mmap(abi_ulong start, abi_ulong len, int prot, + int flags, const ImageSource *src, abi_ulong offset) +{ + const int prot_write = PROT_READ | PROT_WRITE; + abi_long ret; + void *haddr; + + assert(flags == (MAP_PRIVATE | MAP_FIXED)); + + if (src->fd >= 0) { + return target_mmap(start, len, prot, flags, src->fd, offset); + } + + /* + * This case is for the vdso; we don't expect bad images. + * The mmap may extend beyond the end of the image, especially + * to the end of the page. Zero fill. + */ + assert(offset < src->cache_size); + + ret = target_mmap(start, len, prot_write, flags | MAP_ANON, -1, 0); + if (ret == -1) { + return ret; + } + + haddr = lock_user(VERIFY_WRITE, start, len, 0); + assert(haddr != NULL); + if (offset + len <= src->cache_size) { + memcpy(haddr, src->cache + offset, len); + } else { + size_t rest = src->cache_size - offset; + memcpy(haddr, src->cache + offset, rest); + memset(haddr + rest, 0, len - rest); + } + unlock_user(haddr, start, len); + + if (prot != prot_write) { + target_mprotect(start, len, prot); + } + + return ret; } diff --git a/linux-user/loader.h b/linux-user/loader.h index 324e5c872a..a0834290e7 100644 --- a/linux-user/loader.h +++ b/linux-user/loader.h @@ -18,6 +18,48 @@ #ifndef LINUX_USER_LOADER_H #define LINUX_USER_LOADER_H +typedef struct { + const void *cache; + unsigned int cache_size; + int fd; +} ImageSource; + +/** + * imgsrc_read: Read from ImageSource + * @dst: destination for read + * @offset: offset within file for read + * @len: size of the read + * @img: ImageSource to read from + * @errp: Error details. + * + * Read into @dst, using the cache when possible. + */ +bool imgsrc_read(void *dst, off_t offset, size_t len, + const ImageSource *img, Error **errp); + +/** + * imgsrc_read_alloc: Read from ImageSource + * @offset: offset within file for read + * @size: size of the read + * @img: ImageSource to read from + * @errp: Error details. + * + * Read into newly allocated memory, using the cache when possible. + */ +void *imgsrc_read_alloc(off_t offset, size_t len, + const ImageSource *img, Error **errp); + +/** + * imgsrc_mmap: Map from ImageSource + * + * If @src has a file descriptor, pass on to target_mmap. Otherwise, + * this is "mapping" from a host buffer, which resolves to memcpy. + * Therefore, flags must be MAP_PRIVATE | MAP_FIXED; the argument is + * retained for clarity. + */ +abi_long imgsrc_mmap(abi_ulong start, abi_ulong len, int prot, + int flags, const ImageSource *src, abi_ulong offset); + /* * Read a good amount of data initially, to hopefully get all the * program headers loaded. @@ -29,15 +71,15 @@ * used when loading binaries. */ struct linux_binprm { - char buf[BPRM_BUF_SIZE] __attribute__((aligned)); - abi_ulong p; - int fd; - int e_uid, e_gid; - int argc, envc; - char **argv; - char **envp; - char *filename; /* Name of binary */ - int (*core_dump)(int, const CPUArchState *); /* coredump routine */ + char buf[BPRM_BUF_SIZE] __attribute__((aligned)); + ImageSource src; + abi_ulong p; + int e_uid, e_gid; + int argc, envc; + char **argv; + char **envp; + char *filename; /* Name of binary */ + int (*core_dump)(int, const CPUArchState *); /* coredump routine */ }; void do_init_thread(struct target_pt_regs *regs, struct image_info *infop); diff --git a/linux-user/loongarch64/Makefile.vdso b/linux-user/loongarch64/Makefile.vdso new file mode 100644 index 0000000000..369de13344 --- /dev/null +++ b/linux-user/loongarch64/Makefile.vdso @@ -0,0 +1,11 @@ +include $(BUILD_DIR)/tests/tcg/loongarch64-linux-user/config-target.mak + +SUBDIR = $(SRC_PATH)/linux-user/loongarch64 +VPATH += $(SUBDIR) + +all: $(SUBDIR)/vdso.so + +$(SUBDIR)/vdso.so: vdso.S vdso.ld vdso-asmoffset.h + $(CC) -o $@ -nostdlib -shared -fpic -Wl,-h,linux-vdso.so.1 \ + -Wl,--build-id=sha1 -Wl,--hash-style=both \ + -Wl,--no-warn-rwx-segments -Wl,-T,$(SUBDIR)/vdso.ld $< diff --git a/linux-user/loongarch64/meson.build b/linux-user/loongarch64/meson.build new file mode 100644 index 0000000000..17896535f0 --- /dev/null +++ b/linux-user/loongarch64/meson.build @@ -0,0 +1,4 @@ +vdso_inc = gen_vdso.process('vdso.so', + extra_args: ['-r', '__vdso_rt_sigreturn']) + +linux_user_ss.add(when: 'TARGET_LOONGARCH64', if_true: vdso_inc) diff --git a/linux-user/loongarch64/signal.c b/linux-user/loongarch64/signal.c index 39572c1190..afcee641a6 100644 --- a/linux-user/loongarch64/signal.c +++ b/linux-user/loongarch64/signal.c @@ -10,9 +10,9 @@ #include "user-internals.h" #include "signal-common.h" #include "linux-user/trace.h" - #include "target/loongarch/internals.h" #include "target/loongarch/vec.h" +#include "vdso-asmoffset.h" /* FP context was used */ #define SC_USED_FP (1 << 0) @@ -24,6 +24,11 @@ struct target_sigcontext { uint64_t sc_extcontext[0] QEMU_ALIGNED(16); }; +QEMU_BUILD_BUG_ON(sizeof(struct target_sigcontext) != sizeof_sigcontext); +QEMU_BUILD_BUG_ON(offsetof(struct target_sigcontext, sc_pc) + != offsetof_sigcontext_pc); +QEMU_BUILD_BUG_ON(offsetof(struct target_sigcontext, sc_regs) + != offsetof_sigcontext_gr); #define FPU_CTX_MAGIC 0x46505501 #define FPU_CTX_ALIGN 8 @@ -33,6 +38,9 @@ struct target_fpu_context { uint32_t fcsr; } QEMU_ALIGNED(FPU_CTX_ALIGN); +QEMU_BUILD_BUG_ON(offsetof(struct target_fpu_context, regs) + != offsetof_fpucontext_fr); + #define CONTEXT_INFO_ALIGN 16 struct target_sctx_info { uint32_t magic; @@ -40,6 +48,8 @@ struct target_sctx_info { uint64_t padding; } QEMU_ALIGNED(CONTEXT_INFO_ALIGN); +QEMU_BUILD_BUG_ON(sizeof(struct target_sctx_info) != sizeof_sctx_info); + struct target_ucontext { abi_ulong tuc_flags; abi_ptr tuc_link; @@ -54,6 +64,11 @@ struct target_rt_sigframe { struct target_ucontext rs_uc; }; +QEMU_BUILD_BUG_ON(sizeof(struct target_rt_sigframe) + != sizeof_rt_sigframe); +QEMU_BUILD_BUG_ON(offsetof(struct target_rt_sigframe, rs_uc.tuc_mcontext) + != offsetof_sigcontext); + /* * These two structures are not present in guest memory, are private * to the signal implementation, but are largely copied from the diff --git a/linux-user/loongarch64/vdso-asmoffset.h b/linux-user/loongarch64/vdso-asmoffset.h new file mode 100644 index 0000000000..60d113822f --- /dev/null +++ b/linux-user/loongarch64/vdso-asmoffset.h @@ -0,0 +1,8 @@ +#define sizeof_rt_sigframe 0x240 +#define sizeof_sigcontext 0x110 +#define sizeof_sctx_info 0x10 + +#define offsetof_sigcontext 0x130 +#define offsetof_sigcontext_pc 0 +#define offsetof_sigcontext_gr 8 +#define offsetof_fpucontext_fr 0 diff --git a/linux-user/loongarch64/vdso.S b/linux-user/loongarch64/vdso.S new file mode 100644 index 0000000000..780a5fda12 --- /dev/null +++ b/linux-user/loongarch64/vdso.S @@ -0,0 +1,130 @@ +/* + * Loongarch64 linux replacement vdso. + * + * Copyright 2023 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include +#include +#include "vdso-asmoffset.h" + + + .text + +.macro endf name + .globl \name + .type \name, @function + .size \name, . - \name +.endm + +.macro vdso_syscall name, nr +\name: + li.w $a7, \nr + syscall 0 + jr $ra +endf \name +.endm + + .cfi_startproc + +vdso_syscall __vdso_gettimeofday, __NR_gettimeofday +vdso_syscall __vdso_clock_gettime, __NR_clock_gettime +vdso_syscall __vdso_clock_getres, __NR_clock_getres +vdso_syscall __vdso_getcpu, __NR_getcpu + + .cfi_endproc + +/* + * Start the unwind info at least one instruction before the signal + * trampoline, because the unwinder will assume we are returning + * after a call site. + */ + + .cfi_startproc simple + .cfi_signal_frame + +#define B_GR offsetof_sigcontext_gr +#define B_FR sizeof_sigcontext + sizeof_sctx_info + offsetof_fpucontext_fr + + .cfi_def_cfa 2, offsetof_sigcontext + + /* Return address */ + .cfi_return_column 64 + .cfi_offset 64, offsetof_sigcontext_pc /* pc */ + + /* Integer registers */ + .cfi_offset 1, B_GR + 1 * 8 + .cfi_offset 2, B_GR + 2 * 8 + .cfi_offset 3, B_GR + 3 * 8 + .cfi_offset 4, B_GR + 4 * 8 + .cfi_offset 5, B_GR + 5 * 8 + .cfi_offset 6, B_GR + 6 * 8 + .cfi_offset 7, B_GR + 7 * 8 + .cfi_offset 8, B_GR + 8 * 8 + .cfi_offset 9, B_GR + 9 * 8 + .cfi_offset 10, B_GR + 10 * 8 + .cfi_offset 11, B_GR + 11 * 8 + .cfi_offset 12, B_GR + 12 * 8 + .cfi_offset 13, B_GR + 13 * 8 + .cfi_offset 14, B_GR + 14 * 8 + .cfi_offset 15, B_GR + 15 * 8 + .cfi_offset 16, B_GR + 16 * 8 + .cfi_offset 17, B_GR + 17 * 8 + .cfi_offset 18, B_GR + 18 * 8 + .cfi_offset 19, B_GR + 19 * 8 + .cfi_offset 20, B_GR + 20 * 8 + .cfi_offset 21, B_GR + 21 * 8 + .cfi_offset 22, B_GR + 22 * 8 + .cfi_offset 23, B_GR + 23 * 8 + .cfi_offset 24, B_GR + 24 * 8 + .cfi_offset 25, B_GR + 25 * 8 + .cfi_offset 26, B_GR + 26 * 8 + .cfi_offset 27, B_GR + 27 * 8 + .cfi_offset 28, B_GR + 28 * 8 + .cfi_offset 29, B_GR + 29 * 8 + .cfi_offset 30, B_GR + 30 * 8 + .cfi_offset 31, B_GR + 31 * 8 + + /* Floating point registers */ + .cfi_offset 32, B_FR + 0 + .cfi_offset 33, B_FR + 1 * 8 + .cfi_offset 34, B_FR + 2 * 8 + .cfi_offset 35, B_FR + 3 * 8 + .cfi_offset 36, B_FR + 4 * 8 + .cfi_offset 37, B_FR + 5 * 8 + .cfi_offset 38, B_FR + 6 * 8 + .cfi_offset 39, B_FR + 7 * 8 + .cfi_offset 40, B_FR + 8 * 8 + .cfi_offset 41, B_FR + 9 * 8 + .cfi_offset 42, B_FR + 10 * 8 + .cfi_offset 43, B_FR + 11 * 8 + .cfi_offset 44, B_FR + 12 * 8 + .cfi_offset 45, B_FR + 13 * 8 + .cfi_offset 46, B_FR + 14 * 8 + .cfi_offset 47, B_FR + 15 * 8 + .cfi_offset 48, B_FR + 16 * 8 + .cfi_offset 49, B_FR + 17 * 8 + .cfi_offset 50, B_FR + 18 * 8 + .cfi_offset 51, B_FR + 19 * 8 + .cfi_offset 52, B_FR + 20 * 8 + .cfi_offset 53, B_FR + 21 * 8 + .cfi_offset 54, B_FR + 22 * 8 + .cfi_offset 55, B_FR + 23 * 8 + .cfi_offset 56, B_FR + 24 * 8 + .cfi_offset 57, B_FR + 25 * 8 + .cfi_offset 58, B_FR + 26 * 8 + .cfi_offset 59, B_FR + 27 * 8 + .cfi_offset 60, B_FR + 28 * 8 + .cfi_offset 61, B_FR + 29 * 8 + .cfi_offset 62, B_FR + 30 * 8 + .cfi_offset 63, B_FR + 31 * 8 + + nop + +__vdso_rt_sigreturn: + li.w $a7, __NR_rt_sigreturn + syscall 0 + .cfi_endproc +endf __vdso_rt_sigreturn diff --git a/linux-user/loongarch64/vdso.ld b/linux-user/loongarch64/vdso.ld new file mode 100644 index 0000000000..682446ed0c --- /dev/null +++ b/linux-user/loongarch64/vdso.ld @@ -0,0 +1,73 @@ +/* + * Linker script for linux loongarch64 replacement vdso. + * + * Copyright 2023 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +VERSION { + LINUX_5.10 { + global: + __vdso_getcpu; + __vdso_clock_getres; + __vdso_clock_gettime; + __vdso_gettimeofday; + __vdso_rt_sigreturn; + + local: *; + }; +} + + +PHDRS { + phdr PT_PHDR FLAGS(4) PHDRS; + load PT_LOAD FLAGS(7) FILEHDR PHDRS; + dynamic PT_DYNAMIC FLAGS(4); + eh_frame_hdr PT_GNU_EH_FRAME; + note PT_NOTE FLAGS(4); +} + +SECTIONS { + /* + * We can't prelink to any address without knowing something about + * the virtual memory space of the host, since that leaks over into + * the available memory space of the guest. + */ + . = SIZEOF_HEADERS; + + /* + * The following, including the FILEHDRS and PHDRS, are modified + * when we relocate the binary. We want them to be initially + * writable for the relocation; we'll force them read-only after. + */ + .note : { *(.note*) } :load :note + .dynamic : { *(.dynamic) } :load :dynamic + .dynsym : { *(.dynsym) } :load + /* + * There ought not be any real read-write data. + * But since we manipulated the segment layout, + * we have to put these sections somewhere. + */ + .data : { + *(.data*) + *(.sdata*) + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.bss*) + *(.dynbss*) + *(.gnu.linkonce.b.*) + } + + .rodata : { *(.rodata*) } + .hash : { *(.hash) } + .gnu.hash : { *(.gnu.hash) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .eh_frame_hdr : { *(.eh_frame_hdr) } :load :eh_frame_hdr + .eh_frame : { *(.eh_frame) } :load + + .text : { *(.text*) } :load =0xd503201f +} diff --git a/linux-user/loongarch64/vdso.so b/linux-user/loongarch64/vdso.so new file mode 100755 index 0000000000..bfaa26f2bf Binary files /dev/null and b/linux-user/loongarch64/vdso.so differ diff --git a/linux-user/meson.build b/linux-user/meson.build index 7171dc60be..bc41e8c3bc 100644 --- a/linux-user/meson.build +++ b/linux-user/meson.build @@ -28,18 +28,25 @@ linux_user_ss.add(when: 'TARGET_HAS_BFLT', if_true: files('flatload.c')) linux_user_ss.add(when: 'TARGET_I386', if_true: files('vm86.c')) linux_user_ss.add(when: 'CONFIG_ARM_COMPATIBLE_SEMIHOSTING', if_true: files('semihost.c')) - syscall_nr_generators = {} +gen_vdso_exe = executable('gen-vdso', 'gen-vdso.c', + native: true, build_by_default: false) +gen_vdso = generator(gen_vdso_exe, output: '@BASENAME@.c.inc', + arguments: ['-o', '@OUTPUT@', '@EXTRA_ARGS@', '@INPUT@']) + +subdir('aarch64') subdir('alpha') subdir('arm') subdir('hppa') subdir('i386') +subdir('loongarch64') subdir('m68k') subdir('microblaze') subdir('mips64') subdir('mips') subdir('ppc') +subdir('riscv') subdir('s390x') subdir('sh4') subdir('sparc') diff --git a/linux-user/ppc/Makefile.vdso b/linux-user/ppc/Makefile.vdso new file mode 100644 index 0000000000..3ca3c6b83e --- /dev/null +++ b/linux-user/ppc/Makefile.vdso @@ -0,0 +1,20 @@ +include $(BUILD_DIR)/tests/tcg/ppc64-linux-user/config-target.mak + +SUBDIR = $(SRC_PATH)/linux-user/ppc +VPATH += $(SUBDIR) + +all: $(SUBDIR)/vdso-32.so $(SUBDIR)/vdso-64.so $(SUBDIR)/vdso-64le.so + +LDFLAGS32 = -nostdlib -shared -Wl,-T,$(SUBDIR)/vdso-32.ld \ + -Wl,-h,linux-vdso32.so.1 -Wl,--hash-style=both -Wl,--build-id=sha1 +LDFLAGS64 = -nostdlib -shared -Wl,-T,$(SUBDIR)/vdso-64.ld \ + -Wl,-h,linux-vdso64.so.1 -Wl,--hash-style=both -Wl,--build-id=sha1 + +$(SUBDIR)/vdso-32.so: vdso.S vdso-32.ld vdso-asmoffset.h + $(CC) -o $@ $(LDFLAGS32) -m32 $< + +$(SUBDIR)/vdso-64.so: vdso.S vdso-64.ld vdso-asmoffset.h + $(CC) -o $@ $(LDFLAGS64) -mbig-endian $< + +$(SUBDIR)/vdso-64le.so: vdso.S vdso-64.ld vdso-asmoffset.h + $(CC) -o $@ $(LDFLAGS64) -mlittle-endian $< diff --git a/linux-user/ppc/meson.build b/linux-user/ppc/meson.build index 19fead7bc8..80cacae396 100644 --- a/linux-user/ppc/meson.build +++ b/linux-user/ppc/meson.build @@ -3,3 +3,15 @@ syscall_nr_generators += { arguments: [ meson.current_source_dir() / 'syscallhdr.sh', '@INPUT@', '@OUTPUT@', '@EXTRA_ARGS@' ], output: '@BASENAME@_nr.h') } + +vdso_32_inc = gen_vdso.process('vdso-32.so', extra_args: [ + '-s', '__kernel_sigtramp32', + '-r', '__kernel_sigtramp_rt32' + ]) +linux_user_ss.add(when: 'TARGET_PPC', if_true: vdso_32_inc) + +vdso_64_inc = gen_vdso.process('vdso-64.so', + extra_args: ['-r', '__kernel_sigtramp_rt64']) +vdso_64le_inc = gen_vdso.process('vdso-64le.so', + extra_args: ['-r', '__kernel_sigtramp_rt64']) +linux_user_ss.add(when: 'TARGET_PPC64', if_true: [vdso_64_inc, vdso_64le_inc]) diff --git a/linux-user/ppc/signal.c b/linux-user/ppc/signal.c index a616f20efb..7e7302823b 100644 --- a/linux-user/ppc/signal.c +++ b/linux-user/ppc/signal.c @@ -21,14 +21,7 @@ #include "user-internals.h" #include "signal-common.h" #include "linux-user/trace.h" - -/* Size of dummy stack frame allocated when calling signal handler. - See arch/powerpc/include/asm/ptrace.h. */ -#if defined(TARGET_PPC64) -#define SIGNAL_FRAMESIZE 128 -#else -#define SIGNAL_FRAMESIZE 64 -#endif +#include "vdso-asmoffset.h" /* See arch/powerpc/include/asm/ucontext.h. Only used for 32-bit PPC; on 64-bit PPC, sigcontext and mcontext are one and the same. */ @@ -73,6 +66,16 @@ struct target_mcontext { #endif }; +QEMU_BUILD_BUG_ON(offsetof(struct target_mcontext, mc_fregs) + != offsetof_mcontext_fregs); +#if defined(TARGET_PPC64) +QEMU_BUILD_BUG_ON(offsetof(struct target_mcontext, v_regs) + != offsetof_mcontext_vregs_ptr); +#else +QEMU_BUILD_BUG_ON(offsetof(struct target_mcontext, mc_vregs) + != offsetof_mcontext_vregs); +#endif + /* See arch/powerpc/include/asm/sigcontext.h. */ struct target_sigcontext { target_ulong _unused[4]; @@ -161,6 +164,7 @@ struct target_ucontext { #endif }; +#if !defined(TARGET_PPC64) /* See arch/powerpc/kernel/signal_32.c. */ struct target_sigframe { struct target_sigcontext sctx; @@ -168,6 +172,10 @@ struct target_sigframe { int32_t abigap[56]; }; +QEMU_BUILD_BUG_ON(offsetof(struct target_sigframe, mctx) + != offsetof_sigframe_mcontext); +#endif + #if defined(TARGET_PPC64) #define TARGET_TRAMP_SIZE 6 @@ -184,6 +192,10 @@ struct target_rt_sigframe { char abigap[288]; } __attribute__((aligned(16))); +QEMU_BUILD_BUG_ON(offsetof(struct target_rt_sigframe, + uc.tuc_sigcontext.mcontext) + != offsetof_rt_sigframe_mcontext); + #else struct target_rt_sigframe { @@ -192,6 +204,9 @@ struct target_rt_sigframe { int32_t abigap[56]; }; +QEMU_BUILD_BUG_ON(offsetof(struct target_rt_sigframe, uc.tuc_mcontext) + != offsetof_rt_sigframe_mcontext); + #endif #if defined(TARGET_PPC64) diff --git a/linux-user/ppc/vdso-32.ld b/linux-user/ppc/vdso-32.ld new file mode 100644 index 0000000000..6962696540 --- /dev/null +++ b/linux-user/ppc/vdso-32.ld @@ -0,0 +1,70 @@ +/* + * Linker script for linux powerpc64 replacement vdso. + * + * Copyright 2023 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +VERSION { + LINUX_2.6.15 { + global: + __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_gettime64; + __kernel_clock_getres; + __kernel_time; + __kernel_sync_dicache; + __kernel_sigtramp32; + __kernel_sigtramp_rt32; + __kernel_getcpu; + local: *; + }; +} + +PHDRS { + phdr PT_PHDR FLAGS(4) PHDRS; + load PT_LOAD FLAGS(7) FILEHDR PHDRS; /* FLAGS=RWX */ + dynamic PT_DYNAMIC FLAGS(4); + eh_frame_hdr PT_GNU_EH_FRAME; + note PT_NOTE FLAGS(4); +} + +SECTIONS { + . = SIZEOF_HEADERS; + + /* + * The following, including the FILEHDRS and PHDRS, are modified + * when we relocate the binary. We want them to be initially + * writable for the relocation; we'll force them read-only after. + */ + .note : { *(.note*) } :load :note + .dynamic : { *(.dynamic) } :load :dynamic + .dynsym : { *(.dynsym) } :load + .data : { + /* + * There ought not be any real read-write data. + * But since we manipulated the segment layout, + * we have to put these sections somewhere. + */ + *(.data*) + *(.sdata*) + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.bss*) + *(.dynbss*) + *(.gnu.linkonce.b.*) + } + + .rodata : { *(.rodata*) } + .hash : { *(.hash) } + .gnu.hash : { *(.gnu.hash) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .eh_frame_hdr : { *(.eh_frame_hdr) } :load :eh_frame_hdr + .eh_frame : { *(.eh_frame) } :load + + .text : { *(.text*) } :load +} diff --git a/linux-user/ppc/vdso-32.so b/linux-user/ppc/vdso-32.so new file mode 100755 index 0000000000..b19baafb0d Binary files /dev/null and b/linux-user/ppc/vdso-32.so differ diff --git a/linux-user/ppc/vdso-64.ld b/linux-user/ppc/vdso-64.ld new file mode 100644 index 0000000000..a55c65ed54 --- /dev/null +++ b/linux-user/ppc/vdso-64.ld @@ -0,0 +1,68 @@ +/* + * Linker script for linux powerpc64 replacement vdso. + * + * Copyright 2023 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +VERSION { + LINUX_2.6.15 { + global: + __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + __kernel_sync_dicache; + __kernel_sigtramp_rt64; + __kernel_getcpu; + __kernel_time; + local: *; + }; +} + +PHDRS { + phdr PT_PHDR FLAGS(4) PHDRS; + load PT_LOAD FLAGS(7) FILEHDR PHDRS; /* FLAGS=RWX */ + dynamic PT_DYNAMIC FLAGS(4); + eh_frame_hdr PT_GNU_EH_FRAME; + note PT_NOTE FLAGS(4); +} + +SECTIONS { + . = SIZEOF_HEADERS; + + /* + * The following, including the FILEHDRS and PHDRS, are modified + * when we relocate the binary. We want them to be initially + * writable for the relocation; we'll force them read-only after. + */ + .note : { *(.note*) } :load :note + .dynamic : { *(.dynamic) } :load :dynamic + .dynsym : { *(.dynsym) } :load + .data : { + /* + * There ought not be any real read-write data. + * But since we manipulated the segment layout, + * we have to put these sections somewhere. + */ + *(.data*) + *(.sdata*) + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.bss*) + *(.dynbss*) + *(.gnu.linkonce.b.*) + } + + .rodata : { *(.rodata*) } + .hash : { *(.hash) } + .gnu.hash : { *(.gnu.hash) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .eh_frame_hdr : { *(.eh_frame_hdr) } :load :eh_frame_hdr + .eh_frame : { *(.eh_frame) } :load + + .text : { *(.text*) } :load +} diff --git a/linux-user/ppc/vdso-64.so b/linux-user/ppc/vdso-64.so new file mode 100755 index 0000000000..913c831b38 Binary files /dev/null and b/linux-user/ppc/vdso-64.so differ diff --git a/linux-user/ppc/vdso-64le.so b/linux-user/ppc/vdso-64le.so new file mode 100755 index 0000000000..258a03b807 Binary files /dev/null and b/linux-user/ppc/vdso-64le.so differ diff --git a/linux-user/ppc/vdso-asmoffset.h b/linux-user/ppc/vdso-asmoffset.h new file mode 100644 index 0000000000..6844c8c81c --- /dev/null +++ b/linux-user/ppc/vdso-asmoffset.h @@ -0,0 +1,20 @@ +/* + * Size of dummy stack frame allocated when calling signal handler. + * See arch/powerpc/include/asm/ptrace.h. + */ +#ifdef TARGET_ABI32 +# define SIGNAL_FRAMESIZE 64 +#else +# define SIGNAL_FRAMESIZE 128 +#endif + +#ifdef TARGET_ABI32 +# define offsetof_sigframe_mcontext 0x20 +# define offsetof_rt_sigframe_mcontext 0x140 +# define offsetof_mcontext_fregs 0xc0 +# define offsetof_mcontext_vregs 0x1d0 +#else +# define offsetof_rt_sigframe_mcontext 0xe8 +# define offsetof_mcontext_fregs 0x180 +# define offsetof_mcontext_vregs_ptr 0x288 +#endif diff --git a/linux-user/ppc/vdso.S b/linux-user/ppc/vdso.S new file mode 100644 index 0000000000..689010db13 --- /dev/null +++ b/linux-user/ppc/vdso.S @@ -0,0 +1,239 @@ +/* + * PowerPC linux replacement vdso. + * + * Copyright 2023 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include +#include + +#ifndef _ARCH_PPC64 +# define TARGET_ABI32 +#endif +#include "vdso-asmoffset.h" + + + .text + +.macro endf name + .globl \name + .size \name, .-\name + /* For PPC64, functions have special linkage; we export pointers. */ +#ifndef _ARCH_PPC64 + .type \name, @function +#endif +.endm + +.macro raw_syscall nr + addi 0, 0, \nr + sc +.endm + +.macro vdso_syscall name, nr +\name: + raw_syscall \nr + blr +endf \name +.endm + + .cfi_startproc + +vdso_syscall __kernel_gettimeofday, __NR_gettimeofday +vdso_syscall __kernel_clock_gettime, __NR_clock_gettime +vdso_syscall __kernel_clock_getres, __NR_clock_getres +vdso_syscall __kernel_getcpu, __NR_getcpu +vdso_syscall __kernel_time, __NR_time + +#ifdef __NR_clock_gettime64 +vdso_syscall __kernel_clock_gettime64, __NR_clock_gettime64 +#endif + +__kernel_sync_dicache: + /* qemu does not need to flush caches */ + blr +endf __kernel_sync_dicache + + .cfi_endproc + +/* + * TODO: __kernel_get_tbfreq + * This is probably a constant for QEMU. + */ + +/* + * Start the unwind info at least one instruction before the signal + * trampoline, because the unwinder will assume we are returning + * after a call site. + */ + + .cfi_startproc simple + .cfi_signal_frame + +#ifdef _ARCH_PPC64 +# define __kernel_sigtramp_rt __kernel_sigtramp_rt64 +# define sizeof_reg 8 +#else +# define __kernel_sigtramp_rt __kernel_sigtramp_rt32 +# define sizeof_reg 4 +#endif +#define sizeof_freg 8 +#define sizeof_vreg 16 + + .cfi_def_cfa 1, SIGNAL_FRAMESIZE + offsetof_rt_sigframe_mcontext + + /* Return address */ + .cfi_return_column 67 + .cfi_offset 67, 32 * sizeof_reg /* nip */ + + /* Integer registers */ + .cfi_offset 0, 0 * sizeof_reg + .cfi_offset 1, 1 * sizeof_reg + .cfi_offset 2, 2 * sizeof_reg + .cfi_offset 3, 3 * sizeof_reg + .cfi_offset 4, 4 * sizeof_reg + .cfi_offset 5, 5 * sizeof_reg + .cfi_offset 6, 6 * sizeof_reg + .cfi_offset 7, 7 * sizeof_reg + .cfi_offset 8, 8 * sizeof_reg + .cfi_offset 9, 9 * sizeof_reg + .cfi_offset 10, 10 * sizeof_reg + .cfi_offset 11, 11 * sizeof_reg + .cfi_offset 12, 12 * sizeof_reg + .cfi_offset 13, 13 * sizeof_reg + .cfi_offset 14, 14 * sizeof_reg + .cfi_offset 15, 15 * sizeof_reg + .cfi_offset 16, 16 * sizeof_reg + .cfi_offset 17, 17 * sizeof_reg + .cfi_offset 18, 18 * sizeof_reg + .cfi_offset 19, 19 * sizeof_reg + .cfi_offset 20, 20 * sizeof_reg + .cfi_offset 21, 21 * sizeof_reg + .cfi_offset 22, 22 * sizeof_reg + .cfi_offset 23, 23 * sizeof_reg + .cfi_offset 24, 24 * sizeof_reg + .cfi_offset 25, 25 * sizeof_reg + .cfi_offset 26, 26 * sizeof_reg + .cfi_offset 27, 27 * sizeof_reg + .cfi_offset 28, 28 * sizeof_reg + .cfi_offset 29, 29 * sizeof_reg + .cfi_offset 30, 30 * sizeof_reg + .cfi_offset 31, 31 * sizeof_reg + .cfi_offset 65, 36 * sizeof_reg /* lr */ + .cfi_offset 70, 38 * sizeof_reg /* ccr */ + + /* Floating point registers */ + .cfi_offset 32, offsetof_mcontext_fregs + .cfi_offset 33, offsetof_mcontext_fregs + 1 * sizeof_freg + .cfi_offset 34, offsetof_mcontext_fregs + 2 * sizeof_freg + .cfi_offset 35, offsetof_mcontext_fregs + 3 * sizeof_freg + .cfi_offset 36, offsetof_mcontext_fregs + 4 * sizeof_freg + .cfi_offset 37, offsetof_mcontext_fregs + 5 * sizeof_freg + .cfi_offset 38, offsetof_mcontext_fregs + 6 * sizeof_freg + .cfi_offset 39, offsetof_mcontext_fregs + 7 * sizeof_freg + .cfi_offset 40, offsetof_mcontext_fregs + 8 * sizeof_freg + .cfi_offset 41, offsetof_mcontext_fregs + 9 * sizeof_freg + .cfi_offset 42, offsetof_mcontext_fregs + 10 * sizeof_freg + .cfi_offset 43, offsetof_mcontext_fregs + 11 * sizeof_freg + .cfi_offset 44, offsetof_mcontext_fregs + 12 * sizeof_freg + .cfi_offset 45, offsetof_mcontext_fregs + 13 * sizeof_freg + .cfi_offset 46, offsetof_mcontext_fregs + 14 * sizeof_freg + .cfi_offset 47, offsetof_mcontext_fregs + 15 * sizeof_freg + .cfi_offset 48, offsetof_mcontext_fregs + 16 * sizeof_freg + .cfi_offset 49, offsetof_mcontext_fregs + 17 * sizeof_freg + .cfi_offset 50, offsetof_mcontext_fregs + 18 * sizeof_freg + .cfi_offset 51, offsetof_mcontext_fregs + 19 * sizeof_freg + .cfi_offset 52, offsetof_mcontext_fregs + 20 * sizeof_freg + .cfi_offset 53, offsetof_mcontext_fregs + 21 * sizeof_freg + .cfi_offset 54, offsetof_mcontext_fregs + 22 * sizeof_freg + .cfi_offset 55, offsetof_mcontext_fregs + 23 * sizeof_freg + .cfi_offset 56, offsetof_mcontext_fregs + 24 * sizeof_freg + .cfi_offset 57, offsetof_mcontext_fregs + 25 * sizeof_freg + .cfi_offset 58, offsetof_mcontext_fregs + 26 * sizeof_freg + .cfi_offset 59, offsetof_mcontext_fregs + 27 * sizeof_freg + .cfi_offset 60, offsetof_mcontext_fregs + 28 * sizeof_freg + .cfi_offset 61, offsetof_mcontext_fregs + 29 * sizeof_freg + .cfi_offset 62, offsetof_mcontext_fregs + 30 * sizeof_freg + .cfi_offset 63, offsetof_mcontext_fregs + 31 * sizeof_freg + + /* + * Unlike the kernel, unconditionally represent the Altivec/VSX regs. + * The space within the stack frame is always available, and most of + * our supported processors have them enabled. The only complication + * for PPC64 is the misalignment, so that we have to use indirection. + */ +.macro save_vreg_ofs reg, ofs +#ifdef _ARCH_PPC64 + /* + * vreg = *(cfa + offsetof(v_regs)) + ofs + * + * The CFA is input to the expression on the stack, so: + * DW_CFA_expression reg, length (7), + * DW_OP_plus_uconst (0x23), vreg_ptr, DW_OP_deref (0x06), + * DW_OP_plus_uconst (0x23), ofs + */ + .cfi_escape 0x10, 77 + \reg, 7, 0x23, (offsetof_mcontext_vregs_ptr & 0x7f) + 0x80, offsetof_mcontext_vregs_ptr >> 7, 0x06, 0x23, (\ofs & 0x7f) | 0x80, \ofs >> 7 +#else + .cfi_offset 77 + \reg, offsetof_mcontext_vregs + \ofs +#endif +.endm + +.macro save_vreg reg + save_vreg_ofs \reg, (\reg * sizeof_vreg) +.endm + + save_vreg 0 + save_vreg 1 + save_vreg 2 + save_vreg 3 + save_vreg 4 + save_vreg 5 + save_vreg 6 + save_vreg 7 + save_vreg 8 + save_vreg 9 + save_vreg 10 + save_vreg 11 + save_vreg 12 + save_vreg 13 + save_vreg 14 + save_vreg 15 + save_vreg 16 + save_vreg 17 + save_vreg 18 + save_vreg 19 + save_vreg 20 + save_vreg 21 + save_vreg 22 + save_vreg 23 + save_vreg 24 + save_vreg 25 + save_vreg 26 + save_vreg 27 + save_vreg 28 + save_vreg 29 + save_vreg 30 + save_vreg 31 + save_vreg 32 + save_vreg_ofs 33, (32 * sizeof_vreg + 12) + + nop + +__kernel_sigtramp_rt: + raw_syscall __NR_rt_sigreturn +endf __kernel_sigtramp_rt + +#ifndef _ARCH_PPC64 + /* + * The non-rt sigreturn has the same layout at a different offset. + * Move the CFA and leave all othe other descriptions the same. + */ + .cfi_def_cfa 1, SIGNAL_FRAMESIZE + offsetof_sigframe_mcontext + nop +__kernel_sigtramp32: + raw_syscall __NR_sigreturn +endf __kernel_sigtramp32 +#endif + + .cfi_endproc diff --git a/linux-user/qemu.h b/linux-user/qemu.h index 12f638336a..4de9ec783f 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -32,6 +32,7 @@ struct image_info { abi_ulong brk; abi_ulong start_stack; abi_ulong stack_limit; + abi_ulong vdso; abi_ulong entry; abi_ulong code_offset; abi_ulong data_offset; diff --git a/linux-user/riscv/Makefile.vdso b/linux-user/riscv/Makefile.vdso new file mode 100644 index 0000000000..2c257dbfda --- /dev/null +++ b/linux-user/riscv/Makefile.vdso @@ -0,0 +1,15 @@ +include $(BUILD_DIR)/tests/tcg/riscv64-linux-user/config-target.mak + +SUBDIR = $(SRC_PATH)/linux-user/riscv +VPATH += $(SUBDIR) + +all: $(SUBDIR)/vdso-32.so $(SUBDIR)/vdso-64.so + +LDFLAGS = -nostdlib -shared -fpic -Wl,-h,linux-vdso.so.1 -Wl,--build-id=sha1 \ + -Wl,--hash-style=both -Wl,-T,$(SUBDIR)/vdso.ld + +$(SUBDIR)/vdso-32.so: vdso.S vdso.ld vdso-asmoffset.h + $(CC) -o $@ $(LDFLAGS) -mabi=ilp32d -march=rv32g $< + +$(SUBDIR)/vdso-64.so: vdso.S vdso.ld vdso-asmoffset.h + $(CC) -o $@ $(LDFLAGS) -mabi=lp64d -march=rv64g $< diff --git a/linux-user/riscv/meson.build b/linux-user/riscv/meson.build new file mode 100644 index 0000000000..beb989a7ca --- /dev/null +++ b/linux-user/riscv/meson.build @@ -0,0 +1,7 @@ +vdso_32_inc = gen_vdso.process('vdso-32.so', + extra_args: ['-r', '__vdso_rt_sigreturn']) +vdso_64_inc = gen_vdso.process('vdso-64.so', + extra_args: ['-r', '__vdso_rt_sigreturn']) + +linux_user_ss.add(when: 'TARGET_RISCV32', if_true: vdso_32_inc) +linux_user_ss.add(when: 'TARGET_RISCV64', if_true: vdso_64_inc) diff --git a/linux-user/riscv/signal.c b/linux-user/riscv/signal.c index f989f7f51f..941eadce87 100644 --- a/linux-user/riscv/signal.c +++ b/linux-user/riscv/signal.c @@ -21,6 +21,7 @@ #include "user-internals.h" #include "signal-common.h" #include "linux-user/trace.h" +#include "vdso-asmoffset.h" /* Signal handler invocation must be transparent for the code being interrupted. Complete CPU (hart) state is saved on entry and restored @@ -37,6 +38,8 @@ struct target_sigcontext { uint32_t fcsr; }; /* cf. riscv-linux:arch/riscv/include/uapi/asm/ptrace.h */ +QEMU_BUILD_BUG_ON(offsetof(struct target_sigcontext, fpr) != offsetof_freg0); + struct target_ucontext { abi_ulong uc_flags; abi_ptr uc_link; @@ -51,6 +54,11 @@ struct target_rt_sigframe { struct target_ucontext uc; }; +QEMU_BUILD_BUG_ON(sizeof(struct target_rt_sigframe) + != sizeof_rt_sigframe); +QEMU_BUILD_BUG_ON(offsetof(struct target_rt_sigframe, uc.uc_mcontext) + != offsetof_uc_mcontext); + static abi_ulong get_sigframe(struct target_sigaction *ka, CPURISCVState *regs, size_t framesize) { diff --git a/linux-user/riscv/vdso-32.so b/linux-user/riscv/vdso-32.so new file mode 100755 index 0000000000..1ad1e5cbbb Binary files /dev/null and b/linux-user/riscv/vdso-32.so differ diff --git a/linux-user/riscv/vdso-64.so b/linux-user/riscv/vdso-64.so new file mode 100755 index 0000000000..83992bebe6 Binary files /dev/null and b/linux-user/riscv/vdso-64.so differ diff --git a/linux-user/riscv/vdso-asmoffset.h b/linux-user/riscv/vdso-asmoffset.h new file mode 100644 index 0000000000..123902ef61 --- /dev/null +++ b/linux-user/riscv/vdso-asmoffset.h @@ -0,0 +1,9 @@ +#ifdef TARGET_ABI32 +# define sizeof_rt_sigframe 0x2b0 +# define offsetof_uc_mcontext 0x120 +# define offsetof_freg0 0x80 +#else +# define sizeof_rt_sigframe 0x340 +# define offsetof_uc_mcontext 0x130 +# define offsetof_freg0 0x100 +#endif diff --git a/linux-user/riscv/vdso.S b/linux-user/riscv/vdso.S new file mode 100644 index 0000000000..a86d8fc488 --- /dev/null +++ b/linux-user/riscv/vdso.S @@ -0,0 +1,187 @@ +/* + * RISC-V linux replacement vdso. + * + * Copyright 2021 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include +#include + +#if __riscv_xlen == 32 +# define TARGET_ABI32 +#endif +#include "vdso-asmoffset.h" + + .text + +.macro endf name + .globl \name + .type \name, @function + .size \name, . - \name +.endm + +.macro raw_syscall nr + li a7, \nr + ecall +.endm + +.macro vdso_syscall name, nr +\name: + raw_syscall \nr + ret +endf \name +.endm + +__vdso_gettimeofday: + .cfi_startproc +#ifdef __NR_gettimeofday + raw_syscall __NR_gettimeofday + ret +#else + /* No gettimeofday, fall back to clock_gettime64. */ + beq a1, zero, 1f + sw zero, 0(a1) /* tz->tz_minuteswest = 0 */ + sw zero, 4(a1) /* tz->tz_dsttime = 0 */ +1: addi sp, sp, -32 + .cfi_adjust_cfa_offset 32 + sw a0, 16(sp) /* save tv */ + mv a0, sp + raw_syscall __NR_clock_gettime64 + lw t0, 0(sp) /* timespec.tv_sec.low */ + lw t1, 4(sp) /* timespec.tv_sec.high */ + lw t2, 8(sp) /* timespec.tv_nsec.low */ + lw a1, 16(sp) /* restore tv */ + addi sp, sp, 32 + .cfi_adjust_cfa_offset -32 + bne a0, zero, 9f /* syscall error? */ + li a0, -EOVERFLOW + bne t1, zero, 9f /* y2038? */ + li a0, 0 + li t3, 1000 + divu t2, t2, t3 /* nsec -> usec */ + sw t0, 0(a1) /* tz->tv_sec */ + sw t2, 4(a1) /* tz->tv_usec */ +9: ret +#endif + .cfi_endproc +endf __vdso_gettimeofday + + .cfi_startproc + +#ifdef __NR_clock_gettime +vdso_syscall __vdso_clock_gettime, __NR_clock_gettime +#else +vdso_syscall __vdso_clock_gettime, __NR_clock_gettime64 +#endif + +#ifdef __NR_clock_getres +vdso_syscall __vdso_clock_getres, __NR_clock_getres +#else +vdso_syscall __vdso_clock_getres, __NR_clock_getres_time64 +#endif + +vdso_syscall __vdso_getcpu, __NR_getcpu + +__vdso_flush_icache: + /* qemu does not need to flush the icache */ + li a0, 0 + ret +endf __vdso_flush_icache + + .cfi_endproc + +/* + * Start the unwind info at least one instruction before the signal + * trampoline, because the unwinder will assume we are returning + * after a call site. + */ + + .cfi_startproc simple + .cfi_signal_frame + +#define sizeof_reg (__riscv_xlen / 4) +#define sizeof_freg 8 +#define B_GR (offsetof_uc_mcontext - sizeof_rt_sigframe) +#define B_FR (offsetof_uc_mcontext - sizeof_rt_sigframe + offsetof_freg0) + + .cfi_def_cfa 2, sizeof_rt_sigframe + + /* Return address */ + .cfi_return_column 64 + .cfi_offset 64, B_GR + 0 /* pc */ + + /* Integer registers */ + .cfi_offset 1, B_GR + 1 * sizeof_reg /* r1 (ra) */ + .cfi_offset 2, B_GR + 2 * sizeof_reg /* r2 (sp) */ + .cfi_offset 3, B_GR + 3 * sizeof_reg + .cfi_offset 4, B_GR + 4 * sizeof_reg + .cfi_offset 5, B_GR + 5 * sizeof_reg + .cfi_offset 6, B_GR + 6 * sizeof_reg + .cfi_offset 7, B_GR + 7 * sizeof_reg + .cfi_offset 8, B_GR + 8 * sizeof_reg + .cfi_offset 9, B_GR + 9 * sizeof_reg + .cfi_offset 10, B_GR + 10 * sizeof_reg + .cfi_offset 11, B_GR + 11 * sizeof_reg + .cfi_offset 12, B_GR + 12 * sizeof_reg + .cfi_offset 13, B_GR + 13 * sizeof_reg + .cfi_offset 14, B_GR + 14 * sizeof_reg + .cfi_offset 15, B_GR + 15 * sizeof_reg + .cfi_offset 16, B_GR + 16 * sizeof_reg + .cfi_offset 17, B_GR + 17 * sizeof_reg + .cfi_offset 18, B_GR + 18 * sizeof_reg + .cfi_offset 19, B_GR + 19 * sizeof_reg + .cfi_offset 20, B_GR + 20 * sizeof_reg + .cfi_offset 21, B_GR + 21 * sizeof_reg + .cfi_offset 22, B_GR + 22 * sizeof_reg + .cfi_offset 23, B_GR + 23 * sizeof_reg + .cfi_offset 24, B_GR + 24 * sizeof_reg + .cfi_offset 25, B_GR + 25 * sizeof_reg + .cfi_offset 26, B_GR + 26 * sizeof_reg + .cfi_offset 27, B_GR + 27 * sizeof_reg + .cfi_offset 28, B_GR + 28 * sizeof_reg + .cfi_offset 29, B_GR + 29 * sizeof_reg + .cfi_offset 30, B_GR + 30 * sizeof_reg + .cfi_offset 31, B_GR + 31 * sizeof_reg /* r31 */ + + .cfi_offset 32, B_FR + 0 /* f0 */ + .cfi_offset 33, B_FR + 1 * sizeof_freg /* f1 */ + .cfi_offset 34, B_FR + 2 * sizeof_freg + .cfi_offset 35, B_FR + 3 * sizeof_freg + .cfi_offset 36, B_FR + 4 * sizeof_freg + .cfi_offset 37, B_FR + 5 * sizeof_freg + .cfi_offset 38, B_FR + 6 * sizeof_freg + .cfi_offset 39, B_FR + 7 * sizeof_freg + .cfi_offset 40, B_FR + 8 * sizeof_freg + .cfi_offset 41, B_FR + 9 * sizeof_freg + .cfi_offset 42, B_FR + 10 * sizeof_freg + .cfi_offset 43, B_FR + 11 * sizeof_freg + .cfi_offset 44, B_FR + 12 * sizeof_freg + .cfi_offset 45, B_FR + 13 * sizeof_freg + .cfi_offset 46, B_FR + 14 * sizeof_freg + .cfi_offset 47, B_FR + 15 * sizeof_freg + .cfi_offset 48, B_FR + 16 * sizeof_freg + .cfi_offset 49, B_FR + 17 * sizeof_freg + .cfi_offset 50, B_FR + 18 * sizeof_freg + .cfi_offset 51, B_FR + 19 * sizeof_freg + .cfi_offset 52, B_FR + 20 * sizeof_freg + .cfi_offset 53, B_FR + 21 * sizeof_freg + .cfi_offset 54, B_FR + 22 * sizeof_freg + .cfi_offset 55, B_FR + 23 * sizeof_freg + .cfi_offset 56, B_FR + 24 * sizeof_freg + .cfi_offset 57, B_FR + 25 * sizeof_freg + .cfi_offset 58, B_FR + 26 * sizeof_freg + .cfi_offset 59, B_FR + 27 * sizeof_freg + .cfi_offset 60, B_FR + 28 * sizeof_freg + .cfi_offset 61, B_FR + 29 * sizeof_freg + .cfi_offset 62, B_FR + 30 * sizeof_freg + .cfi_offset 63, B_FR + 31 * sizeof_freg /* f31 */ + + nop + +__vdso_rt_sigreturn: + raw_syscall __NR_rt_sigreturn +endf __vdso_rt_sigreturn + + .cfi_endproc diff --git a/linux-user/riscv/vdso.ld b/linux-user/riscv/vdso.ld new file mode 100644 index 0000000000..aabe2b0ab3 --- /dev/null +++ b/linux-user/riscv/vdso.ld @@ -0,0 +1,74 @@ +/* + * Linker script for linux riscv replacement vdso. + * + * Copyright 2021 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +VERSION { + LINUX_4.15 { + global: + __vdso_rt_sigreturn; + __vdso_gettimeofday; + __vdso_clock_gettime; + __vdso_clock_getres; + __vdso_getcpu; + __vdso_flush_icache; + + local: *; + }; +} + + +PHDRS { + phdr PT_PHDR FLAGS(4) PHDRS; + load PT_LOAD FLAGS(7) FILEHDR PHDRS; + dynamic PT_DYNAMIC FLAGS(4); + eh_frame_hdr PT_GNU_EH_FRAME; + note PT_NOTE FLAGS(4); +} + +SECTIONS { + /* + * We can't prelink to any address without knowing something about + * the virtual memory space of the host, since that leaks over into + * the available memory space of the guest. + */ + . = SIZEOF_HEADERS; + + /* + * The following, including the FILEHDRS and PHDRS, are modified + * when we relocate the binary. We want them to be initially + * writable for the relocation; we'll force them read-only after. + */ + .note : { *(.note*) } :load :note + .dynamic : { *(.dynamic) } :load :dynamic + .dynsym : { *(.dynsym) } :load + /* + * There ought not be any real read-write data. + * But since we manipulated the segment layout, + * we have to put these sections somewhere. + */ + .data : { + *(.data*) + *(.sdata*) + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.bss*) + *(.dynbss*) + *(.gnu.linkonce.b.*) + } + + .rodata : { *(.rodata*) } + .hash : { *(.hash) } + .gnu.hash : { *(.gnu.hash) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .eh_frame_hdr : { *(.eh_frame_hdr) } :load :eh_frame_hdr + .eh_frame : { *(.eh_frame) } :load + + .text : { *(.text*) } :load =0xd503201f +} diff --git a/linux-user/s390x/Makefile.vdso b/linux-user/s390x/Makefile.vdso new file mode 100644 index 0000000000..e82bf9e29f --- /dev/null +++ b/linux-user/s390x/Makefile.vdso @@ -0,0 +1,11 @@ +include $(BUILD_DIR)/tests/tcg/s390x-linux-user/config-target.mak + +SUBDIR = $(SRC_PATH)/linux-user/s390x +VPATH += $(SUBDIR) + +all: $(SUBDIR)/vdso.so + +$(SUBDIR)/vdso.so: vdso.S vdso.ld vdso-asmoffset.h + $(CC) -o $@ -nostdlib -shared -Wl,-h,linux-vdso64.so.1 \ + -Wl,--build-id=sha1 -Wl,--hash-style=both \ + -Wl,-T,$(SUBDIR)/vdso.ld $< diff --git a/linux-user/s390x/meson.build b/linux-user/s390x/meson.build index 0781ccea1d..a7a25ed9ce 100644 --- a/linux-user/s390x/meson.build +++ b/linux-user/s390x/meson.build @@ -3,3 +3,9 @@ syscall_nr_generators += { arguments: [ meson.current_source_dir() / 'syscallhdr.sh', '@INPUT@', '@OUTPUT@', '@EXTRA_ARGS@' ], output: '@BASENAME@_nr.h') } + +vdso_inc = gen_vdso.process('vdso.so', extra_args: [ + '-s', '__kernel_sigreturn', + '-r', '__kernel_rt_sigreturn' + ]) +linux_user_ss.add(when: 'TARGET_S390X', if_true: vdso_inc) diff --git a/linux-user/s390x/signal.c b/linux-user/s390x/signal.c index f72165576f..b40f738a70 100644 --- a/linux-user/s390x/signal.c +++ b/linux-user/s390x/signal.c @@ -21,13 +21,12 @@ #include "user-internals.h" #include "signal-common.h" #include "linux-user/trace.h" +#include "vdso-asmoffset.h" #define __NUM_GPRS 16 #define __NUM_FPRS 16 #define __NUM_ACRS 16 -#define __SIGNAL_FRAMESIZE 160 /* FIXME: 31-bit mode -> 96 */ - #define _SIGCONTEXT_NSIG 64 #define _SIGCONTEXT_NSIG_BPW 64 /* FIXME: 31-bit mode -> 32 */ #define _SIGCONTEXT_NSIG_WORDS (_SIGCONTEXT_NSIG / _SIGCONTEXT_NSIG_BPW) @@ -63,7 +62,7 @@ typedef struct { } target_sigcontext; typedef struct { - uint8_t callee_used_stack[__SIGNAL_FRAMESIZE]; + uint8_t callee_used_stack[STACK_FRAME_OVERHEAD]; target_sigcontext sc; target_sigregs sregs; int signo; @@ -83,7 +82,7 @@ struct target_ucontext { }; typedef struct { - uint8_t callee_used_stack[__SIGNAL_FRAMESIZE]; + uint8_t callee_used_stack[STACK_FRAME_OVERHEAD]; /* * This field is no longer initialized by the kernel, but it's still a part * of the ABI. diff --git a/linux-user/s390x/vdso-asmoffset.h b/linux-user/s390x/vdso-asmoffset.h new file mode 100644 index 0000000000..27a062d6c1 --- /dev/null +++ b/linux-user/s390x/vdso-asmoffset.h @@ -0,0 +1,2 @@ +/* Minimum stack frame size */ +#define STACK_FRAME_OVERHEAD 160 diff --git a/linux-user/s390x/vdso.S b/linux-user/s390x/vdso.S new file mode 100644 index 0000000000..3332492477 --- /dev/null +++ b/linux-user/s390x/vdso.S @@ -0,0 +1,61 @@ +/* + * s390x linux replacement vdso. + * + * Copyright 2023 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include +#include "vdso-asmoffset.h" + +.macro endf name + .globl \name + .type \name, @function + .size \name, . - \name +.endm + +.macro raw_syscall n + .ifne \n < 0x100 + svc \n + .else + lghi %r1, \n + svc 0 + .endif +.endm + +.macro vdso_syscall name, nr +\name: + .cfi_startproc + aghi %r15, -(STACK_FRAME_OVERHEAD + 16) + .cfi_adjust_cfa_offset STACK_FRAME_OVERHEAD + 16 + stg %r14, STACK_FRAME_OVERHEAD(%r15) + .cfi_rel_offset %r14, STACK_FRAME_OVERHEAD + raw_syscall \nr + lg %r14, STACK_FRAME_OVERHEAD(%r15) + aghi %r15, STACK_FRAME_OVERHEAD + 16 + .cfi_restore %r14 + .cfi_adjust_cfa_offset -(STACK_FRAME_OVERHEAD + 16) + br %r14 + .cfi_endproc +endf \name +.endm + +vdso_syscall __kernel_gettimeofday, __NR_gettimeofday +vdso_syscall __kernel_clock_gettime, __NR_clock_gettime +vdso_syscall __kernel_clock_getres, __NR_clock_getres +vdso_syscall __kernel_getcpu, __NR_getcpu + +/* + * TODO unwind info, though we're ok without it. + * The kernel supplies bogus empty unwind info, and it is likely ignored + * by all users. Without it we get the fallback signal frame handling. + */ + +__kernel_sigreturn: + raw_syscall __NR_sigreturn +endf __kernel_sigreturn + +__kernel_rt_sigreturn: + raw_syscall __NR_rt_sigreturn +endf __kernel_rt_sigreturn diff --git a/linux-user/s390x/vdso.ld b/linux-user/s390x/vdso.ld new file mode 100644 index 0000000000..d3f1d1b164 --- /dev/null +++ b/linux-user/s390x/vdso.ld @@ -0,0 +1,72 @@ +/* + * Linker script for linux s390x replacement vdso. + * + * Copyright 2023 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +VERSION { + LINUX_2.6.29 { + global: + __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + __kernel_getcpu; + __kernel_rt_sigreturn; + __kernel_sigreturn; + /* + * QEMU handles syscall restart internally, so we don't + * need the __kernel_restart_syscall entry point. + */ + local: *; + }; +} + + +PHDRS { + phdr PT_PHDR FLAGS(4) PHDRS; + load PT_LOAD FLAGS(7) FILEHDR PHDRS; /* FLAGS=RWX */ + dynamic PT_DYNAMIC FLAGS(4); + eh_frame_hdr PT_GNU_EH_FRAME; + note PT_NOTE FLAGS(4); +} + +SECTIONS { + . = SIZEOF_HEADERS; + + /* + * The following, including the FILEHDRS and PHDRS, are modified + * when we relocate the binary. We want them to be initially + * writable for the relocation; we'll force them read-only after. + */ + .note : { *(.note*) } :load :note + .dynamic : { *(.dynamic) } :load :dynamic + .dynsym : { *(.dynsym) } :load + /* + * There ought not be any real read-write data. + * But since we manipulated the segment layout, + * we have to put these sections somewhere. + */ + .data : { + *(.data*) + *(.sdata*) + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.bss*) + *(.dynbss*) + *(.gnu.linkonce.b.*) + } + + .rodata : { *(.rodata*) } + .hash : { *(.hash) } + .gnu.hash : { *(.gnu.hash) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .eh_frame_hdr : { *(.eh_frame_hdr) } :load :eh_frame_hdr + .eh_frame : { *(.eh_frame) } :load + + .text : { *(.text*) } :load +} diff --git a/linux-user/s390x/vdso.so b/linux-user/s390x/vdso.so new file mode 100755 index 0000000000..64130f6f33 Binary files /dev/null and b/linux-user/s390x/vdso.so differ diff --git a/linux-user/signal.c b/linux-user/signal.c index 3b8efec89f..b35d1e512f 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -536,11 +536,10 @@ static void signal_table_init(void) host_to_target_signal_table[SIGABRT] = 0; host_to_target_signal_table[hsig++] = TARGET_SIGABRT; - for (; hsig <= SIGRTMAX; hsig++) { - tsig = hsig - SIGRTMIN + TARGET_SIGRTMIN; - if (tsig <= TARGET_NSIG) { - host_to_target_signal_table[hsig] = tsig; - } + for (tsig = TARGET_SIGRTMIN; + hsig <= SIGRTMAX && tsig <= TARGET_NSIG; + hsig++, tsig++) { + host_to_target_signal_table[hsig] = tsig; } /* Invert the mapping that has already been assigned. */ diff --git a/linux-user/syscall.c b/linux-user/syscall.c index d49cd314a2..65ac3ac796 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -7992,6 +7992,8 @@ static void open_self_maps_4(const struct open_self_maps_data *d, path = "[stack]"; } else if (start == info->brk) { path = "[heap]"; + } else if (start == info->vdso) { + path = "[vdso]"; } /* Except null device (MAP_ANON), adjust offset for this fragment. */ diff --git a/linux-user/x86_64/Makefile.vdso b/linux-user/x86_64/Makefile.vdso new file mode 100644 index 0000000000..26552b66db --- /dev/null +++ b/linux-user/x86_64/Makefile.vdso @@ -0,0 +1,11 @@ +include $(BUILD_DIR)/tests/tcg/x86_64-linux-user/config-target.mak + +SUBDIR = $(SRC_PATH)/linux-user/x86_64 +VPATH += $(SUBDIR) + +all: $(SUBDIR)/vdso.so + +$(SUBDIR)/vdso.so: vdso.S vdso.ld + $(CC) -o $@ -nostdlib -shared -Wl,-h,linux-vdso.so.1 \ + -Wl,--build-id=sha1 -Wl,--hash-style=both \ + -Wl,-T,$(SUBDIR)/vdso.ld $< diff --git a/linux-user/x86_64/meson.build b/linux-user/x86_64/meson.build index 203af9a60c..8c60da7a60 100644 --- a/linux-user/x86_64/meson.build +++ b/linux-user/x86_64/meson.build @@ -3,3 +3,7 @@ syscall_nr_generators += { arguments: [ meson.current_source_dir() / 'syscallhdr.sh', '@INPUT@', '@OUTPUT@', '@EXTRA_ARGS@' ], output: '@BASENAME@_nr.h') } + +vdso_inc = gen_vdso.process('vdso.so') + +linux_user_ss.add(when: 'TARGET_X86_64', if_true: vdso_inc) diff --git a/linux-user/x86_64/vdso.S b/linux-user/x86_64/vdso.S new file mode 100644 index 0000000000..47d16c00ab --- /dev/null +++ b/linux-user/x86_64/vdso.S @@ -0,0 +1,78 @@ +/* + * x86-64 linux replacement vdso. + * + * Copyright 2023 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include + +.macro endf name + .globl \name + .type \name, @function + .size \name, . - \name +.endm + +.macro weakalias name +\name = __vdso_\name + .weak \name +.endm + +.macro vdso_syscall name, nr +__vdso_\name: + mov $\nr, %eax + syscall + ret +endf __vdso_\name +weakalias \name +.endm + + .cfi_startproc + +vdso_syscall clock_gettime, __NR_clock_gettime +vdso_syscall clock_getres, __NR_clock_getres +vdso_syscall gettimeofday, __NR_gettimeofday +vdso_syscall time, __NR_time + +__vdso_getcpu: + /* + * There is no syscall number for this allocated on x64. + * We can handle this several ways: + * + * (1) Invent a syscall number for use within qemu. + * It should be easy enough to pick a number that + * is well out of the way of the kernel numbers. + * + * (2) Force the emulated cpu to support the rdtscp insn, + * and initialize the TSC_AUX value the appropriate value. + * + * (3) Pretend that we're always running on cpu 0. + * + * This last is the one that's implemented here, with the + * tiny bit of extra code to support rdtscp in place. + */ + xor %ecx, %ecx /* rdtscp w/ tsc_aux = 0 */ + + /* if (cpu != NULL) *cpu = (ecx & 0xfff); */ + test %rdi, %rdi + jz 1f + mov %ecx, %eax + and $0xfff, %eax + mov %eax, (%rdi) + + /* if (node != NULL) *node = (ecx >> 12); */ +1: test %rsi, %rsi + jz 2f + shr $12, %ecx + mov %ecx, (%rsi) + +2: xor %eax, %eax + ret +endf __vdso_getcpu + +weakalias getcpu + + .cfi_endproc + +/* TODO: Add elf note for LINUX_VERSION_CODE */ diff --git a/linux-user/x86_64/vdso.ld b/linux-user/x86_64/vdso.ld new file mode 100644 index 0000000000..ca6001cc3c --- /dev/null +++ b/linux-user/x86_64/vdso.ld @@ -0,0 +1,73 @@ +/* + * Linker script for linux x86-64 replacement vdso. + * + * Copyright 2023 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +VERSION { + LINUX_2.6 { + global: + clock_gettime; + __vdso_clock_gettime; + gettimeofday; + __vdso_gettimeofday; + getcpu; + __vdso_getcpu; + time; + __vdso_time; + clock_getres; + __vdso_clock_getres; + + local: *; + }; +} + + +PHDRS { + phdr PT_PHDR FLAGS(4) PHDRS; + load PT_LOAD FLAGS(7) FILEHDR PHDRS; /* FLAGS=RWX */ + dynamic PT_DYNAMIC FLAGS(4); + eh_frame_hdr PT_GNU_EH_FRAME; + note PT_NOTE FLAGS(4); +} + +SECTIONS { + . = SIZEOF_HEADERS; + + /* + * The following, including the FILEHDRS and PHDRS, are modified + * when we relocate the binary. We want them to be initially + * writable for the relocation; we'll force them read-only after. + */ + .note : { *(.note*) } :load :note + .dynamic : { *(.dynamic) } :load :dynamic + .dynsym : { *(.dynsym) } :load + .data : { + /* + * There ought not be any real read-write data. + * But since we manipulated the segment layout, + * we have to put these sections somewhere. + */ + *(.data*) + *(.sdata*) + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.bss*) + *(.dynbss*) + *(.gnu.linkonce.b.*) + } + + .rodata : { *(.rodata*) } + .hash : { *(.hash) } + .gnu.hash : { *(.gnu.hash) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .eh_frame_hdr : { *(.eh_frame_hdr) } :load :eh_frame_hdr + .eh_frame : { *(.eh_frame) } :load + + .text : { *(.text*) } :load =0x90909090 +} diff --git a/linux-user/x86_64/vdso.so b/linux-user/x86_64/vdso.so new file mode 100755 index 0000000000..c873d6ea58 Binary files /dev/null and b/linux-user/x86_64/vdso.so differ