mirror of
https://kernel.googlesource.com/pub/scm/linux/kernel/git/stable/linux-stable.git
synced 2025-09-14 11:19:08 +10:00
commit 31da94c25a
("riscv: add VMAP_STACK overflow detection") added
support for CONFIG_VMAP_STACK. If overflow is detected, CPU switches to
`shadow_stack` temporarily before switching finally to per-cpu
`overflow_stack`.
If two CPUs/harts race and both end up overflowing their kernel stacks, one
or both will end up corrupting each other's state because `shadow_stack` is
not per-cpu. This patch optimizes the per-cpu overflow stack switch by
directly picking the per-cpu `overflow_stack` and gets rid of `shadow_stack`.
The changes in this patch are as follows:
- Defines an asm macro to obtain a per-cpu symbol in a destination
register.
- In entry.S, when overflow is detected, the per-cpu overflow stack is
located using the per-cpu asm macro. Computing a per-cpu symbol requires
a temporary register. x31 is saved away into CSR_SCRATCH
(CSR_SCRATCH is zero anyway since we're in the kernel).
Please see the Links below for additional relevant discussion and an
alternative solution.
Tested by `echo EXHAUST_STACK > /sys/kernel/debug/provoke-crash/DIRECT`
Kernel crash log below
Insufficient stack space to handle exception!/debug/provoke-crash/DIRECT
Task stack: [0xff20000010a98000..0xff20000010a9c000]
Overflow stack: [0xff600001f7d98370..0xff600001f7d99370]
CPU: 1 PID: 205 Comm: bash Not tainted 6.1.0-rc2-00001-g328a1f96f7b9 #34
Hardware name: riscv-virtio,qemu (DT)
epc : __memset+0x60/0xfc
ra : recursive_loop+0x48/0xc6 [lkdtm]
epc : ffffffff808de0e4 ra : ffffffff0163a752 sp : ff20000010a97e80
gp : ffffffff815c0330 tp : ff600000820ea280 t0 : ff20000010a97e88
t1 : 000000000000002e t2 : 3233206874706564 s0 : ff20000010a982b0
s1 : 0000000000000012 a0 : ff20000010a97e88 a1 : 0000000000000000
a2 : 0000000000000400 a3 : ff20000010a98288 a4 : 0000000000000000
a5 : 0000000000000000 a6 : fffffffffffe43f0 a7 : 00007fffffffffff
s2 : ff20000010a97e88 s3 : ffffffff01644680 s4 : ff20000010a9be90
s5 : ff600000842ba6c0 s6 : 00aaaaaac29e42b0 s7 : 00fffffff0aa3684
s8 : 00aaaaaac2978040 s9 : 0000000000000065 s10: 00ffffff8a7cad10
s11: 00ffffff8a76a4e0 t3 : ffffffff815dbaf4 t4 : ffffffff815dbaf4
t5 : ffffffff815dbab8 t6 : ff20000010a9bb48
status: 0000000200000120 badaddr: ff20000010a97e88 cause: 000000000000000f
Kernel panic - not syncing: Kernel stack overflow
CPU: 1 PID: 205 Comm: bash Not tainted 6.1.0-rc2-00001-g328a1f96f7b9 #34
Hardware name: riscv-virtio,qemu (DT)
Call Trace:
[<ffffffff80006754>] dump_backtrace+0x30/0x38
[<ffffffff808de798>] show_stack+0x40/0x4c
[<ffffffff808ea2a8>] dump_stack_lvl+0x44/0x5c
[<ffffffff808ea2d8>] dump_stack+0x18/0x20
[<ffffffff808dec06>] panic+0x126/0x2fe
[<ffffffff800065ea>] walk_stackframe+0x0/0xf0
[<ffffffff0163a752>] recursive_loop+0x48/0xc6 [lkdtm]
SMP: stopping secondary CPUs
---[ end Kernel panic - not syncing: Kernel stack overflow ]---
Cc: Guo Ren <guoren@kernel.org>
Cc: Jisheng Zhang <jszhang@kernel.org>
Link: https://lore.kernel.org/linux-riscv/Y347B0x4VUNOd6V7@xhacker/T/#t
Link: https://lore.kernel.org/lkml/20221124094845.1907443-1-debug@rivosinc.com/
Signed-off-by: Deepak Gupta <debug@rivosinc.com>
Co-developed-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Acked-by: Guo Ren <guoren@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lore.kernel.org/r/20230927224757.1154247-9-samitolvanen@google.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
105 lines
3.3 KiB
C
105 lines
3.3 KiB
C
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/*
|
|
* Copyright (C) 2009 Chen Liqin <liqin.chen@sunplusct.com>
|
|
* Copyright (C) 2012 Regents of the University of California
|
|
* Copyright (C) 2017 SiFive
|
|
*/
|
|
|
|
#ifndef _ASM_RISCV_THREAD_INFO_H
|
|
#define _ASM_RISCV_THREAD_INFO_H
|
|
|
|
#include <asm/page.h>
|
|
#include <linux/const.h>
|
|
|
|
/*
 * thread information allocation
 *
 * THREAD_SIZE_ORDER is taken from CONFIG_THREAD_SIZE_ORDER. THREAD_SIZE is
 * PAGE_SIZE shifted left by that order, i.e. 2^THREAD_SIZE_ORDER pages.
 */
|
|
#define THREAD_SIZE_ORDER CONFIG_THREAD_SIZE_ORDER
|
|
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
|
|
|
|
/*
 * By aligning VMAP'd stacks to 2 * THREAD_SIZE, we can detect overflow by
 * checking sp & (1 << THREAD_SHIFT), which we can do cheaply in the entry
 * assembly.
 *
 * Without CONFIG_VMAP_STACK the alignment is simply THREAD_SIZE.
 */
|
|
#ifdef CONFIG_VMAP_STACK
|
|
#define THREAD_ALIGN (2 * THREAD_SIZE)
|
|
#else
|
|
#define THREAD_ALIGN THREAD_SIZE
|
|
#endif
|
|
|
|
/*
 * Shift that corresponds to THREAD_SIZE: assuming PAGE_SIZE is
 * 1 << PAGE_SHIFT, 1 << THREAD_SHIFT equals THREAD_SIZE.
 */
#define THREAD_SHIFT (PAGE_SHIFT + THREAD_SIZE_ORDER)
|
|
/*
 * Size of the overflow stack. SZ_4K is defined outside this header
 * (4 KiB, going by its name).
 */
#define OVERFLOW_STACK_SIZE SZ_4K
|
|
/*
 * NOTE(review): presumably the size of the old, non-per-cpu shadow_stack.
 * If shadow_stack no longer exists this define may have no users left --
 * confirm with a tree-wide search before removing it.
 */
#define SHADOW_OVERFLOW_STACK_SIZE (1024)
|
|
|
|
/* IRQ stack size is defined to be the same as THREAD_SIZE. */
#define IRQ_STACK_SIZE THREAD_SIZE
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
#include <asm/processor.h>
|
|
#include <asm/csr.h>
|
|
|
|
/*
 * low level task data that entry.S needs immediate access to
 * - this struct should fit entirely inside of one cache line
 * - if the members of this struct change, the assembly constants
 *   in asm-offsets.c must be updated accordingly
 * - thread_info is included in task_struct at an offset of 0. This means that
 *   tp points to both thread_info and task_struct.
 */
|
|
struct thread_info {
|
|
unsigned long flags; /* low level flags */
|
|
int preempt_count; /* 0=>preemptible, <0=>BUG */
|
|
/*
 * These stack pointers are overwritten on every system call or
 * exception. SP is also saved to the stack so that it can be recovered
 * when overwritten.
 */
|
|
long kernel_sp; /* Kernel stack pointer */
|
|
long user_sp; /* User stack pointer */
|
|
int cpu; /* NOTE(review): presumably the CPU number the task runs on -- confirm */
|
|
unsigned long syscall_work; /* SYSCALL_WORK_ flags */
|
|
};
|
|
|
|
/*
 * macros/functions for gaining access to the thread information structure
 *
 * preempt_count needs to be 1 initially, until the scheduler is functional.
 *
 * INIT_THREAD_INFO() expands to a designated initializer that names only
 * .flags and .preempt_count; members that are not named get C's default
 * zero initialization. The tsk argument is not referenced by the expansion.
 */
|
|
#define INIT_THREAD_INFO(tsk) \
|
|
{ \
|
|
.flags = 0, \
|
|
.preempt_count = INIT_PREEMPT_COUNT, \
|
|
}
|
|
|
|
/*
 * Architecture hooks operating on a task_struct; both are only declared
 * here and defined elsewhere.
 * NOTE(review): presumably called by the generic task teardown code --
 * confirm against the caller.
 */
void arch_release_task_struct(struct task_struct *tsk);
|
|
/*
 * NOTE(review): presumably copies arch-specific state from src to dst when a
 * task is duplicated and returns 0 on success -- confirm against the
 * definition.
 */
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
|
|
|
|
#endif /* !__ASSEMBLY__ */
|
|
|
|
/*
|
|
* thread information flags
|
|
* - these are process state flags that various assembly files may need to
|
|
* access
|
|
* - pending work-to-be-done flags are in lowest half-word
|
|
* - other flags in upper half-word(s)
|
|
*/
|
|
/*
 * Flag bit numbers. Bits 0 and 6-8 are not assigned in this header.
 */
#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
|
|
#define TIF_SIGPENDING 2 /* signal pending */
|
|
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
|
|
#define TIF_RESTORE_SIGMASK 4 /* restore signal mask in do_signal() */
|
|
#define TIF_MEMDIE 5 /* is terminating due to OOM killer */
|
|
#define TIF_NOTIFY_SIGNAL 9 /* signal notifications exist */
|
|
#define TIF_UPROBE 10 /* uprobe breakpoint or singlestep */
|
|
#define TIF_32BIT 11 /* compat-mode 32bit process */
|
|
|
|
/*
 * Single-bit masks built from the TIF_* bit numbers. No mask is defined here
 * for TIF_RESTORE_SIGMASK, TIF_MEMDIE or TIF_32BIT.
 */
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
|
|
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
|
|
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
|
|
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
|
|
#define _TIF_UPROBE (1 << TIF_UPROBE)
|
|
|
|
/* OR of every _TIF_* mask defined in this header. */
#define _TIF_WORK_MASK \
|
|
(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \
|
|
_TIF_NOTIFY_SIGNAL | _TIF_UPROBE)
|
|
|
|
#endif /* _ASM_RISCV_THREAD_INFO_H */
|