From 117dccc8d01ab70decb5d6a529f4780aaba20e0b Mon Sep 17 00:00:00 2001 From: Mike Engel Date: Sat, 24 Feb 2024 17:03:36 +0100 Subject: [PATCH 06/10] kernel: add NXP RT support Upstream-Status: Inappropriate [DEY specific] Signed-off-by: Mike Engel --- kernel/Kconfig.preempt | 6 + kernel/bpf/syscall.c | 4 +- kernel/dma/coherent.c | 15 +- kernel/entry/common.c | 4 +- kernel/hung_task.c | 11 +- kernel/ksysfs.c | 12 + kernel/panic.c | 10 +- kernel/printk/internal.h | 2 + kernel/printk/printk.c | 890 ++++++++++++++++++++++++++++++++---- kernel/printk/printk_safe.c | 32 ++ kernel/rcu/rcutorture.c | 6 + kernel/rcu/tree_stall.h | 2 + kernel/reboot.c | 16 +- kernel/sched/core.c | 160 ++++++- kernel/sched/fair.c | 16 +- kernel/sched/features.h | 3 + kernel/sched/sched.h | 9 + kernel/signal.c | 8 +- kernel/softirq.c | 82 +++- kernel/time/hrtimer.c | 4 +- kernel/time/tick-sched.c | 2 +- kernel/time/timer.c | 2 +- kernel/trace/trace.c | 50 +- kernel/trace/trace_events.c | 1 + kernel/trace/trace_output.c | 18 +- kernel/watchdog.c | 4 + kernel/watchdog_hld.c | 4 + 27 files changed, 1234 insertions(+), 139 deletions(-) diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index c2f1fd95a821..260c08efeb48 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -1,5 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only +config HAVE_PREEMPT_LAZY + bool + +config PREEMPT_LAZY + def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT + config PREEMPT_NONE_BUILD bool diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0c8b7733573e..c0915e2424f1 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2115,11 +2115,11 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog, st = per_cpu_ptr(prog->stats, cpu); do { - start = u64_stats_fetch_begin_irq(&st->syncp); + start = u64_stats_fetch_begin(&st->syncp); tnsecs = u64_stats_read(&st->nsecs); tcnt = u64_stats_read(&st->cnt); tmisses = u64_stats_read(&st->misses); - } while (u64_stats_fetch_retry_irq(&st->syncp, start)); + } while (u64_stats_fetch_retry(&st->syncp, start)); nsecs += tnsecs; cnt += tcnt; misses += tmisses; diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c index c21abc77c53e..f15ba6c6358e 100644 --- a/kernel/dma/coherent.c +++ b/kernel/dma/coherent.c @@ -36,7 +36,8 @@ static inline dma_addr_t dma_get_device_base(struct device *dev, } static struct dma_coherent_mem *dma_init_coherent_memory(phys_addr_t phys_addr, - dma_addr_t device_addr, size_t size, bool use_dma_pfn_offset) + dma_addr_t device_addr, size_t size, bool use_dma_pfn_offset, + bool cacheable) { struct dma_coherent_mem *dma_mem; int pages = size >> PAGE_SHIFT; @@ -45,7 +46,8 @@ static struct dma_coherent_mem *dma_init_coherent_memory(phys_addr_t phys_addr, if (!size) return ERR_PTR(-EINVAL); - mem_base = memremap(phys_addr, size, MEMREMAP_WC); + mem_base = memremap(phys_addr, size, cacheable ? MEMREMAP_WB : + MEMREMAP_WC); if (!mem_base) return ERR_PTR(-EINVAL); @@ -119,8 +121,10 @@ int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, { struct dma_coherent_mem *mem; int ret; + bool cacheable = dev_is_dma_coherent(dev); - mem = dma_init_coherent_memory(phys_addr, device_addr, size, false); + mem = dma_init_coherent_memory(phys_addr, device_addr, size, false, + cacheable); if (IS_ERR(mem)) return PTR_ERR(mem); @@ -310,7 +314,7 @@ int dma_init_global_coherent(phys_addr_t phys_addr, size_t size) { struct dma_coherent_mem *mem; - mem = dma_init_coherent_memory(phys_addr, phys_addr, size, true); + mem = dma_init_coherent_memory(phys_addr, phys_addr, size, true, false); if (IS_ERR(mem)) return PTR_ERR(mem); dma_coherent_default_memory = mem; @@ -335,9 +339,10 @@ static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev) { if (!rmem->priv) { struct dma_coherent_mem *mem; + bool cacheable = dev_is_dma_coherent(dev); mem = dma_init_coherent_memory(rmem->base, rmem->base, - rmem->size, true); + rmem->size, true, cacheable); if (IS_ERR(mem)) return PTR_ERR(mem); rmem->priv = mem; diff --git a/kernel/entry/common.c b/kernel/entry/common.c index be61332c66b5..c6301e520d47 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -155,7 +155,7 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, local_irq_enable_exit_to_user(ti_work); - if (ti_work & _TIF_NEED_RESCHED) + if (ti_work & _TIF_NEED_RESCHED_MASK) schedule(); if (ti_work & _TIF_UPROBE) @@ -386,7 +386,7 @@ void raw_irqentry_exit_cond_resched(void) rcu_irq_exit_check_preempt(); if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) WARN_ON_ONCE(!on_thread_stack()); - if (need_resched()) + if (should_resched(0)) preempt_schedule_irq(); } } diff --git a/kernel/hung_task.c b/kernel/hung_task.c index c71889f3f3fc..e2d2344cb9f4 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -127,6 +127,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) * complain: */ if (sysctl_hung_task_warnings) { + printk_prefer_direct_enter(); + if (sysctl_hung_task_warnings > 0) sysctl_hung_task_warnings--; pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n", @@ -142,6 +144,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) if (sysctl_hung_task_all_cpu_backtrace) hung_task_show_all_bt = true; + + printk_prefer_direct_exit(); } touch_nmi_watchdog(); @@ -212,12 +216,17 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) } unlock: rcu_read_unlock(); - if (hung_task_show_lock) + if (hung_task_show_lock) { + printk_prefer_direct_enter(); debug_show_all_locks(); + printk_prefer_direct_exit(); + } if (hung_task_show_all_bt) { hung_task_show_all_bt = false; + printk_prefer_direct_enter(); trigger_all_cpu_backtrace(); + printk_prefer_direct_exit(); } if (hung_task_call_panic) diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 65dba9076f31..ab18048e2186 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -142,6 +142,15 @@ KERNEL_ATTR_RO(vmcoreinfo); #endif /* CONFIG_CRASH_CORE */ +#if defined(CONFIG_PREEMPT_RT) +static ssize_t realtime_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", 1); +} +KERNEL_ATTR_RO(realtime); +#endif + /* whether file capabilities are enabled */ static ssize_t fscaps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -232,6 +241,9 @@ static struct attribute * kernel_attrs[] = { #ifndef CONFIG_TINY_RCU &rcu_expedited_attr.attr, &rcu_normal_attr.attr, +#endif +#ifdef CONFIG_PREEMPT_RT + &realtime_attr.attr, #endif NULL }; diff --git a/kernel/panic.c b/kernel/panic.c index 63e94f3bd8dc..97cc495d95f8 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -322,7 +322,6 @@ void panic(const char *fmt, ...) panic_smp_self_stop(); console_verbose(); - bust_spinlocks(1); va_start(args, fmt); len = vscnprintf(buf, sizeof(buf), fmt, args); va_end(args); @@ -339,6 +338,11 @@ void panic(const char *fmt, ...) dump_stack(); #endif + /* If atomic consoles are available, flush the kernel log. */ + console_flush_on_panic(CONSOLE_ATOMIC_FLUSH_PENDING); + + bust_spinlocks(1); + /* * If kgdb is enabled, give it a chance to run before we stop all * the other CPUs or else we won't be able to debug processes left @@ -653,6 +657,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint, { disable_trace_on_warning(); + printk_prefer_direct_enter(); + if (file) pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n", raw_smp_processor_id(), current->pid, file, line, @@ -681,6 +687,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint, /* Just a warning, don't kill lockdep. */ add_taint(taint, LOCKDEP_STILL_OK); + + printk_prefer_direct_exit(); } #ifndef __WARN_FLAGS diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index d947ca6c84f9..e7d8578860ad 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -20,6 +20,8 @@ enum printk_info_flags { LOG_CONT = 8, /* text is a fragment of a continuation line */ }; +extern bool block_console_kthreads; + __printf(4, 0) int vprintk_store(int facility, int level, const struct dev_printk_info *dev_info, diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index cc53fb77f77c..b245b2f60e6a 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -220,6 +221,36 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, } #endif /* CONFIG_PRINTK && CONFIG_SYSCTL */ +/* + * Used to synchronize printing kthreads against direct printing via + * console_trylock/console_unlock. + * + * Values: + * -1 = console kthreads atomically blocked (via global trylock) + * 0 = no kthread printing, console not locked (via trylock) + * >0 = kthread(s) actively printing + * + * Note: For synchronizing against direct printing via + * console_lock/console_unlock, see the @lock variable in + * struct console. + */ +static atomic_t console_kthreads_active = ATOMIC_INIT(0); + +#define console_kthreads_atomic_tryblock() \ + (atomic_cmpxchg(&console_kthreads_active, 0, -1) == 0) +#define console_kthreads_atomic_unblock() \ + atomic_cmpxchg(&console_kthreads_active, -1, 0) +#define console_kthreads_atomically_blocked() \ + (atomic_read(&console_kthreads_active) == -1) + +#define console_kthread_printing_tryenter() \ + atomic_inc_unless_negative(&console_kthreads_active) +#define console_kthread_printing_exit() \ + atomic_dec(&console_kthreads_active) + +/* Block console kthreads to avoid processing new messages. */ +bool block_console_kthreads; + /* * Helper macros to handle lockdep when locking/unlocking console_sem. We use * macros instead of functions so that _RET_IP_ contains useful information. @@ -268,14 +299,49 @@ static bool panic_in_progress(void) } /* - * This is used for debugging the mess that is the VT code by - * keeping track if we have the console semaphore held. It's - * definitely not the perfect debug tool (we don't know if _WE_ - * hold it and are racing, but it helps tracking those weird code - * paths in the console code where we end up in places I want - * locked without the console semaphore held). + * Tracks whether kthread printers are all blocked. A value of true implies + * that the console is locked via console_lock() or the console is suspended. + * Writing to this variable requires holding @console_sem. + */ +static bool console_kthreads_blocked; + +/* + * Block all kthread printers from a schedulable context. + * + * Requires holding @console_sem. + */ +static void console_kthreads_block(void) +{ + struct console *con; + + for_each_console(con) { + mutex_lock(&con->lock); + con->blocked = true; + mutex_unlock(&con->lock); + } + + console_kthreads_blocked = true; +} + +/* + * Unblock all kthread printers from a schedulable context. + * + * Requires holding @console_sem. */ -static int console_locked, console_suspended; +static void console_kthreads_unblock(void) +{ + struct console *con; + + for_each_console(con) { + mutex_lock(&con->lock); + con->blocked = false; + mutex_unlock(&con->lock); + } + + console_kthreads_blocked = false; +} + +static int console_suspended; /* * Array of consoles built from command line options (console=) @@ -358,7 +424,75 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; /* syslog_lock protects syslog_* variables and write access to clear_seq. */ static DEFINE_MUTEX(syslog_lock); +/* + * A flag to signify if printk_activate_kthreads() has already started the + * kthread printers. If true, any later registered consoles must start their + * own kthread directly. The flag is write protected by the console_lock. + */ +static bool printk_kthreads_available; + #ifdef CONFIG_PRINTK +static atomic_t printk_prefer_direct = ATOMIC_INIT(0); + +/** + * printk_prefer_direct_enter - cause printk() calls to attempt direct + * printing to all enabled consoles + * + * Since it is not possible to call into the console printing code from any + * context, there is no guarantee that direct printing will occur. + * + * This globally effects all printk() callers. + * + * Context: Any context. + */ +void printk_prefer_direct_enter(void) +{ + atomic_inc(&printk_prefer_direct); +} + +/** + * printk_prefer_direct_exit - restore printk() behavior + * + * Context: Any context. + */ +void printk_prefer_direct_exit(void) +{ + WARN_ON(atomic_dec_if_positive(&printk_prefer_direct) < 0); +} + +/* + * Calling printk() always wakes kthread printers so that they can + * flush the new message to their respective consoles. Also, if direct + * printing is allowed, printk() tries to flush the messages directly. + * + * Direct printing is allowed in situations when the kthreads + * are not available or the system is in a problematic state. + * + * See the implementation about possible races. + */ +static inline bool allow_direct_printing(void) +{ + /* + * Checking kthread availability is a possible race because the + * kthread printers can become permanently disabled during runtime. + * However, doing that requires holding the console_lock, so any + * pending messages will be direct printed by console_unlock(). + */ + if (!printk_kthreads_available) + return true; + + /* + * Prefer direct printing when the system is in a problematic state. + * The context that sets this state will always see the updated value. + * The other contexts do not care. Anyway, direct printing is just a + * best effort. The direct output is only possible when console_lock + * is not already taken and no kthread printers are actively printing. + */ + return (system_state > SYSTEM_RUNNING || + oops_in_progress || + atomic_read(&printk_prefer_direct)); +} + DECLARE_WAIT_QUEUE_HEAD(log_wait); /* All 3 protected by @syslog_lock. */ /* the next printk record to read by syslog(READ) or /proc/kmsg */ @@ -1847,6 +1981,7 @@ static int console_lock_spinning_disable_and_check(void) return 1; } +#if !IS_ENABLED(CONFIG_PREEMPT_RT) /** * console_trylock_spinning - try to get console_lock by busy waiting * @@ -1920,6 +2055,7 @@ static int console_trylock_spinning(void) return 1; } +#endif /* CONFIG_PREEMPT_RT */ /* * Call the specified console driver, asking it to write out the specified @@ -1927,19 +2063,28 @@ static int console_trylock_spinning(void) * dropped, a dropped message will be written out first. */ static void call_console_driver(struct console *con, const char *text, size_t len, - char *dropped_text) + char *dropped_text, bool atomic_printing) { + unsigned long dropped = 0; size_t dropped_len; - if (con->dropped && dropped_text) { + if (dropped_text) + dropped = atomic_long_xchg_relaxed(&con->dropped, 0); + + if (dropped) { dropped_len = snprintf(dropped_text, DROPPED_TEXT_MAX, "** %lu printk messages dropped **\n", - con->dropped); - con->dropped = 0; - con->write(con, dropped_text, dropped_len); + dropped); + if (atomic_printing) + con->write_atomic(con, dropped_text, dropped_len); + else + con->write(con, dropped_text, dropped_len); } - con->write(con, text, len); + if (atomic_printing) + con->write_atomic(con, text, len); + else + con->write(con, text, len); } /* @@ -2249,10 +2394,22 @@ asmlinkage int vprintk_emit(int facility, int level, printed_len = vprintk_store(facility, level, dev_info, fmt, args); /* If called from the scheduler, we can not call up(). */ - if (!in_sched) { + if (!in_sched && allow_direct_printing()) { +#if IS_ENABLED(CONFIG_PREEMPT_RT) + /* + * Use the non-spinning trylock since PREEMPT_RT does not + * support console lock handovers. + * + * Direct printing will most likely involve taking spinlocks. + * For PREEMPT_RT, this is only allowed if in a preemptible + * context. + */ + if (preemptible() && console_trylock()) + console_unlock(); +#else /* * The caller may be holding system-critical or - * timing-sensitive locks. Disable preemption during + * timing-sensitive locks. Disable preemption during direct * printing of all remaining records to all consoles so that * this context can return as soon as possible. Hopefully * another printk() caller will take over the printing. @@ -2267,6 +2424,7 @@ asmlinkage int vprintk_emit(int facility, int level, if (console_trylock_spinning()) console_unlock(); preempt_enable(); +#endif } if (in_sched) @@ -2297,9 +2455,81 @@ asmlinkage __visible int _printk(const char *fmt, ...) } EXPORT_SYMBOL(_printk); +#ifdef CONFIG_HAVE_ATOMIC_CONSOLE +static void __free_atomic_data(struct console_atomic_data *d) +{ + kfree(d->text); + kfree(d->ext_text); + kfree(d->dropped_text); +} + +static void free_atomic_data(struct console_atomic_data *d) +{ + int count = 1; + int i; + + if (!d) + return; + +#ifdef CONFIG_HAVE_NMI + count = 2; +#endif + + for (i = 0; i < count; i++) + __free_atomic_data(&d[i]); + kfree(d); +} + +static int __alloc_atomic_data(struct console_atomic_data *d, short flags) +{ + d->text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); + if (!d->text) + return -1; + + if (flags & CON_EXTENDED) { + d->ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL); + if (!d->ext_text) + return -1; + } else { + d->dropped_text = kmalloc(DROPPED_TEXT_MAX, GFP_KERNEL); + if (!d->dropped_text) + return -1; + } + + return 0; +} + +static struct console_atomic_data *alloc_atomic_data(short flags) +{ + struct console_atomic_data *d; + int count = 1; + int i; + +#ifdef CONFIG_HAVE_NMI + count = 2; +#endif + + d = kzalloc(sizeof(*d) * count, GFP_KERNEL); + if (!d) + goto err_out; + + for (i = 0; i < count; i++) { + if (__alloc_atomic_data(&d[i], flags) != 0) + goto err_out; + } + + return d; +err_out: + free_atomic_data(d); + return NULL; +} +#endif /* CONFIG_HAVE_ATOMIC_CONSOLE */ + static bool pr_flush(int timeout_ms, bool reset_on_progress); static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress); +static void printk_start_kthread(struct console *con); + #else /* CONFIG_PRINTK */ #define CONSOLE_LOG_MAX 0 @@ -2310,6 +2540,8 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre #define prb_first_valid_seq(rb) 0 #define prb_next_seq(rb) 0 +#define free_atomic_data(d) + static u64 syslog_seq; static size_t record_print_text(const struct printk_record *r, @@ -2328,12 +2560,14 @@ static ssize_t msg_print_ext_body(char *buf, size_t size, static void console_lock_spinning_enable(void) { } static int console_lock_spinning_disable_and_check(void) { return 0; } static void call_console_driver(struct console *con, const char *text, size_t len, - char *dropped_text) + char *dropped_text, bool atomic_printing) { } static bool suppress_message_printing(int level) { return false; } static bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; } static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; } +static void printk_start_kthread(struct console *con) { } +static bool allow_direct_printing(void) { return true; } #endif /* CONFIG_PRINTK */ @@ -2552,6 +2786,14 @@ static int console_cpu_notify(unsigned int cpu) /* If trylock fails, someone else is doing the printing */ if (console_trylock()) console_unlock(); + else { + /* + * If a new CPU comes online, the conditions for + * printer_should_wake() may have changed for some + * kthread printer with !CON_ANYTIME. + */ + wake_up_klogd(); + } } return 0; } @@ -2594,7 +2836,7 @@ void console_lock(void) down_console_sem(); if (console_suspended) return; - console_locked = 1; + console_kthreads_block(); console_may_schedule = 1; } EXPORT_SYMBOL(console_lock); @@ -2618,30 +2860,36 @@ int console_trylock(void) up_console_sem(); return 0; } - console_locked = 1; + if (!console_kthreads_atomic_tryblock()) { + up_console_sem(); + return 0; + } console_may_schedule = 0; return 1; } EXPORT_SYMBOL(console_trylock); +/* + * This is used to help to make sure that certain paths within the VT code are + * running with the console lock held. It is definitely not the perfect debug + * tool (it is not known if the VT code is the task holding the console lock), + * but it helps tracking those weird code paths in the console code such as + * when the console is suspended: where the console is not locked but no + * console printing may occur. + * + * Note: This returns true when the console is suspended but is not locked. + * This is intentional because the VT code must consider that situation + * the same as if the console was locked. + */ int is_console_locked(void) { - return console_locked; + return (console_kthreads_blocked || atomic_read(&console_kthreads_active)); } EXPORT_SYMBOL(is_console_locked); -/* - * Check if the given console is currently capable and allowed to print - * records. - * - * Requires the console_lock. - */ -static inline bool console_is_usable(struct console *con) +static inline bool __console_is_usable(short flags) { - if (!(con->flags & CON_ENABLED)) - return false; - - if (!con->write) + if (!(flags & CON_ENABLED)) return false; /* @@ -2650,18 +2898,116 @@ static inline bool console_is_usable(struct console *con) * cope (CON_ANYTIME) don't call them until this CPU is officially up. */ if (!cpu_online(raw_smp_processor_id()) && - !(con->flags & CON_ANYTIME)) + !(flags & CON_ANYTIME)) return false; return true; } +/* + * Check if the given console is currently capable and allowed to print + * records. + * + * Requires holding the console_lock. + */ +static inline bool console_is_usable(struct console *con, bool atomic_printing) +{ + if (atomic_printing) { +#ifdef CONFIG_HAVE_ATOMIC_CONSOLE + if (!con->write_atomic) + return false; + if (!con->atomic_data) + return false; +#else + return false; +#endif + } else if (!con->write) { + return false; + } + + return __console_is_usable(con->flags); +} + static void __console_unlock(void) { - console_locked = 0; + /* + * Depending on whether console_lock() or console_trylock() was used, + * appropriately allow the kthread printers to continue. + */ + if (console_kthreads_blocked) + console_kthreads_unblock(); + else + console_kthreads_atomic_unblock(); + + /* + * New records may have arrived while the console was locked. + * Wake the kthread printers to print them. + */ + wake_up_klogd(); + up_console_sem(); } +static u64 read_console_seq(struct console *con) +{ +#ifdef CONFIG_HAVE_ATOMIC_CONSOLE + unsigned long flags; + u64 seq2; + u64 seq; + + if (!con->atomic_data) + return con->seq; + + printk_cpu_sync_get_irqsave(flags); + + seq = con->seq; + seq2 = con->atomic_data[0].seq; + if (seq2 > seq) + seq = seq2; +#ifdef CONFIG_HAVE_NMI + seq2 = con->atomic_data[1].seq; + if (seq2 > seq) + seq = seq2; +#endif + + printk_cpu_sync_put_irqrestore(flags); + + return seq; +#else /* CONFIG_HAVE_ATOMIC_CONSOLE */ + return con->seq; +#endif +} + +static void write_console_seq(struct console *con, u64 val, bool atomic_printing) +{ +#ifdef CONFIG_HAVE_ATOMIC_CONSOLE + unsigned long flags; + u64 *seq; + + if (!con->atomic_data) { + con->seq = val; + return; + } + + printk_cpu_sync_get_irqsave(flags); + + if (atomic_printing) { + seq = &con->atomic_data[0].seq; +#ifdef CONFIG_HAVE_NMI + if (in_nmi()) + seq = &con->atomic_data[1].seq; +#endif + } else { + seq = &con->seq; + } + *seq = val; + + printk_cpu_sync_put_irqrestore(flags); +#else /* CONFIG_HAVE_ATOMIC_CONSOLE */ + con->seq = val; +#endif +} + /* * Print one record for the given console. The record printed is whatever * record is the next available record for the given console. @@ -2674,36 +3020,47 @@ static void __console_unlock(void) * If dropped messages should be printed, @dropped_text is a buffer of size * DROPPED_TEXT_MAX. Otherwise @dropped_text must be NULL. * + * @atomic_printing specifies if atomic printing should be used. + * * @handover will be set to true if a printk waiter has taken over the * console_lock, in which case the caller is no longer holding the - * console_lock. Otherwise it is set to false. + * console_lock. Otherwise it is set to false. A NULL pointer may be provided + * to disable allowing the console_lock to be taken over by a printk waiter. * * Returns false if the given console has no next record to print, otherwise * true. * - * Requires the console_lock. + * Requires the console_lock if @handover is non-NULL. + * Requires con->lock otherwise. */ -static bool console_emit_next_record(struct console *con, char *text, char *ext_text, - char *dropped_text, bool *handover) +static bool __console_emit_next_record(struct console *con, char *text, char *ext_text, + char *dropped_text, bool atomic_printing, + bool *handover) { - static int panic_console_dropped; + static atomic_t panic_console_dropped = ATOMIC_INIT(0); struct printk_info info; struct printk_record r; unsigned long flags; char *write_text; size_t len; + u64 seq; prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); - *handover = false; + if (handover) + *handover = false; - if (!prb_read_valid(prb, con->seq, &r)) + seq = read_console_seq(con); + + if (!prb_read_valid(prb, seq, &r)) return false; - if (con->seq != r.info->seq) { - con->dropped += r.info->seq - con->seq; - con->seq = r.info->seq; - if (panic_in_progress() && panic_console_dropped++ > 10) { + if (seq != r.info->seq) { + atomic_long_add((unsigned long)(r.info->seq - seq), &con->dropped); + write_console_seq(con, r.info->seq, atomic_printing); + seq = r.info->seq; + if (panic_in_progress() && + atomic_fetch_inc_relaxed(&panic_console_dropped) > 10) { suppress_panic_printk = 1; pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n"); } @@ -2711,7 +3068,7 @@ static bool console_emit_next_record(struct console *con, char *text, char *ext_ /* Skip record that has level above the console loglevel. */ if (suppress_message_printing(r.info->level)) { - con->seq++; + write_console_seq(con, seq + 1, atomic_printing); goto skip; } @@ -2725,31 +3082,65 @@ static bool console_emit_next_record(struct console *con, char *text, char *ext_ len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time); } - /* - * While actively printing out messages, if another printk() - * were to occur on another CPU, it may wait for this one to - * finish. This task can not be preempted if there is a - * waiter waiting to take over. - * - * Interrupts are disabled because the hand over to a waiter - * must not be interrupted until the hand over is completed - * (@console_waiter is cleared). - */ - printk_safe_enter_irqsave(flags); - console_lock_spinning_enable(); + if (handover) { + /* + * While actively printing out messages, if another printk() + * were to occur on another CPU, it may wait for this one to + * finish. This task can not be preempted if there is a + * waiter waiting to take over. + * + * Interrupts are disabled because the hand over to a waiter + * must not be interrupted until the hand over is completed + * (@console_waiter is cleared). + */ + printk_safe_enter_irqsave(flags); + console_lock_spinning_enable(); - stop_critical_timings(); /* don't trace print latency */ - call_console_driver(con, write_text, len, dropped_text); - start_critical_timings(); + /* don't trace irqsoff print latency */ + stop_critical_timings(); + } - con->seq++; + call_console_driver(con, write_text, len, dropped_text, atomic_printing); - *handover = console_lock_spinning_disable_and_check(); - printk_safe_exit_irqrestore(flags); + write_console_seq(con, seq + 1, atomic_printing); + + if (handover) { + start_critical_timings(); + *handover = console_lock_spinning_disable_and_check(); + printk_safe_exit_irqrestore(flags); + } skip: return true; } +/* + * Print a record for a given console, but allow another printk() caller to + * take over the console_lock and continue printing. + * + * Requires the console_lock, but depending on @handover after the call, the + * caller may no longer have the console_lock. + * + * See __console_emit_next_record() for argument and return details. + */ +static bool console_emit_next_record_transferable(struct console *con, char *text, char *ext_text, + char *dropped_text, bool *handover) +{ + /* + * Handovers are only supported if threaded printers are atomically + * blocked. The context taking over the console_lock may be atomic. + * + * PREEMPT_RT also does not support handovers because the spinning + * waiter can cause large latencies. + */ + if (!console_kthreads_atomically_blocked() || + IS_ENABLED(CONFIG_PREEMPT_RT)) { + *handover = false; + handover = NULL; + } + + return __console_emit_next_record(con, text, ext_text, dropped_text, false, handover); +} + /* * Print out all remaining records to all consoles. * @@ -2768,8 +3159,8 @@ static bool console_emit_next_record(struct console *con, char *text, char *ext_ * were flushed to all usable consoles. A returned false informs the caller * that everything was not flushed (either there were no usable consoles or * another context has taken over printing or it is a panic situation and this - * is not the panic CPU). Regardless the reason, the caller should assume it - * is not useful to immediately try again. + * is not the panic CPU or direct printing is not preferred). Regardless the + * reason, the caller should assume it is not useful to immediately try again. * * Requires the console_lock. */ @@ -2786,24 +3177,26 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove *handover = false; do { + /* Let the kthread printers do the work if they can. */ + if (!allow_direct_printing()) + return false; + any_progress = false; for_each_console(con) { bool progress; - if (!console_is_usable(con)) + if (!console_is_usable(con, false)) continue; any_usable = true; if (con->flags & CON_EXTENDED) { /* Extended consoles do not print "dropped messages". */ - progress = console_emit_next_record(con, &text[0], - &ext_text[0], NULL, - handover); + progress = console_emit_next_record_transferable(con, &text[0], + &ext_text[0], NULL, handover); } else { - progress = console_emit_next_record(con, &text[0], - NULL, &dropped_text[0], - handover); + progress = console_emit_next_record_transferable(con, &text[0], + NULL, &dropped_text[0], handover); } if (*handover) return false; @@ -2828,6 +3221,68 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove return any_usable; } +#if defined(CONFIG_HAVE_ATOMIC_CONSOLE) && defined(CONFIG_PRINTK) +static bool console_emit_next_record(struct console *con, char *text, char *ext_text, + char *dropped_text, bool atomic_printing); + +static void atomic_console_flush_all(void) +{ + unsigned long flags; + struct console *con; + bool any_progress; + int index = 0; + + if (console_suspended) + return; + +#ifdef CONFIG_HAVE_NMI + if (in_nmi()) + index = 1; +#endif + + printk_cpu_sync_get_irqsave(flags); + + do { + any_progress = false; + + for_each_console(con) { + bool progress; + + if (!console_is_usable(con, true)) + continue; + + if (con->flags & CON_EXTENDED) { + /* Extended consoles do not print "dropped messages". */ + progress = console_emit_next_record(con, + &con->atomic_data->text[index], + &con->atomic_data->ext_text[index], + NULL, + true); + } else { + progress = console_emit_next_record(con, + &con->atomic_data->text[index], + NULL, + &con->atomic_data->dropped_text[index], + true); + } + + if (!progress) + continue; + any_progress = true; + + touch_softlockup_watchdog_sync(); + clocksource_touch_watchdog(); + rcu_cpu_stall_reset(); + touch_nmi_watchdog(); + } + } while (any_progress); + + printk_cpu_sync_put_irqrestore(flags); +} +#else /* CONFIG_HAVE_ATOMIC_CONSOLE && CONFIG_PRINTK */ +#define atomic_console_flush_all() +#endif + /** * console_unlock - unlock the console system * @@ -2918,10 +3373,13 @@ void console_unblank(void) if (oops_in_progress) { if (down_trylock_console_sem() != 0) return; + if (!console_kthreads_atomic_tryblock()) { + up_console_sem(); + return; + } } else console_lock(); - console_locked = 1; console_may_schedule = 0; for_each_console(c) if ((c->flags & CON_ENABLED) && c->unblank) @@ -2940,6 +3398,11 @@ void console_unblank(void) */ void console_flush_on_panic(enum con_flush_mode mode) { + if (mode == CONSOLE_ATOMIC_FLUSH_PENDING) { + atomic_console_flush_all(); + return; + } + /* * If someone else is holding the console lock, trylock will fail * and may_schedule may be set. Ignore and proceed to unlock so @@ -2956,7 +3419,7 @@ void console_flush_on_panic(enum con_flush_mode mode) seq = prb_first_valid_seq(prb); for_each_console(c) - c->seq = seq; + write_console_seq(c, seq, false); } console_unlock(); } @@ -3196,16 +3659,27 @@ void register_console(struct console *newcon) console_drivers->next = newcon; } - newcon->dropped = 0; + atomic_long_set(&newcon->dropped, 0); + newcon->thread = NULL; + newcon->blocked = true; + mutex_init(&newcon->lock); +#ifdef CONFIG_HAVE_ATOMIC_CONSOLE + newcon->atomic_data = NULL; +#endif + if (newcon->flags & CON_PRINTBUFFER) { /* Get a consistent copy of @syslog_seq. */ mutex_lock(&syslog_lock); - newcon->seq = syslog_seq; + write_console_seq(newcon, syslog_seq, false); mutex_unlock(&syslog_lock); } else { /* Begin with next message. */ - newcon->seq = prb_next_seq(prb); + write_console_seq(newcon, prb_next_seq(prb), false); } + + if (printk_kthreads_available) + printk_start_kthread(newcon); + console_unlock(); console_sysfs_notify(); @@ -3229,6 +3703,7 @@ EXPORT_SYMBOL(register_console); int unregister_console(struct console *console) { + struct task_struct *thd; struct console *con; int res; @@ -3266,9 +3741,26 @@ int unregister_console(struct console *console) console_drivers->flags |= CON_CONSDEV; console->flags &= ~CON_ENABLED; + + /* + * console->thread can only be cleared under the console lock. But + * stopping the thread must be done without the console lock. The + * task that clears @thread is the task that stops the kthread. + */ + thd = console->thread; + console->thread = NULL; + console_unlock(); + + if (thd) + kthread_stop(thd); + console_sysfs_notify(); +#ifdef CONFIG_HAVE_ATOMIC_CONSOLE + free_atomic_data(console->atomic_data); +#endif + if (console->exit) res = console->exit(console); @@ -3362,6 +3854,20 @@ static int __init printk_late_init(void) } late_initcall(printk_late_init); +static int __init printk_activate_kthreads(void) +{ + struct console *con; + + console_lock(); + printk_kthreads_available = true; + for_each_console(con) + printk_start_kthread(con); + console_unlock(); + + return 0; +} +early_initcall(printk_activate_kthreads); + #if defined CONFIG_PRINTK /* If @con is specified, only wait for that console. Otherwise wait for all. */ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) @@ -3385,7 +3891,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre for_each_console(c) { if (con && con != c) continue; - if (!console_is_usable(c)) + if (!console_is_usable(c, false)) continue; printk_seq = c->seq; if (printk_seq < seq) @@ -3444,11 +3950,214 @@ static bool pr_flush(int timeout_ms, bool reset_on_progress) return __pr_flush(NULL, timeout_ms, reset_on_progress); } +static void __printk_fallback_preferred_direct(void) +{ + printk_prefer_direct_enter(); + pr_err("falling back to preferred direct printing\n"); + printk_kthreads_available = false; +} + +/* + * Enter preferred direct printing, but never exit. Mark console threads as + * unavailable. The system is then forever in preferred direct printing and + * any printing threads will exit. + * + * Must *not* be called under console_lock. Use + * __printk_fallback_preferred_direct() if already holding console_lock. + */ +static void printk_fallback_preferred_direct(void) +{ + console_lock(); + __printk_fallback_preferred_direct(); + console_unlock(); +} + +/* + * Print a record for a given console, not allowing another printk() caller + * to take over. This is appropriate for contexts that do not have the + * console_lock. + * + * See __console_emit_next_record() for argument and return details. + */ +static bool console_emit_next_record(struct console *con, char *text, char *ext_text, + char *dropped_text, bool atomic_printing) +{ + return __console_emit_next_record(con, text, ext_text, dropped_text, + atomic_printing, NULL); +} + +static bool printer_should_wake(struct console *con, u64 seq) +{ + short flags; + + if (kthread_should_stop() || !printk_kthreads_available) + return true; + + if (con->blocked || + console_kthreads_atomically_blocked() || + block_console_kthreads || + system_state > SYSTEM_RUNNING || + oops_in_progress) { + return false; + } + + /* + * This is an unsafe read from con->flags, but a false positive is + * not a problem. Worst case it would allow the printer to wake up + * although it is disabled. But the printer will notice that when + * attempting to print and instead go back to sleep. + */ + flags = data_race(READ_ONCE(con->flags)); + + if (!__console_is_usable(flags)) + return false; + + return prb_read_valid(prb, seq, NULL); +} + +static int printk_kthread_func(void *data) +{ + struct console *con = data; + char *dropped_text = NULL; + char *ext_text = NULL; + u64 seq = 0; + char *text; + int error; + +#ifdef CONFIG_HAVE_ATOMIC_CONSOLE + if (con->write_atomic) + con->atomic_data = alloc_atomic_data(con->flags); +#endif + + text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); + if (!text) { + con_printk(KERN_ERR, con, "failed to allocate text buffer\n"); + printk_fallback_preferred_direct(); + goto out; + } + + if (con->flags & CON_EXTENDED) { + ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL); + if (!ext_text) { + con_printk(KERN_ERR, con, "failed to allocate ext_text buffer\n"); + printk_fallback_preferred_direct(); + goto out; + } + } else { + dropped_text = kmalloc(DROPPED_TEXT_MAX, GFP_KERNEL); + if (!dropped_text) { + con_printk(KERN_ERR, con, "failed to allocate dropped_text buffer\n"); + printk_fallback_preferred_direct(); + goto out; + } + } + + con_printk(KERN_INFO, con, "printing thread started\n"); + for (;;) { + /* + * Guarantee this task is visible on the waitqueue before + * checking the wake condition. + * + * The full memory barrier within set_current_state() of + * prepare_to_wait_event() pairs with the full memory barrier + * within wq_has_sleeper(). + * + * This pairs with __wake_up_klogd:A. + */ + error = wait_event_interruptible(log_wait, + printer_should_wake(con, seq)); /* LMM(printk_kthread_func:A) */ + + if (kthread_should_stop() || !printk_kthreads_available) + break; + + if (error) + continue; + + error = mutex_lock_interruptible(&con->lock); + if (error) + continue; + + if (con->blocked || + !console_kthread_printing_tryenter()) { + /* Another context has locked the console_lock. */ + mutex_unlock(&con->lock); + continue; + } + + /* + * Although this context has not locked the console_lock, it + * is known that the console_lock is not locked and it is not + * possible for any other context to lock the console_lock. + * Therefore it is safe to read con->flags. + */ + + if (!__console_is_usable(con->flags)) { + console_kthread_printing_exit(); + mutex_unlock(&con->lock); + continue; + } + + /* + * Even though the printk kthread is always preemptible, it is + * still not allowed to call cond_resched() from within + * console drivers. The task may become non-preemptible in the + * console driver call chain. For example, vt_console_print() + * takes a spinlock and then can call into fbcon_redraw(), + * which can conditionally invoke cond_resched(). + */ + console_may_schedule = 0; + console_emit_next_record(con, text, ext_text, dropped_text, false); + + seq = con->seq; + + console_kthread_printing_exit(); + + mutex_unlock(&con->lock); + } + + con_printk(KERN_INFO, con, "printing thread stopped\n"); +out: + kfree(dropped_text); + kfree(ext_text); + kfree(text); + + console_lock(); + /* + * If this kthread is being stopped by another task, con->thread will + * already be NULL. That is fine. The important thing is that it is + * NULL after the kthread exits. + */ + con->thread = NULL; + console_unlock(); + + return 0; +} + +/* Must be called under console_lock. */ +static void printk_start_kthread(struct console *con) +{ + /* + * Do not start a kthread if there is no write() callback. The + * kthreads assume the write() callback exists. + */ + if (!con->write) + return; + + con->thread = kthread_run(printk_kthread_func, con, + "pr/%s%d", con->name, con->index); + if (IS_ERR(con->thread)) { + con->thread = NULL; + con_printk(KERN_ERR, con, "unable to start printing thread\n"); + __printk_fallback_preferred_direct(); + return; + } +} + /* * Delayed printk version, for scheduler-internal messages: */ -#define PRINTK_PENDING_WAKEUP 0x01 -#define PRINTK_PENDING_OUTPUT 0x02 +#define PRINTK_PENDING_WAKEUP 0x01 +#define PRINTK_PENDING_DIRECT_OUTPUT 0x02 static DEFINE_PER_CPU(int, printk_pending); @@ -3456,10 +4165,14 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) { int pending = this_cpu_xchg(printk_pending, 0); - if (pending & PRINTK_PENDING_OUTPUT) { + if (pending & PRINTK_PENDING_DIRECT_OUTPUT) { + printk_prefer_direct_enter(); + /* If trylock fails, someone else is doing the printing */ if (console_trylock()) console_unlock(); + + printk_prefer_direct_exit(); } if (pending & PRINTK_PENDING_WAKEUP) @@ -3484,10 +4197,11 @@ static void __wake_up_klogd(int val) * prepare_to_wait_event(), which is called after ___wait_event() adds * the waiter but before it has checked the wait condition. * - * This pairs with devkmsg_read:A and syslog_print:A. + * This pairs with devkmsg_read:A, syslog_print:A, and + * printk_kthread_func:A. */ if (wq_has_sleeper(&log_wait) || /* LMM(__wake_up_klogd:A) */ - (val & PRINTK_PENDING_OUTPUT)) { + (val & PRINTK_PENDING_DIRECT_OUTPUT)) { this_cpu_or(printk_pending, val); irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); } @@ -3527,7 +4241,17 @@ void defer_console_output(void) * New messages may have been added directly to the ringbuffer * using vprintk_store(), so wake any waiters as well. */ - __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT); + int val = PRINTK_PENDING_WAKEUP; + + /* + * Make sure that some context will print the messages when direct + * printing is allowed. This happens in situations when the kthreads + * may not be as reliable or perhaps unusable. + */ + if (allow_direct_printing()) + val |= PRINTK_PENDING_DIRECT_OUTPUT; + + __wake_up_klogd(val); } void printk_trigger_flush(void) diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index 6d10927a07d8..8e8fd2fb0a5b 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -8,7 +8,9 @@ #include #include #include +#include #include +#include #include "internal.h" @@ -45,3 +47,33 @@ asmlinkage int vprintk(const char *fmt, va_list args) return vprintk_default(fmt, args); } EXPORT_SYMBOL(vprintk); + +/** + * try_block_console_kthreads() - Try to block console kthreads and + * make the global console_lock() avaialble + * + * @timeout_ms: The maximum time (in ms) to wait. + * + * Prevent console kthreads from starting processing new messages. Wait + * until the global console_lock() become available. + * + * Context: Can be called in any context. + */ +void try_block_console_kthreads(int timeout_ms) +{ + block_console_kthreads = true; + + /* Do not wait when the console lock could not be safely taken. */ + if (this_cpu_read(printk_context) || in_nmi()) + return; + + while (timeout_ms > 0) { + if (console_trylock()) { + console_unlock(); + return; + } + + udelay(1000); + timeout_ms -= 1; + } +} diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 503c2aa845a4..dcd8c0e44c00 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -2363,6 +2363,12 @@ static int rcutorture_booster_init(unsigned int cpu) WARN_ON_ONCE(!t); sp.sched_priority = 2; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); +#ifdef CONFIG_PREEMPT_RT + t = per_cpu(timersd, cpu); + WARN_ON_ONCE(!t); + sp.sched_priority = 2; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); +#endif } /* Don't allow time recalculation while creating a new task. */ diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 5653560573e2..dcbbcf93d608 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -642,6 +642,7 @@ static void print_cpu_stall(unsigned long gps) * See Documentation/RCU/stallwarn.rst for info on how to debug * RCU CPU stall warnings. */ + printk_prefer_direct_enter(); trace_rcu_stall_warning(rcu_state.name, TPS("SelfDetected")); pr_err("INFO: %s self-detected stall on CPU\n", rcu_state.name); raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags); @@ -676,6 +677,7 @@ static void print_cpu_stall(unsigned long gps) */ set_tsk_need_resched(current); set_preempt_need_resched(); + printk_prefer_direct_exit(); } static void check_cpu_stall(struct rcu_data *rdp) diff --git a/kernel/reboot.c b/kernel/reboot.c index 3bba88c7ffc6..57cedc330660 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -82,6 +82,7 @@ void kernel_restart_prepare(char *cmd) { blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); system_state = SYSTEM_RESTART; + try_block_console_kthreads(10000); usermodehelper_disable(); device_shutdown(); } @@ -282,6 +283,7 @@ static void kernel_shutdown_prepare(enum system_states state) blocking_notifier_call_chain(&reboot_notifier_list, (state == SYSTEM_HALT) ? SYS_HALT : SYS_POWER_OFF, NULL); system_state = state; + try_block_console_kthreads(10000); usermodehelper_disable(); device_shutdown(); } @@ -836,9 +838,11 @@ static int __orderly_reboot(void) ret = run_cmd(reboot_cmd); if (ret) { + printk_prefer_direct_enter(); pr_warn("Failed to start orderly reboot: forcing the issue\n"); emergency_sync(); kernel_restart(NULL); + printk_prefer_direct_exit(); } return ret; @@ -851,6 +855,7 @@ static int __orderly_poweroff(bool force) ret = run_cmd(poweroff_cmd); if (ret && force) { + printk_prefer_direct_enter(); pr_warn("Failed to start orderly shutdown: forcing the issue\n"); /* @@ -860,6 +865,7 @@ static int __orderly_poweroff(bool force) */ emergency_sync(); kernel_power_off(); + printk_prefer_direct_exit(); } return ret; @@ -917,6 +923,8 @@ EXPORT_SYMBOL_GPL(orderly_reboot); */ static void hw_failure_emergency_poweroff_func(struct work_struct *work) { + printk_prefer_direct_enter(); + /* * We have reached here after the emergency shutdown waiting period has * expired. This means orderly_poweroff has not been able to shut off @@ -933,6 +941,8 @@ static void hw_failure_emergency_poweroff_func(struct work_struct *work) */ pr_emerg("Hardware protection shutdown failed. Trying emergency restart\n"); emergency_restart(); + + printk_prefer_direct_exit(); } static DECLARE_DELAYED_WORK(hw_failure_emergency_poweroff_work, @@ -971,11 +981,13 @@ void hw_protection_shutdown(const char *reason, int ms_until_forced) { static atomic_t allow_proceed = ATOMIC_INIT(1); + printk_prefer_direct_enter(); + pr_emerg("HARDWARE PROTECTION shutdown (%s)\n", reason); /* Shutdown should be initiated only once. */ if (!atomic_dec_and_test(&allow_proceed)) - return; + goto out; /* * Queue a backup emergency shutdown in the event of @@ -983,6 +995,8 @@ void hw_protection_shutdown(const char *reason, int ms_until_forced) */ hw_failure_emergency_poweroff(ms_until_forced); orderly_poweroff(true); +out: + printk_prefer_direct_exit(); } EXPORT_SYMBOL_GPL(hw_protection_shutdown); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0f6a92737c91..f26292bed420 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1040,6 +1040,46 @@ void resched_curr(struct rq *rq) trace_sched_wake_idle_without_ipi(cpu); } +#ifdef CONFIG_PREEMPT_LAZY + +static int tsk_is_polling(struct task_struct *p) +{ +#ifdef TIF_POLLING_NRFLAG + return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); +#else + return 0; +#endif +} + +void resched_curr_lazy(struct rq *rq) +{ + struct task_struct *curr = rq->curr; + int cpu; + + if (!sched_feat(PREEMPT_LAZY)) { + resched_curr(rq); + return; + } + + if (test_tsk_need_resched(curr)) + return; + + if (test_tsk_need_resched_lazy(curr)) + return; + + set_tsk_need_resched_lazy(curr); + + cpu = cpu_of(rq); + if (cpu == smp_processor_id()) + return; + + /* NEED_RESCHED_LAZY must be visible before we test polling */ + smp_mb(); + if (!tsk_is_polling(curr)) + smp_send_reschedule(cpu); +} +#endif + void resched_cpu(int cpu) { struct rq *rq = cpu_rq(cpu); @@ -2224,6 +2264,7 @@ void migrate_disable(void) preempt_disable(); this_rq()->nr_pinned++; p->migration_disabled = 1; + preempt_lazy_disable(); preempt_enable(); } EXPORT_SYMBOL_GPL(migrate_disable); @@ -2255,6 +2296,7 @@ void migrate_enable(void) barrier(); p->migration_disabled = 0; this_rq()->nr_pinned--; + preempt_lazy_enable(); preempt_enable(); } EXPORT_SYMBOL_GPL(migrate_enable); @@ -3277,6 +3319,76 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p, } #endif /* CONFIG_NUMA_BALANCING */ +#ifdef CONFIG_PREEMPT_RT + +/* + * Consider: + * + * set_special_state(X); + * + * do_things() + * // Somewhere in there is an rtlock that can be contended: + * current_save_and_set_rtlock_wait_state(); + * [...] + * schedule_rtlock(); (A) + * [...] + * current_restore_rtlock_saved_state(); + * + * schedule(); (B) + * + * If p->saved_state is anything else than TASK_RUNNING, then p blocked on an + * rtlock (A) *before* voluntarily calling into schedule() (B) after setting its + * state to X. For things like ptrace (X=TASK_TRACED), the task could have more + * work to do upon acquiring the lock in do_things() before whoever called + * wait_task_inactive() should return. IOW, we have to wait for: + * + * p.saved_state = TASK_RUNNING + * p.__state = X + * + * which implies the task isn't blocked on an RT lock and got to schedule() (B). + * + * Also see comments in ttwu_state_match(). + */ + +static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state) +{ + unsigned long flags; + bool mismatch; + + raw_spin_lock_irqsave(&p->pi_lock, flags); + if (READ_ONCE(p->__state) & match_state) + mismatch = false; + else if (READ_ONCE(p->saved_state) & match_state) + mismatch = false; + else + mismatch = true; + + raw_spin_unlock_irqrestore(&p->pi_lock, flags); + return mismatch; +} +static __always_inline bool state_match(struct task_struct *p, unsigned int match_state, + bool *wait) +{ + if (READ_ONCE(p->__state) & match_state) + return true; + if (READ_ONCE(p->saved_state) & match_state) { + *wait = true; + return true; + } + return false; +} +#else +static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state) +{ + return !(READ_ONCE(p->__state) & match_state); +} +static __always_inline bool state_match(struct task_struct *p, unsigned int match_state, + bool *wait) +{ + return (READ_ONCE(p->__state) & match_state); +} +#endif + /* * wait_task_inactive - wait for a thread to unschedule. * @@ -3295,7 +3407,7 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p, */ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state) { - int running, queued; + bool running, wait; struct rq_flags rf; unsigned long ncsw; struct rq *rq; @@ -3321,7 +3433,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state * is actually now running somewhere else! */ while (task_on_cpu(rq, p)) { - if (!(READ_ONCE(p->__state) & match_state)) + if (state_mismatch(p, match_state)) return 0; cpu_relax(); } @@ -3334,9 +3446,10 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state rq = task_rq_lock(p, &rf); trace_sched_wait_task(p); running = task_on_cpu(rq, p); - queued = task_on_rq_queued(p); + wait = task_on_rq_queued(p); ncsw = 0; - if (READ_ONCE(p->__state) & match_state) + + if (state_match(p, match_state, &wait)) ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ task_rq_unlock(rq, p, &rf); @@ -3366,7 +3479,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state * running right now), it's preempted, and we should * yield - it could be a while. */ - if (unlikely(queued)) { + if (unlikely(wait)) { ktime_t to = NSEC_PER_SEC / HZ; set_current_state(TASK_UNINTERRUPTIBLE); @@ -4647,6 +4760,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) p->on_cpu = 0; #endif init_task_preempt_count(p); +#ifdef CONFIG_HAVE_PREEMPT_LAZY + task_thread_info(p)->preempt_lazy_count = 0; +#endif #ifdef CONFIG_SMP plist_node_init(&p->pushable_tasks, MAX_PRIO); RB_CLEAR_NODE(&p->pushable_dl_tasks); @@ -6517,6 +6633,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) next = pick_next_task(rq, prev, &rf); clear_tsk_need_resched(prev); + clear_tsk_need_resched_lazy(prev); clear_preempt_need_resched(); #ifdef CONFIG_SCHED_DEBUG rq->last_seen_need_resched_ns = 0; @@ -6731,6 +6848,30 @@ static void __sched notrace preempt_schedule_common(void) } while (need_resched()); } +#ifdef CONFIG_PREEMPT_LAZY +/* + * If TIF_NEED_RESCHED is then we allow to be scheduled away since this is + * set by a RT task. Oterwise we try to avoid beeing scheduled out as long as + * preempt_lazy_count counter >0. + */ +static __always_inline int preemptible_lazy(void) +{ + if (test_thread_flag(TIF_NEED_RESCHED)) + return 1; + if (current_thread_info()->preempt_lazy_count) + return 0; + return 1; +} + +#else + +static inline int preemptible_lazy(void) +{ + return 1; +} + +#endif + #ifdef CONFIG_PREEMPTION /* * This is the entry point to schedule() from in-kernel preemption @@ -6744,6 +6885,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) */ if (likely(!preemptible())) return; + if (!preemptible_lazy()) + return; preempt_schedule_common(); } NOKPROBE_SYMBOL(preempt_schedule); @@ -6791,6 +6934,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) if (likely(!preemptible())) return; + if (!preemptible_lazy()) + return; + do { /* * Because the function tracer can trace preempt_count_sub() @@ -9056,7 +9202,9 @@ void __init init_idle(struct task_struct *idle, int cpu) /* Set the preempt count _outside_ the spinlocks! */ init_idle_preempt_count(idle, cpu); - +#ifdef CONFIG_HAVE_PREEMPT_LAZY + task_thread_info(idle)->preempt_lazy_count = 0; +#endif /* * The idle tasks have their own, simple scheduling class: */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 612873ec2197..9493473e8e16 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4930,7 +4930,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) ideal_runtime = sched_slice(cfs_rq, curr); delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; if (delta_exec > ideal_runtime) { - resched_curr(rq_of(cfs_rq)); + resched_curr_lazy(rq_of(cfs_rq)); /* * The current task ran long enough, ensure it doesn't get * re-elected due to buddy favours. @@ -4954,7 +4954,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) return; if (delta > ideal_runtime) - resched_curr(rq_of(cfs_rq)); + resched_curr_lazy(rq_of(cfs_rq)); } static void @@ -5100,7 +5100,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) * validating it and just reschedule. */ if (queued) { - resched_curr(rq_of(cfs_rq)); + resched_curr_lazy(rq_of(cfs_rq)); return; } /* @@ -5249,7 +5249,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) * hierarchy can be throttled */ if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) - resched_curr(rq_of(cfs_rq)); + resched_curr_lazy(rq_of(cfs_rq)); } static __always_inline @@ -6000,7 +6000,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) if (delta < 0) { if (task_current(rq, p)) - resched_curr(rq); + resched_curr_lazy(rq); return; } hrtick_start(rq, delta); @@ -7729,7 +7729,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ return; preempt: - resched_curr(rq); + resched_curr_lazy(rq); /* * Only set the backward buddy when the current task is still * on the rq. This can happen when a wakeup gets interleaved @@ -11892,7 +11892,7 @@ static void task_fork_fair(struct task_struct *p) * 'current' within the tree based on its new key value. */ swap(curr->vruntime, se->vruntime); - resched_curr(rq); + resched_curr_lazy(rq); } se->vruntime -= cfs_rq->min_vruntime; @@ -11919,7 +11919,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) */ if (task_current(rq, p)) { if (p->prio > oldprio) - resched_curr(rq); + resched_curr_lazy(rq); } else check_preempt_curr(rq, p, 0); } diff --git a/kernel/sched/features.h b/kernel/sched/features.h index ee7f23c76bd3..e13090e33f3c 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -48,6 +48,9 @@ SCHED_FEAT(NONTASK_CAPACITY, true) #ifdef CONFIG_PREEMPT_RT SCHED_FEAT(TTWU_QUEUE, false) +# ifdef CONFIG_PREEMPT_LAZY +SCHED_FEAT(PREEMPT_LAZY, true) +# endif #else /* diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b62d53d7c264..f2577f511a41 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2350,6 +2350,15 @@ extern void reweight_task(struct task_struct *p, int prio); extern void resched_curr(struct rq *rq); extern void resched_cpu(int cpu); +#ifdef CONFIG_PREEMPT_LAZY +extern void resched_curr_lazy(struct rq *rq); +#else +static inline void resched_curr_lazy(struct rq *rq) +{ + resched_curr(rq); +} +#endif + extern struct rt_bandwidth def_rt_bandwidth; extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); diff --git a/kernel/signal.c b/kernel/signal.c index 5d45f5da2b36..58e919c7c936 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2302,13 +2302,13 @@ static int ptrace_stop(int exit_code, int why, unsigned long message, /* * Don't want to allow preemption here, because * sys_ptrace() needs this task to be inactive. - * - * XXX: implement read_unlock_no_resched(). */ - preempt_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_disable(); read_unlock(&tasklist_lock); cgroup_enter_frozen(); - preempt_enable_no_resched(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable_no_resched(); schedule(); cgroup_leave_frozen(true); diff --git a/kernel/softirq.c b/kernel/softirq.c index c8a6913c067d..82f3e68fbe22 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -637,6 +637,24 @@ static inline void tick_irq_exit(void) #endif } +#ifdef CONFIG_PREEMPT_RT +DEFINE_PER_CPU(struct task_struct *, timersd); +DEFINE_PER_CPU(unsigned long, pending_timer_softirq); + +static void wake_timersd(void) +{ + struct task_struct *tsk = __this_cpu_read(timersd); + + if (tsk) + wake_up_process(tsk); +} + +#else + +static inline void wake_timersd(void) { } + +#endif + static inline void __irq_exit_rcu(void) { #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED @@ -649,6 +667,10 @@ static inline void __irq_exit_rcu(void) if (!in_interrupt() && local_softirq_pending()) invoke_softirq(); + if (IS_ENABLED(CONFIG_PREEMPT_RT) && local_pending_timers() && + !(in_nmi() | in_hardirq())) + wake_timersd(); + tick_irq_exit(); } @@ -976,12 +998,70 @@ static struct smp_hotplug_thread softirq_threads = { .thread_comm = "ksoftirqd/%u", }; +#ifdef CONFIG_PREEMPT_RT +static void timersd_setup(unsigned int cpu) +{ + sched_set_fifo_low(current); +} + +static int timersd_should_run(unsigned int cpu) +{ + return local_pending_timers(); +} + +static void run_timersd(unsigned int cpu) +{ + unsigned int timer_si; + + ksoftirqd_run_begin(); + + timer_si = local_pending_timers(); + __this_cpu_write(pending_timer_softirq, 0); + or_softirq_pending(timer_si); + + __do_softirq(); + + ksoftirqd_run_end(); +} + +static void raise_ktimers_thread(unsigned int nr) +{ + trace_softirq_raise(nr); + __this_cpu_or(pending_timer_softirq, 1 << nr); +} + +void raise_hrtimer_softirq(void) +{ + raise_ktimers_thread(HRTIMER_SOFTIRQ); +} + +void raise_timer_softirq(void) +{ + unsigned long flags; + + local_irq_save(flags); + raise_ktimers_thread(TIMER_SOFTIRQ); + wake_timersd(); + local_irq_restore(flags); +} + +static struct smp_hotplug_thread timer_threads = { + .store = &timersd, + .setup = timersd_setup, + .thread_should_run = timersd_should_run, + .thread_fn = run_timersd, + .thread_comm = "ktimers/%u", +}; +#endif + static __init int spawn_ksoftirqd(void) { cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL, takeover_tasklets); BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); - +#ifdef CONFIG_PREEMPT_RT + BUG_ON(smpboot_register_percpu_thread(&timer_threads)); +#endif return 0; } early_initcall(spawn_ksoftirqd); diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index e4f0e3b0c4f4..9e2cfba065de 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1805,7 +1805,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) if (!ktime_before(now, cpu_base->softirq_expires_next)) { cpu_base->softirq_expires_next = KTIME_MAX; cpu_base->softirq_activated = 1; - raise_softirq_irqoff(HRTIMER_SOFTIRQ); + raise_hrtimer_softirq(); } __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); @@ -1918,7 +1918,7 @@ void hrtimer_run_queues(void) if (!ktime_before(now, cpu_base->softirq_expires_next)) { cpu_base->softirq_expires_next = KTIME_MAX; cpu_base->softirq_activated = 1; - raise_softirq_irqoff(HRTIMER_SOFTIRQ); + raise_hrtimer_softirq(); } __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 798e1841d286..b52e1861b913 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -800,7 +800,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) static inline bool local_timer_softirq_pending(void) { - return local_softirq_pending() & BIT(TIMER_SOFTIRQ); + return local_pending_timers() & BIT(TIMER_SOFTIRQ); } static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 717fcb9fb14a..e6219da89933 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1822,7 +1822,7 @@ static void run_local_timers(void) if (time_before(jiffies, base->next_expiry)) return; } - raise_softirq(TIMER_SOFTIRQ); + raise_timer_softirq(); } /* diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9db92a6e1463..0efcfac975b6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2658,11 +2658,19 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) if (softirq_count() >> (SOFTIRQ_SHIFT + 1)) trace_flags |= TRACE_FLAG_BH_OFF; - if (tif_need_resched()) + if (tif_need_resched_now()) trace_flags |= TRACE_FLAG_NEED_RESCHED; +#ifdef CONFIG_PREEMPT_LAZY + /* Run out of bits. Share the LAZY and PREEMPT_RESCHED */ + if (need_resched_lazy()) + trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY; +#else if (test_preempt_need_resched()) trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; - return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) | +#endif + + return (trace_flags << 24) | (min_t(unsigned int, pc & 0xff, 0xf)) | + (preempt_lazy_count() & 0xff) << 16 | (min_t(unsigned int, migration_disable_value(), 0xf)) << 4; } @@ -4255,15 +4263,17 @@ unsigned long trace_total_entries(struct trace_array *tr) static void print_lat_help_header(struct seq_file *m) { - seq_puts(m, "# _------=> CPU# \n" - "# / _-----=> irqs-off/BH-disabled\n" - "# | / _----=> need-resched \n" - "# || / _---=> hardirq/softirq \n" - "# ||| / _--=> preempt-depth \n" - "# |||| / _-=> migrate-disable \n" - "# ||||| / delay \n" - "# cmd pid |||||| time | caller \n" - "# \\ / |||||| \\ | / \n"); + seq_puts(m, "# _--------=> CPU# \n" + "# / _-------=> irqs-off/BH-disabled\n" + "# | / _------=> need-resched \n" + "# || / _-----=> need-resched-lazy\n" + "# ||| / _----=> hardirq/softirq \n" + "# |||| / _---=> preempt-depth \n" + "# ||||| / _--=> preempt-lazy-depth\n" + "# |||||| / _-=> migrate-disable \n" + "# ||||||| / delay \n" + "# cmd pid |||||||| time | caller \n" + "# \\ / |||||||| \\ | / \n"); } static void print_event_info(struct array_buffer *buf, struct seq_file *m) @@ -4297,14 +4307,16 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file print_event_info(buf, m); - seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space); - seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); - seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); - seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); - seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space); - seq_printf(m, "# %.*s|||| / delay\n", prec, space); - seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID "); - seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | "); + seq_printf(m, "# %.*s _-------=> irqs-off/BH-disabled\n", prec, space); + seq_printf(m, "# %.*s / _------=> need-resched\n", prec, space); + seq_printf(m, "# %.*s| / _-----=> need-resched-lazy\n", prec, space); + seq_printf(m, "# %.*s|| / _----=> hardirq/softirq\n", prec, space); + seq_printf(m, "# %.*s||| / _---=> preempt-depth\n", prec, space); + seq_printf(m, "# %.*s|||| / _--=> preempt-lazy-depth\n", prec, space); + seq_printf(m, "# %.*s||||| / _-=> migrate-disable\n", prec, space); + seq_printf(m, "# %.*s|||||| / delay\n", prec, space); + seq_printf(m, "# TASK-PID %.*s CPU# ||||||| TIMESTAMP FUNCTION\n", prec, " TGID "); + seq_printf(m, "# | | %.*s | ||||||| | |\n", prec, " | "); } void diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 9da418442a06..1d8c513d1121 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -208,6 +208,7 @@ static int trace_define_common_fields(void) /* Holds both preempt_count and migrate_disable */ __common_field(unsigned char, preempt_count); __common_field(int, pid); + __common_field(unsigned char, preempt_lazy_count); return ret; } diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 5cd4fb656306..3c227e2843ae 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -442,6 +442,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) { char hardsoft_irq; char need_resched; + char need_resched_lazy; char irqs_off; int hardirq; int softirq; @@ -462,20 +463,27 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) switch (entry->flags & (TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED)) { +#ifndef CONFIG_PREEMPT_LAZY case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED: need_resched = 'N'; break; +#endif case TRACE_FLAG_NEED_RESCHED: need_resched = 'n'; break; +#ifndef CONFIG_PREEMPT_LAZY case TRACE_FLAG_PREEMPT_RESCHED: need_resched = 'p'; break; +#endif default: need_resched = '.'; break; } + need_resched_lazy = + (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.'; + hardsoft_irq = (nmi && hardirq) ? 'Z' : nmi ? 'z' : @@ -484,14 +492,20 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) softirq ? 's' : '.' ; - trace_seq_printf(s, "%c%c%c", - irqs_off, need_resched, hardsoft_irq); + trace_seq_printf(s, "%c%c%c%c", + irqs_off, need_resched, need_resched_lazy, + hardsoft_irq); if (entry->preempt_count & 0xf) trace_seq_printf(s, "%x", entry->preempt_count & 0xf); else trace_seq_putc(s, '.'); + if (entry->preempt_lazy_count) + trace_seq_printf(s, "%x", entry->preempt_lazy_count); + else + trace_seq_putc(s, '.'); + if (entry->preempt_count & 0xf0) trace_seq_printf(s, "%x", entry->preempt_count >> 4); else diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 8e61f21e7e33..41596c415111 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -424,6 +424,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) /* Start period for the next softlockup warning. */ update_report_ts(); + printk_prefer_direct_enter(); + pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", smp_processor_id(), duration, current->comm, task_pid_nr(current)); @@ -442,6 +444,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); if (softlockup_panic) panic("softlockup: hung tasks"); + + printk_prefer_direct_exit(); } return HRTIMER_RESTART; diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 1e8a49dc956e..7c977b945c92 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -135,6 +135,8 @@ static void watchdog_overflow_callback(struct perf_event *event, if (__this_cpu_read(hard_watchdog_warn) == true) return; + printk_prefer_direct_enter(); + pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", this_cpu); print_modules(); @@ -155,6 +157,8 @@ static void watchdog_overflow_callback(struct perf_event *event, if (hardlockup_panic) nmi_panic(regs, "Hard LOCKUP"); + printk_prefer_direct_exit(); + __this_cpu_write(hard_watchdog_warn, true); return; } -- 2.34.1