diff --git a/meta-digi-arm/recipes-kernel/linux/linux-dey.inc b/meta-digi-arm/recipes-kernel/linux/linux-dey.inc
index d9c4603c1..87f6ec74c 100644
--- a/meta-digi-arm/recipes-kernel/linux/linux-dey.inc
+++ b/meta-digi-arm/recipes-kernel/linux/linux-dey.inc
@@ -71,6 +71,20 @@ do_configure:append() {
 	if [ -n "${@' '.join(find_cfgs(d))}" ]; then
 		${S}/scripts/kconfig/merge_config.sh -m -O ${B} ${B}/.config ${@" ".join(find_cfgs(d))}
 	fi
+	# Apply ST-specific config fragments (ending in .config and stored in a different folder)
+	if [ -n "${KERNEL_CONFIG_FRAGMENTS}" ]; then
+		for f in ${KERNEL_CONFIG_FRAGMENTS}
+		do
+			# Abort the task if a listed fragment is missing instead of
+			# merging an incomplete set of fragments
+			if [ ! -e "$f" ]; then
+				bberror "Could not find kernel config fragment $f"
+				exit 1
+			fi
+		done
+		# All fragments were found: merge them (merge_config.sh output is sent to stderr).
+		(${S}/scripts/kconfig/merge_config.sh -m -r -O ${B} ${B}/.config ${KERNEL_CONFIG_FRAGMENTS} 1>&2 )
+	fi
 }
 
 # Don't create custom folder for kernel artifacts
diff --git a/meta-digi-arm/recipes-kernel/linux/linux-dey/ccmp1/0023-5.15-stm32mp-rt-49-r1-CLOCK.patch b/meta-digi-arm/recipes-kernel/linux/linux-dey/ccmp1/0023-5.15-stm32mp-rt-49-r1-CLOCK.patch
new file mode 100644
index 000000000..41180995b
--- /dev/null
+++ b/meta-digi-arm/recipes-kernel/linux/linux-dey/ccmp1/0023-5.15-stm32mp-rt-49-r1-CLOCK.patch
@@ -0,0 +1,26 @@
+From 63e709173a20b85b473bbf4832f4e909692fd361 Mon Sep 17 00:00:00 2001
+From: Lionel VITTE <lionel.vitte@st.com>
+Date: Wed, 8 Feb 2023 09:54:24 +0100
+Subject: [PATCH 23/28] 5.15-stm32mp-rt-49-r1 CLOCK
+
+Signed-off-by: Lionel VITTE <lionel.vitte@st.com>
+---
+ drivers/clk/stm32/clk-stm32mp13.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/clk/stm32/clk-stm32mp13.c b/drivers/clk/stm32/clk-stm32mp13.c
+index 15ee05df8..2f7a823bf 100644
+--- a/drivers/clk/stm32/clk-stm32mp13.c
++++ b/drivers/clk/stm32/clk-stm32mp13.c
+@@ -840,7 +840,7 @@ static CLK_STM32_GATE(sai1, "pclk2", 0, GATE_SAI1);
+ static CLK_STM32_GATE(sai2, "pclk2", 0, GATE_SAI2);
+ static CLK_STM32_GATE(spi1, "pclk2", 0, GATE_SPI1);
+ 
+-static CLK_STM32_GATE(syscfg, "pclk3", 0, GATE_SYSCFG);
++static CLK_STM32_GATE(syscfg, "pclk3", CLK_IS_CRITICAL, GATE_SYSCFG);
+ static CLK_STM32_GATE(vref, "pclk3", 0, GATE_VREF);
+ static CLK_STM32_GATE(dts, "pclk3", 0, GATE_DTS);
+ static CLK_STM32_GATE(pmbctrl, "pclk3", 0, GATE_PMBCTRL);
+-- 
+2.34.1
+
diff --git a/meta-digi-arm/recipes-kernel/linux/linux-dey/ccmp1/0024-5.15-stm32mp-rt-49-r1-DMA.patch b/meta-digi-arm/recipes-kernel/linux/linux-dey/ccmp1/0024-5.15-stm32mp-rt-49-r1-DMA.patch
new file mode 100644
index 000000000..af368b2d0
--- /dev/null
+++ b/meta-digi-arm/recipes-kernel/linux/linux-dey/ccmp1/0024-5.15-stm32mp-rt-49-r1-DMA.patch
@@ -0,0 +1,131 @@
+From 5a55de398d12848f13f7df59fb2f1853b7dd9ee8 Mon Sep 17 00:00:00 2001
+From: Lionel VITTE <lionel.vitte@st.com>
+Date: Wed, 8 Feb 2023 09:56:07 +0100
+Subject: [PATCH 24/28] 5.15-stm32mp-rt-49-r1 DMA
+
+Signed-off-by: Lionel VITTE <lionel.vitte@st.com>
+---
+ drivers/dma/stm32-dma.c  | 35 +++++++++++++++++++++++++----------
+ drivers/dma/stm32-mdma.c |  4 ++++
+ 2 files changed, 29 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
+index 7c6078c6c..128edfb4f 100644
+--- a/drivers/dma/stm32-dma.c
++++ b/drivers/dma/stm32-dma.c
+@@ -238,6 +238,7 @@ struct stm32_dma_chan {
+ 	u32 residue_after_drain;
+ 	struct workqueue_struct *mdma_wq;
+ 	struct work_struct mdma_work;
++	struct completion mdma_drain_completion;
+ };
+ 
+ struct stm32_dma_device {
+@@ -570,8 +571,9 @@ static u32 stm32_dma_get_remaining_bytes(struct stm32_dma_chan *chan)
+ 	return ndtr << width;
+ }
+ 
+-static int stm32_dma_mdma_drain(struct stm32_dma_chan *chan)
++static void stm32_dma_mdma_drain_worker(struct work_struct *work)
+ {
++	struct stm32_dma_chan *chan = container_of(work, struct stm32_dma_chan, mdma_work);
+ 	struct stm32_dma_mdma *mchan = &chan->mchan;
+ 	struct stm32_dma_sg_req *sg_req;
+ 	struct dma_device *ddev = mchan->chan->device;
+@@ -583,14 +585,12 @@ static int stm32_dma_mdma_drain(struct stm32_dma_chan *chan)
+ 	int ret;
+ 	unsigned long flags;
+ 
+-	flush_workqueue(chan->mdma_wq);
+-
+ 	/* DMA/MDMA chain: drain remaining data in SRAM */
+ 
+ 	/* Get the residue on MDMA side */
+ 	status = dmaengine_tx_status(mchan->chan, mchan->chan->cookie, &state);
+ 	if (status == DMA_COMPLETE)
+-		return status;
++		goto mdma_complete;
+ 
+ 	mdma_residue = state.residue;
+ 	sg_req = &chan->desc->sg_req[chan->next_sg - 1];
+@@ -623,24 +623,25 @@ static int stm32_dma_mdma_drain(struct stm32_dma_chan *chan)
+ 		desc = ddev->device_prep_dma_memcpy(mchan->chan, dst_buf, src_buf, dma_to_write,
+ 						    DMA_PREP_INTERRUPT);
+ 		if (!desc)
+-			return -EINVAL;
++			return;
+ 
+ 		ret = dma_submit_error(dmaengine_submit(desc));
+ 		if (ret < 0)
+-			return ret;
++			return;
+ 
+ 		status = dma_wait_for_async_tx(desc);
+ 		if (status != DMA_COMPLETE) {
+ 			dev_err(chan2dev(chan), "%s dma_wait_for_async_tx error\n", __func__);
+ 			dmaengine_terminate_async(mchan->chan);
+-			return -EBUSY;
++			return;
+ 		}
+ 
+ 		/* We need to store residue for tx_status() */
+ 		chan->residue_after_drain = len - (mdma_wrote + dma_to_write);
+ 	}
+ 
+-	return 0;
++mdma_complete:
++	complete(&chan->mdma_drain_completion);
+ }
+ 
+ static void stm32_dma_synchronize(struct dma_chan *c)
+@@ -648,9 +649,22 @@ static void stm32_dma_synchronize(struct dma_chan *c)
+ 	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+ 	struct stm32_dma_mdma *mchan = &chan->mchan;
+ 
+-	if (chan->desc && chan->use_mdma && mchan->dir == DMA_DEV_TO_MEM)
+-		if (stm32_dma_mdma_drain(chan))
++	if (chan->desc && chan->use_mdma && mchan->dir == DMA_DEV_TO_MEM) {
++		unsigned long ms = 5000 + 100; /* dma_sync_wait_timeout + extra 100ms */
++
++		reinit_completion(&chan->mdma_drain_completion);
++
++		flush_workqueue(chan->mdma_wq);
++		INIT_WORK(&chan->mdma_work, stm32_dma_mdma_drain_worker);
++
++		if (!queue_work(chan->mdma_wq, &chan->mdma_work))
++			dev_warn(chan2dev(chan), "Work already queued\n");
++
++		ms = wait_for_completion_timeout(&chan->mdma_drain_completion,
++						 msecs_to_jiffies(ms));
++		if (ms == 0)
+ 			dev_err(chan2dev(chan), "%s: can't drain DMA\n", __func__);
++	}
+ 
+ 	if (chan->use_mdma)
+ 		dmaengine_synchronize(mchan->chan);
+@@ -2338,6 +2352,7 @@ static int stm32_dma_probe(struct platform_device *pdev)
+ 					dev_warn(&pdev->dev,
+ 						 "can't alloc MDMA workqueue for %s\n", name);
+ 				}
++				init_completion(&chan->mdma_drain_completion);
+ 			}
+ 		}
+ 	}
+diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c
+index 133534663..a08c94638 100644
+--- a/drivers/dma/stm32-mdma.c
++++ b/drivers/dma/stm32-mdma.c
+@@ -1270,6 +1270,10 @@ static int stm32_mdma_resume(struct dma_chan *c)
+ 	unsigned long flags;
+ 	u32 status, reg;
+ 
++	/* Transfer can be terminated */
++	if (!chan->desc || (stm32_mdma_read(dmadev, STM32_MDMA_CCR(chan->id)) & STM32_MDMA_CCR_EN))
++		return -EPERM;
++
+ 	hwdesc = chan->desc->node[chan->curr_hwdesc].hwdesc;
+ 
+ 	spin_lock_irqsave(&chan->vchan.lock, flags);
+-- 
+2.34.1
+
diff --git a/meta-digi-arm/recipes-kernel/linux/linux-dey/ccmp1/0025-5.15-stm32mp-rt-49-r1-MFD.patch b/meta-digi-arm/recipes-kernel/linux/linux-dey/ccmp1/0025-5.15-stm32mp-rt-49-r1-MFD.patch
new file mode 100644
index 000000000..80d4f531e
--- /dev/null
+++ b/meta-digi-arm/recipes-kernel/linux/linux-dey/ccmp1/0025-5.15-stm32mp-rt-49-r1-MFD.patch
@@ -0,0 +1,27 @@
+From be5ec688053e6d136bc8ea54ed1e93d523b24580 Mon Sep 17 00:00:00 2001
+From: Lionel VITTE <lionel.vitte@st.com>
+Date: Wed, 8 Feb 2023 09:56:45 +0100
+Subject: [PATCH 25/28] 5.15-stm32mp-rt-49-r1 MFD
+
+Signed-off-by: Lionel VITTE <lionel.vitte@st.com>
+---
+ drivers/mfd/syscon.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/mfd/syscon.c b/drivers/mfd/syscon.c
+index 191fdb87c..24530dfe5 100644
+--- a/drivers/mfd/syscon.c
++++ b/drivers/mfd/syscon.c
+@@ -38,6 +38,9 @@ static const struct regmap_config syscon_regmap_config = {
+ 	.reg_bits = 32,
+ 	.val_bits = 32,
+ 	.reg_stride = 4,
++#ifdef CONFIG_PREEMPT_RT
++	.use_raw_spinlock = true,
++#endif
+ };
+ 
+ static struct syscon *of_syscon_register(struct device_node *np, bool check_clk)
+-- 
+2.34.1
+
diff --git a/meta-digi-arm/recipes-kernel/linux/linux-dey/ccmp1/0026-5.15-stm32mp-rt-49-r1-NET-TTY.patch b/meta-digi-arm/recipes-kernel/linux/linux-dey/ccmp1/0026-5.15-stm32mp-rt-49-r1-NET-TTY.patch
new file mode 100644
index 000000000..4bcaaebfb
--- /dev/null
+++ b/meta-digi-arm/recipes-kernel/linux/linux-dey/ccmp1/0026-5.15-stm32mp-rt-49-r1-NET-TTY.patch
@@ -0,0 +1,64 @@
+From 1f4b70cda804c4f3771902254a2614d87a1d366c Mon Sep 17 00:00:00 2001
+From: Lionel VITTE <lionel.vitte@st.com>
+Date: Wed, 8 Feb 2023 09:57:06 +0100
+Subject: [PATCH 26/28] 5.15-stm32mp-rt-49-r1 NET-TTY
+
+Signed-off-by: Lionel VITTE <lionel.vitte@st.com>
+---
+ drivers/tty/serial/stm32-usart.c | 32 +++++++++++---------------------
+ 1 file changed, 11 insertions(+), 21 deletions(-)
+
+diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c
+index 4d7a31664..0cd8e9672 100644
+--- a/drivers/tty/serial/stm32-usart.c
++++ b/drivers/tty/serial/stm32-usart.c
+@@ -772,26 +772,16 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr)
+ 	}
+ 
+ 	if ((sr & USART_SR_RTOF) && !(stm32_port->throttled) &&
+-	    stm32_usart_rx_dma_started(stm32_port))
+-		return IRQ_WAKE_THREAD;
+-	else
+-		return IRQ_HANDLED;
+-}
+-
+-static irqreturn_t stm32_usart_threaded_interrupt(int irq, void *ptr)
+-{
+-	struct uart_port *port = ptr;
+-	struct tty_port *tport = &port->state->port;
+-	unsigned int size;
+-	unsigned long flags;
+-
+-	/* Receiver timeout irq for DMA RX */
+-	spin_lock_irqsave(&port->lock, flags);
+-	size = stm32_usart_receive_chars(port, false);
+-	uart_unlock_and_check_sysrq_irqrestore(port, flags);
+-	if (size)
+-		tty_flip_buffer_push(tport);
++	    stm32_usart_rx_dma_started(stm32_port)) {
++		unsigned long flags;
+ 
++		spin_lock_irqsave(&port->lock, flags);
++		/* Receiver timeout irq for DMA RX */
++		size = stm32_usart_receive_chars(port, false);
++		uart_unlock_and_check_sysrq_irqrestore(port, flags);
++		if (size)
++			tty_flip_buffer_push(tport);
++	}
+ 	return IRQ_HANDLED;
+ }
+ 
+@@ -980,8 +970,8 @@ static int stm32_usart_startup(struct uart_port *port)
+ 	u32 val;
+ 	int ret;
+ 
+-	ret = request_threaded_irq(port->irq, stm32_usart_interrupt,
+-				   stm32_usart_threaded_interrupt,
++	ret = request_threaded_irq(port->irq, NULL,
++				   stm32_usart_interrupt,
+ 				   IRQF_ONESHOT | IRQF_NO_SUSPEND,
+ 				   name, port);
+ 	if (ret)
+-- 
+2.34.1
+
diff --git a/meta-digi-arm/recipes-kernel/linux/linux-dey/ccmp1/0027-5.15-stm32mp-rt-49-r1-DEVICETREE.patch b/meta-digi-arm/recipes-kernel/linux/linux-dey/ccmp1/0027-5.15-stm32mp-rt-49-r1-DEVICETREE.patch
new file mode 100644
index 000000000..dac422be8
--- /dev/null
+++ b/meta-digi-arm/recipes-kernel/linux/linux-dey/ccmp1/0027-5.15-stm32mp-rt-49-r1-DEVICETREE.patch
@@ -0,0 +1,25 @@
+From e1bd8bc5502e661be4feaadfca1889da1d48cd73 Mon Sep 17 00:00:00 2001
+From: Lionel VITTE <lionel.vitte@st.com>
+Date: Wed, 8 Feb 2023 09:57:43 +0100
+Subject: [PATCH 27/28] 5.15-stm32mp-rt-49-r1 DEVICETREE
+
+Signed-off-by: Lionel VITTE <lionel.vitte@st.com>
+---
+ arch/arm/boot/dts/stm32mp131.dtsi | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/arch/arm/boot/dts/stm32mp131.dtsi b/arch/arm/boot/dts/stm32mp131.dtsi
+index 8121ddc97..3fc06961a 100644
+--- a/arch/arm/boot/dts/stm32mp131.dtsi
++++ b/arch/arm/boot/dts/stm32mp131.dtsi
+@@ -1241,7 +1241,6 @@ exti-interrupt-map {
+ 		syscfg: syscon@50020000 {
+ 			compatible = "st,stm32mp157-syscfg", "syscon";
+ 			reg = <0x50020000 0x400>;
+-			clocks = <&rcc SYSCFG>;
+ 		};
+ 
+ 		lptimer2: timer@50021000 {
+-- 
+2.34.1
+
diff --git a/meta-digi-arm/recipes-kernel/linux/linux-dey/ccmp1/0028-5.15-stm32mp-rt-49-r1-CONFIG.patch b/meta-digi-arm/recipes-kernel/linux/linux-dey/ccmp1/0028-5.15-stm32mp-rt-49-r1-CONFIG.patch
new file mode 100644
index 000000000..e697ee7e5
--- /dev/null
+++ b/meta-digi-arm/recipes-kernel/linux/linux-dey/ccmp1/0028-5.15-stm32mp-rt-49-r1-CONFIG.patch
@@ -0,0 +1,82 @@
+From 05ea3c26ccad3359d94dbe3c7ba758c2ba2f7dd9 Mon Sep 17 00:00:00 2001
+From: Lionel VITTE <lionel.vitte@st.com>
+Date: Wed, 8 Feb 2023 09:59:08 +0100
+Subject: [PATCH 28/28] 5.15-stm32mp-rt-49-r1 CONFIG
+
+Signed-off-by: Lionel VITTE <lionel.vitte@st.com>
+---
+ .../configs/fragment-07-rt-sysvinit.config    | 12 +++++++
+ arch/arm/configs/fragment-07-rt.config        | 32 +++++++++++++++++++
+ arch/arm/configs/fragment-08-rt-mp13.config   |  2 ++
+ 3 files changed, 46 insertions(+)
+ create mode 100644 arch/arm/configs/fragment-07-rt-sysvinit.config
+ create mode 100644 arch/arm/configs/fragment-07-rt.config
+ create mode 100644 arch/arm/configs/fragment-08-rt-mp13.config
+
+diff --git a/arch/arm/configs/fragment-07-rt-sysvinit.config b/arch/arm/configs/fragment-07-rt-sysvinit.config
+new file mode 100644
+index 000000000..49a4baf60
+--- /dev/null
++++ b/arch/arm/configs/fragment-07-rt-sysvinit.config
+@@ -0,0 +1,12 @@
++CONFIG_CGROUPS=y
++# CONFIG_CGROUP_SCHED is not set
++# CONFIG_CGROUP_PIDS is not set
++# CONFIG_CGROUP_RDMA is not set
++# CONFIG_CGROUP_FREEZER is not set
++# CONFIG_CGROUP_DEVICE is not set
++# CONFIG_CGROUP_CPUACCT is not set
++# CONFIG_CGROUP_PERF is not set
++# CONFIG_CGROUP_DEBUG is not set
++# CONFIG_CGROUP_NET_PRIO is not set
++# CONFIG_CGROUP_NET_CLASSID is not set
++
+diff --git a/arch/arm/configs/fragment-07-rt.config b/arch/arm/configs/fragment-07-rt.config
+new file mode 100644
+index 000000000..98bb8735f
+--- /dev/null
++++ b/arch/arm/configs/fragment-07-rt.config
+@@ -0,0 +1,32 @@
++CONFIG_PREEMPT_RT=y
++
++# disable SCHED_MC
++# CONFIG_MCPM is not set
++
++# Disable CPUFREQ and CPUIDLE
++# CONFIG_CPU_FREQ is not set
++# CONFIG_CPU_IDLE is not set
++
++# Force to have HIGH_RES_TIMERS
++CONFIG_HIGH_RES_TIMERS=y
++
++# force do not go to sleep
++# For multiple core, you should set the specific boot options
++# for isolate the core and render it tickless: "isolcpus=2,3 nohz_full=2,3"
++# Warning: activate this only if SMP is present
++# CONFIG_HZ_PERIODIC=y
++
++# To enable ftrace, you need to enable the following configuration:
++# CONFIG_FTRACE=y
++# CONFIG_IRQSOFF_TRACER=y
++# CONFIG_PREEMPT_TRACER=y
++# CONFIG_SCHED_TRACER=y
++# CONFIG_FUNCTION_TRACER=y
++# By default, the ftrace for RT kernel are disabled
++# CONFIG_FTRACE is not set
++# CONFIG_IRQSOFF_TRACER is not set
++# CONFIG_PREEMPT_TRACER is not set
++# CONFIG_SCHED_TRACER is not set
++# CONFIG_FUNCTION_TRACER is not set
++
++
+diff --git a/arch/arm/configs/fragment-08-rt-mp13.config b/arch/arm/configs/fragment-08-rt-mp13.config
+new file mode 100644
+index 000000000..c70d7adc6
+--- /dev/null
++++ b/arch/arm/configs/fragment-08-rt-mp13.config
+@@ -0,0 +1,2 @@
++# Disable SMP on MP13
++# CONFIG_SMP is not set
+-- 
+2.34.1
+
diff --git a/meta-digi-arm/recipes-kernel/linux/linux-dey/ccmp1/patch-5.15.119-rt65.patch b/meta-digi-arm/recipes-kernel/linux/linux-dey/ccmp1/patch-5.15.119-rt65.patch
new file mode 100644
index 000000000..b2cf5ed68
--- /dev/null
+++ b/meta-digi-arm/recipes-kernel/linux/linux-dey/ccmp1/patch-5.15.119-rt65.patch
@@ -0,0 +1,10968 @@
+diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst
+index dd913eefbf31..33d3c988b951 100644
+--- a/Documentation/admin-guide/cgroup-v1/memory.rst
++++ b/Documentation/admin-guide/cgroup-v1/memory.rst
+@@ -64,6 +64,7 @@ Brief summary of control files.
+ 				     threads
+  cgroup.procs			     show list of processes
+  cgroup.event_control		     an interface for event_fd()
++				     This knob is not available on CONFIG_PREEMPT_RT systems.
+  memory.usage_in_bytes		     show current usage for memory
+ 				     (See 5.5 for details)
+  memory.memsw.usage_in_bytes	     show current usage for memory+Swap
+@@ -75,6 +76,7 @@ Brief summary of control files.
+  memory.max_usage_in_bytes	     show max memory usage recorded
+  memory.memsw.max_usage_in_bytes     show max memory+Swap usage recorded
+  memory.soft_limit_in_bytes	     set/show soft limit of memory usage
++				     This knob is not available on CONFIG_PREEMPT_RT systems.
+  memory.stat			     show various statistics
+  memory.use_hierarchy		     set/show hierarchical account enabled
+                                      This knob is deprecated and shouldn't be
+diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst
+index d2c4c27e1702..d83c9ab49427 100644
+--- a/Documentation/dev-tools/kcov.rst
++++ b/Documentation/dev-tools/kcov.rst
+@@ -50,6 +50,7 @@ program using kcov:
+     #include <sys/mman.h>
+     #include <unistd.h>
+     #include <fcntl.h>
++    #include <linux/types.h>
+ 
+     #define KCOV_INIT_TRACE			_IOR('c', 1, unsigned long)
+     #define KCOV_ENABLE			_IO('c', 100)
+@@ -177,6 +178,8 @@ Comparison operands collection is similar to coverage collection:
+ 	/* Read number of comparisons collected. */
+ 	n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
+ 	for (i = 0; i < n; i++) {
++		uint64_t ip;
++
+ 		type = cover[i * KCOV_WORDS_PER_CMP + 1];
+ 		/* arg1 and arg2 - operands of the comparison. */
+ 		arg1 = cover[i * KCOV_WORDS_PER_CMP + 2];
+@@ -251,6 +254,8 @@ selectively from different subsystems.
+ 
+ .. code-block:: c
+ 
++    /* Same includes and defines as above. */
++
+     struct kcov_remote_arg {
+ 	__u32		trace_mode;
+ 	__u32		area_size;
+diff --git a/arch/alpha/include/asm/spinlock_types.h b/arch/alpha/include/asm/spinlock_types.h
+index 1d5716bc060b..2526fd3be5fd 100644
+--- a/arch/alpha/include/asm/spinlock_types.h
++++ b/arch/alpha/include/asm/spinlock_types.h
+@@ -2,7 +2,7 @@
+ #ifndef _ALPHA_SPINLOCK_TYPES_H
+ #define _ALPHA_SPINLOCK_TYPES_H
+ 
+-#ifndef __LINUX_SPINLOCK_TYPES_H
++#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
+ # error "please don't include this file directly"
+ #endif
+ 
+diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
+index a8ae17f5740d..0e8631b96e0f 100644
+--- a/arch/arm/Kconfig
++++ b/arch/arm/Kconfig
+@@ -32,6 +32,7 @@ config ARM
+ 	select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
+ 	select ARCH_SUPPORTS_ATOMIC_RMW
+ 	select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE
++	select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
+ 	select ARCH_USE_BUILTIN_BSWAP
+ 	select ARCH_USE_CMPXCHG_LOCKREF
+ 	select ARCH_USE_MEMTEST
+@@ -68,7 +69,7 @@ config ARM
+ 	select HARDIRQS_SW_RESEND
+ 	select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT
+ 	select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
+-	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
++	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT
+ 	select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
+ 	select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL
+ 	select HAVE_ARCH_MMAP_RND_BITS if MMU
+@@ -109,6 +110,7 @@ config ARM
+ 	select HAVE_PERF_EVENTS
+ 	select HAVE_PERF_REGS
+ 	select HAVE_PERF_USER_STACK_DUMP
++	select HAVE_PREEMPT_LAZY
+ 	select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE
+ 	select HAVE_REGS_AND_STACK_ACCESS_API
+ 	select HAVE_RSEQ
+@@ -124,6 +126,7 @@ config ARM
+ 	select OLD_SIGSUSPEND3
+ 	select PCI_SYSCALL if PCI
+ 	select PERF_USE_VMALLOC
++	select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
+ 	select RTC_LIB
+ 	select SYS_SUPPORTS_APM_EMULATION
+ 	select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M
+diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h
+index 5976958647fe..0c14b36ef101 100644
+--- a/arch/arm/include/asm/spinlock_types.h
++++ b/arch/arm/include/asm/spinlock_types.h
+@@ -2,7 +2,7 @@
+ #ifndef __ASM_SPINLOCK_TYPES_H
+ #define __ASM_SPINLOCK_TYPES_H
+ 
+-#ifndef __LINUX_SPINLOCK_TYPES_H
++#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
+ # error "please don't include this file directly"
+ #endif
+ 
+diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
+index b682189a2b5d..e5e2ceb59544 100644
+--- a/arch/arm/include/asm/thread_info.h
++++ b/arch/arm/include/asm/thread_info.h
+@@ -52,6 +52,7 @@ struct cpu_context_save {
+ struct thread_info {
+ 	unsigned long		flags;		/* low level flags */
+ 	int			preempt_count;	/* 0 => preemptable, <0 => bug */
++	int			preempt_lazy_count; /* 0 => preemptable, <0 => bug */
+ 	struct task_struct	*task;		/* main task structure */
+ 	__u32			cpu;		/* cpu */
+ 	__u32			cpu_domain;	/* cpu domain */
+@@ -130,6 +131,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
+ #define TIF_NOTIFY_RESUME	2	/* callback before returning to user */
+ #define TIF_UPROBE		3	/* breakpointed or singlestepping */
+ #define TIF_NOTIFY_SIGNAL	4	/* signal notifications exist */
++#define TIF_NEED_RESCHED_LAZY	9
+ 
+ #define TIF_USING_IWMMXT	17
+ #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
+@@ -149,6 +151,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
+ #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
+ #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
+ #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
++#define _TIF_NEED_RESCHED_LAZY	(1 << TIF_NEED_RESCHED_LAZY)
+ #define _TIF_USING_IWMMXT	(1 << TIF_USING_IWMMXT)
+ 
+ /* Checks for any syscall work in entry-common.S */
+@@ -158,7 +161,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
+ /*
+  * Change these and you break ASM code in entry-common.S
+  */
+-#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
++#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
++				 _TIF_SIGPENDING | \
+ 				 _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
+ 				 _TIF_NOTIFY_SIGNAL)
+ 
+diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
+index a646a3f6440f..beb09d74684f 100644
+--- a/arch/arm/kernel/asm-offsets.c
++++ b/arch/arm/kernel/asm-offsets.c
+@@ -43,6 +43,7 @@ int main(void)
+   BLANK();
+   DEFINE(TI_FLAGS,		offsetof(struct thread_info, flags));
+   DEFINE(TI_PREEMPT,		offsetof(struct thread_info, preempt_count));
++  DEFINE(TI_PREEMPT_LAZY,	offsetof(struct thread_info, preempt_lazy_count));
+   DEFINE(TI_TASK,		offsetof(struct thread_info, task));
+   DEFINE(TI_CPU,		offsetof(struct thread_info, cpu));
+   DEFINE(TI_CPU_DOMAIN,		offsetof(struct thread_info, cpu_domain));
+diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
+index 68261a83b7ad..fa7d110ce555 100644
+--- a/arch/arm/kernel/entry-armv.S
++++ b/arch/arm/kernel/entry-armv.S
+@@ -206,11 +206,18 @@ __irq_svc:
+ 
+ #ifdef CONFIG_PREEMPTION
+ 	ldr	r8, [tsk, #TI_PREEMPT]		@ get preempt count
+-	ldr	r0, [tsk, #TI_FLAGS]		@ get flags
+ 	teq	r8, #0				@ if preempt count != 0
++	bne	1f				@ return from exception
++	ldr	r0, [tsk, #TI_FLAGS]		@ get flags
++	tst	r0, #_TIF_NEED_RESCHED		@ if NEED_RESCHED is set
++	blne	svc_preempt			@ preempt!
++
++	ldr	r8, [tsk, #TI_PREEMPT_LAZY]	@ get preempt lazy count
++	teq	r8, #0				@ if preempt lazy count != 0
+ 	movne	r0, #0				@ force flags to 0
+-	tst	r0, #_TIF_NEED_RESCHED
++	tst	r0, #_TIF_NEED_RESCHED_LAZY
+ 	blne	svc_preempt
++1:
+ #endif
+ 
+ 	svc_exit r5, irq = 1			@ return from exception
+@@ -225,8 +232,14 @@ svc_preempt:
+ 1:	bl	preempt_schedule_irq		@ irq en/disable is done inside
+ 	ldr	r0, [tsk, #TI_FLAGS]		@ get new tasks TI_FLAGS
+ 	tst	r0, #_TIF_NEED_RESCHED
++	bne	1b
++	tst	r0, #_TIF_NEED_RESCHED_LAZY
+ 	reteq	r8				@ go again
+-	b	1b
++	ldr	r0, [tsk, #TI_PREEMPT_LAZY]	@ get preempt lazy count
++	teq	r0, #0				@ if preempt lazy count != 0
++	beq	1b
++	ret	r8				@ go again
++
+ #endif
+ 
+ __und_fault:
+diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
+index 539897ac2828..4655f04ccdcd 100644
+--- a/arch/arm/kernel/signal.c
++++ b/arch/arm/kernel/signal.c
+@@ -607,7 +607,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
+ 	 */
+ 	trace_hardirqs_off();
+ 	do {
+-		if (likely(thread_flags & _TIF_NEED_RESCHED)) {
++		if (likely(thread_flags & (_TIF_NEED_RESCHED |
++					   _TIF_NEED_RESCHED_LAZY))) {
+ 			schedule();
+ 		} else {
+ 			if (unlikely(!user_mode(regs)))
+diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
+index af5177801fb1..1de016008e2e 100644
+--- a/arch/arm/mm/fault.c
++++ b/arch/arm/mm/fault.c
+@@ -400,6 +400,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
+ 	if (addr < TASK_SIZE)
+ 		return do_page_fault(addr, fsr, regs);
+ 
++	if (interrupts_enabled(regs))
++		local_irq_enable();
++
+ 	if (user_mode(regs))
+ 		goto bad_area;
+ 
+@@ -470,6 +473,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
+ static int
+ do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+ {
++	if (interrupts_enabled(regs))
++		local_irq_enable();
++
+ 	do_bad_area(addr, fsr, regs);
+ 	return 0;
+ }
+diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
+index 9d3cbe786f8d..c86b845d0d79 100644
+--- a/arch/arm64/Kconfig
++++ b/arch/arm64/Kconfig
+@@ -88,6 +88,7 @@ config ARM64
+ 	select ARCH_SUPPORTS_ATOMIC_RMW
+ 	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
+ 	select ARCH_SUPPORTS_NUMA_BALANCING
++	select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
+ 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
+ 	select ARCH_WANT_DEFAULT_BPF_JIT
+ 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
+@@ -191,6 +192,7 @@ config ARM64
+ 	select HAVE_PERF_REGS
+ 	select HAVE_PERF_USER_STACK_DUMP
+ 	select HAVE_REGS_AND_STACK_ACCESS_API
++	select HAVE_PREEMPT_LAZY
+ 	select HAVE_FUNCTION_ARG_ACCESS_API
+ 	select HAVE_FUTEX_CMPXCHG if FUTEX
+ 	select MMU_GATHER_RCU_TABLE_FREE
+@@ -212,6 +214,7 @@ config ARM64
+ 	select PCI_DOMAINS_GENERIC if PCI
+ 	select PCI_ECAM if (ACPI && PCI)
+ 	select PCI_SYSCALL if PCI
++	select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
+ 	select POWER_RESET
+ 	select POWER_SUPPLY
+ 	select SPARSE_IRQ
+diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
+index ed57717cd004..63b39229890b 100644
+--- a/arch/arm64/include/asm/pgtable.h
++++ b/arch/arm64/include/asm/pgtable.h
+@@ -1001,7 +1001,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
+  */
+ static inline bool arch_faults_on_old_pte(void)
+ {
+-	WARN_ON(preemptible());
++	WARN_ON(is_migratable());
+ 
+ 	return !cpu_has_hw_af();
+ }
+diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h
+index e83f0982b99c..2545c17281e1 100644
+--- a/arch/arm64/include/asm/preempt.h
++++ b/arch/arm64/include/asm/preempt.h
+@@ -70,13 +70,36 @@ static inline bool __preempt_count_dec_and_test(void)
+ 	 * interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE
+ 	 * pair.
+ 	 */
+-	return !pc || !READ_ONCE(ti->preempt_count);
++	if (!pc || !READ_ONCE(ti->preempt_count))
++		return true;
++#ifdef CONFIG_PREEMPT_LAZY
++	if ((pc & ~PREEMPT_NEED_RESCHED))
++		return false;
++	if (current_thread_info()->preempt_lazy_count)
++		return false;
++	return test_thread_flag(TIF_NEED_RESCHED_LAZY);
++#else
++	return false;
++#endif
+ }
+ 
+ static inline bool should_resched(int preempt_offset)
+ {
++#ifdef CONFIG_PREEMPT_LAZY
++	u64 pc = READ_ONCE(current_thread_info()->preempt_count);
++	if (pc == preempt_offset)
++		return true;
++
++	if ((pc & ~PREEMPT_NEED_RESCHED) != preempt_offset)
++		return false;
++
++	if (current_thread_info()->preempt_lazy_count)
++		return false;
++	return test_thread_flag(TIF_NEED_RESCHED_LAZY);
++#else
+ 	u64 pc = READ_ONCE(current_thread_info()->preempt_count);
+ 	return pc == preempt_offset;
++#endif
+ }
+ 
+ #ifdef CONFIG_PREEMPTION
+diff --git a/arch/arm64/include/asm/signal.h b/arch/arm64/include/asm/signal.h
+index ef449f5f4ba8..5e535c3e4926 100644
+--- a/arch/arm64/include/asm/signal.h
++++ b/arch/arm64/include/asm/signal.h
+@@ -22,4 +22,8 @@ static inline void __user *arch_untagged_si_addr(void __user *addr,
+ }
+ #define arch_untagged_si_addr arch_untagged_si_addr
+ 
++#if defined(CONFIG_PREEMPT_RT)
++#define ARCH_RT_DELAYS_SIGNAL_SEND
++#endif
++
+ #endif
+diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h
+index 18782f0c4721..11ab1c077697 100644
+--- a/arch/arm64/include/asm/spinlock_types.h
++++ b/arch/arm64/include/asm/spinlock_types.h
+@@ -5,7 +5,7 @@
+ #ifndef __ASM_SPINLOCK_TYPES_H
+ #define __ASM_SPINLOCK_TYPES_H
+ 
+-#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H)
++#if !defined(__LINUX_SPINLOCK_TYPES_RAW_H) && !defined(__ASM_SPINLOCK_H)
+ # error "please don't include this file directly"
+ #endif
+ 
+diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
+index 6623c99f0984..c55ccec33a5a 100644
+--- a/arch/arm64/include/asm/thread_info.h
++++ b/arch/arm64/include/asm/thread_info.h
+@@ -26,6 +26,7 @@ struct thread_info {
+ #ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ 	u64			ttbr0;		/* saved TTBR0_EL1 */
+ #endif
++	int			preempt_lazy_count;	/* 0 => preemptable, <0 => bug */
+ 	union {
+ 		u64		preempt_count;	/* 0 => preemptible, <0 => bug */
+ 		struct {
+@@ -67,6 +68,7 @@ int arch_dup_task_struct(struct task_struct *dst,
+ #define TIF_UPROBE		4	/* uprobe breakpoint or singlestep */
+ #define TIF_MTE_ASYNC_FAULT	5	/* MTE Asynchronous Tag Check Fault */
+ #define TIF_NOTIFY_SIGNAL	6	/* signal notifications exist */
++#define TIF_NEED_RESCHED_LAZY	7
+ #define TIF_SYSCALL_TRACE	8	/* syscall trace active */
+ #define TIF_SYSCALL_AUDIT	9	/* syscall auditing */
+ #define TIF_SYSCALL_TRACEPOINT	10	/* syscall tracepoint for ftrace */
+@@ -97,8 +99,10 @@ int arch_dup_task_struct(struct task_struct *dst,
+ #define _TIF_SVE		(1 << TIF_SVE)
+ #define _TIF_MTE_ASYNC_FAULT	(1 << TIF_MTE_ASYNC_FAULT)
+ #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
++#define _TIF_NEED_RESCHED_LAZY	(1 << TIF_NEED_RESCHED_LAZY)
+ 
+-#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
++#define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
++				 _TIF_SIGPENDING | \
+ 				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
+ 				 _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \
+ 				 _TIF_NOTIFY_SIGNAL)
+@@ -107,6 +111,8 @@ int arch_dup_task_struct(struct task_struct *dst,
+ 				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
+ 				 _TIF_SYSCALL_EMU)
+ 
++#define _TIF_NEED_RESCHED_MASK	(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
++
+ #ifdef CONFIG_SHADOW_CALL_STACK
+ #define INIT_SCS							\
+ 	.scs_base	= init_shadow_call_stack,			\
+diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
+index 551427ae8cc5..96a4f6c9eb78 100644
+--- a/arch/arm64/kernel/asm-offsets.c
++++ b/arch/arm64/kernel/asm-offsets.c
+@@ -31,6 +31,7 @@ int main(void)
+   BLANK();
+   DEFINE(TSK_TI_FLAGS,		offsetof(struct task_struct, thread_info.flags));
+   DEFINE(TSK_TI_PREEMPT,	offsetof(struct task_struct, thread_info.preempt_count));
++  DEFINE(TSK_TI_PREEMPT_LAZY,	offsetof(struct task_struct, thread_info.preempt_lazy_count));
+ #ifdef CONFIG_ARM64_SW_TTBR0_PAN
+   DEFINE(TSK_TI_TTBR0,		offsetof(struct task_struct, thread_info.ttbr0));
+ #endif
+diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
+index 7a3fcf21b18a..5689d2b0c0b6 100644
+--- a/arch/arm64/kernel/fpsimd.c
++++ b/arch/arm64/kernel/fpsimd.c
+@@ -179,10 +179,19 @@ static void __get_cpu_fpsimd_context(void)
+  *
+  * The double-underscore version must only be called if you know the task
+  * can't be preempted.
++ *
++ * On RT kernels local_bh_disable() is not sufficient because it only
++ * serializes soft interrupt related sections via a local lock, but stays
++ * preemptible. Disabling preemption is the right choice here as bottom
++ * half processing is always in thread context on RT kernels so it
++ * implicitly prevents bottom half processing as well.
+  */
+ static void get_cpu_fpsimd_context(void)
+ {
+-	local_bh_disable();
++	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++		local_bh_disable();
++	else
++		preempt_disable();
+ 	__get_cpu_fpsimd_context();
+ }
+ 
+@@ -203,7 +212,10 @@ static void __put_cpu_fpsimd_context(void)
+ static void put_cpu_fpsimd_context(void)
+ {
+ 	__put_cpu_fpsimd_context();
+-	local_bh_enable();
++	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++		local_bh_enable();
++	else
++		preempt_enable();
+ }
+ 
+ static bool have_cpu_fpsimd_context(void)
+@@ -1033,6 +1045,7 @@ void fpsimd_thread_switch(struct task_struct *next)
+ void fpsimd_flush_thread(void)
+ {
+ 	int vl, supported_vl;
++	void *sve_state = NULL;
+ 
+ 	if (!system_supports_fpsimd())
+ 		return;
+@@ -1045,7 +1058,10 @@ void fpsimd_flush_thread(void)
+ 
+ 	if (system_supports_sve()) {
+ 		clear_thread_flag(TIF_SVE);
+-		sve_free(current);
++
++		/* Defer kfree() while in atomic context */
++		sve_state = current->thread.sve_state;
++		current->thread.sve_state = NULL;
+ 
+ 		/*
+ 		 * Reset the task vector length as required.
+@@ -1079,6 +1095,7 @@ void fpsimd_flush_thread(void)
+ 	}
+ 
+ 	put_cpu_fpsimd_context();
++	kfree(sve_state);
+ }
+ 
+ /*
+diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
+index b3e1beccf458..03183563feb8 100644
+--- a/arch/arm64/kernel/signal.c
++++ b/arch/arm64/kernel/signal.c
+@@ -922,7 +922,7 @@ static void do_signal(struct pt_regs *regs)
+ void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
+ {
+ 	do {
+-		if (thread_flags & _TIF_NEED_RESCHED) {
++		if (thread_flags & _TIF_NEED_RESCHED_MASK) {
+ 			/* Unmask Debug and SError for the next task */
+ 			local_daif_restore(DAIF_PROCCTX_NOIRQ);
+ 
+@@ -930,6 +930,14 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
+ 		} else {
+ 			local_daif_restore(DAIF_PROCCTX);
+ 
++#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
++			if (unlikely(current->forced_info.si_signo)) {
++				struct task_struct *t = current;
++				force_sig_info(&t->forced_info);
++				t->forced_info.si_signo = 0;
++			}
++#endif
++
+ 			if (thread_flags & _TIF_UPROBE)
+ 				uprobe_notify_resume(regs);
+ 
+diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
+index 3fe816c244ce..ba8c69cda361 100644
+--- a/arch/arm64/kvm/arm.c
++++ b/arch/arm64/kvm/arm.c
+@@ -828,7 +828,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+ 		 * involves poking the GIC, which must be done in a
+ 		 * non-preemptible context.
+ 		 */
+-		preempt_disable();
++		migrate_disable();
+ 
+ 		kvm_pmu_flush_hwstate(vcpu);
+ 
+@@ -852,7 +852,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+ 				kvm_timer_sync_user(vcpu);
+ 			kvm_vgic_sync_hwstate(vcpu);
+ 			local_irq_enable();
+-			preempt_enable();
++			migrate_enable();
+ 			continue;
+ 		}
+ 
+@@ -921,7 +921,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+ 		/* Exit types that need handling before we can be preempted */
+ 		handle_exit_early(vcpu, ret);
+ 
+-		preempt_enable();
++		migrate_enable();
+ 
+ 		/*
+ 		 * The ARMv8 architecture doesn't give the hypervisor
+diff --git a/arch/csky/include/asm/spinlock_types.h b/arch/csky/include/asm/spinlock_types.h
+index 8ff0f6ff3a00..db87a12c3827 100644
+--- a/arch/csky/include/asm/spinlock_types.h
++++ b/arch/csky/include/asm/spinlock_types.h
+@@ -3,7 +3,7 @@
+ #ifndef __ASM_CSKY_SPINLOCK_TYPES_H
+ #define __ASM_CSKY_SPINLOCK_TYPES_H
+ 
+-#ifndef __LINUX_SPINLOCK_TYPES_H
++#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
+ # error "please don't include this file directly"
+ #endif
+ 
+diff --git a/arch/hexagon/include/asm/spinlock_types.h b/arch/hexagon/include/asm/spinlock_types.h
+index 19d233497ba5..d5f66495b670 100644
+--- a/arch/hexagon/include/asm/spinlock_types.h
++++ b/arch/hexagon/include/asm/spinlock_types.h
+@@ -8,7 +8,7 @@
+ #ifndef _ASM_SPINLOCK_TYPES_H
+ #define _ASM_SPINLOCK_TYPES_H
+ 
+-#ifndef __LINUX_SPINLOCK_TYPES_H
++#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
+ # error "please don't include this file directly"
+ #endif
+ 
+diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h
+index 6e345fefcdca..14b8a161c165 100644
+--- a/arch/ia64/include/asm/spinlock_types.h
++++ b/arch/ia64/include/asm/spinlock_types.h
+@@ -2,7 +2,7 @@
+ #ifndef _ASM_IA64_SPINLOCK_TYPES_H
+ #define _ASM_IA64_SPINLOCK_TYPES_H
+ 
+-#ifndef __LINUX_SPINLOCK_TYPES_H
++#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
+ # error "please don't include this file directly"
+ #endif
+ 
+diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
+index 27222b75d2a4..5495225807eb 100644
+--- a/arch/powerpc/Kconfig
++++ b/arch/powerpc/Kconfig
+@@ -151,6 +151,7 @@ config PPC
+ 	select ARCH_STACKWALK
+ 	select ARCH_SUPPORTS_ATOMIC_RMW
+ 	select ARCH_SUPPORTS_DEBUG_PAGEALLOC	if PPC_BOOK3S || PPC_8xx || 40x
++	select ARCH_SUPPORTS_RT			if HAVE_POSIX_CPU_TIMERS_TASK_WORK
+ 	select ARCH_USE_BUILTIN_BSWAP
+ 	select ARCH_USE_CMPXCHG_LOCKREF		if PPC64
+ 	select ARCH_USE_MEMTEST
+@@ -218,6 +219,7 @@ config PPC
+ 	select HAVE_HW_BREAKPOINT		if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
+ 	select HAVE_IOREMAP_PROT
+ 	select HAVE_IRQ_TIME_ACCOUNTING
++	select HAVE_POSIX_CPU_TIMERS_TASK_WORK	if !KVM
+ 	select HAVE_KERNEL_GZIP
+ 	select HAVE_KERNEL_LZMA			if DEFAULT_UIMAGE
+ 	select HAVE_KERNEL_LZO			if DEFAULT_UIMAGE
+@@ -234,6 +236,7 @@ config PPC
+ 	select HAVE_PERF_EVENTS_NMI		if PPC64
+ 	select HAVE_PERF_REGS
+ 	select HAVE_PERF_USER_STACK_DUMP
++	select HAVE_PREEMPT_LAZY
+ 	select HAVE_REGS_AND_STACK_ACCESS_API
+ 	select HAVE_RELIABLE_STACKTRACE
+ 	select HAVE_RSEQ
+diff --git a/arch/powerpc/include/asm/simple_spinlock_types.h b/arch/powerpc/include/asm/simple_spinlock_types.h
+index 0f3cdd8faa95..08243338069d 100644
+--- a/arch/powerpc/include/asm/simple_spinlock_types.h
++++ b/arch/powerpc/include/asm/simple_spinlock_types.h
+@@ -2,7 +2,7 @@
+ #ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H
+ #define _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H
+ 
+-#ifndef __LINUX_SPINLOCK_TYPES_H
++#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
+ # error "please don't include this file directly"
+ #endif
+ 
+diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
+index 7ef1cd8168a0..f9e63cacd220 100644
+--- a/arch/powerpc/include/asm/smp.h
++++ b/arch/powerpc/include/asm/smp.h
+@@ -62,6 +62,7 @@ struct smp_ops_t {
+ 
+ extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
+ extern int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
++extern void smp_send_debugger_break_cpu(unsigned int cpu);
+ extern void smp_send_debugger_break(void);
+ extern void start_secondary_resume(void);
+ extern void smp_generic_give_timebase(void);
+diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
+index c5d742f18021..d5f8a74ed2e8 100644
+--- a/arch/powerpc/include/asm/spinlock_types.h
++++ b/arch/powerpc/include/asm/spinlock_types.h
+@@ -2,7 +2,7 @@
+ #ifndef _ASM_POWERPC_SPINLOCK_TYPES_H
+ #define _ASM_POWERPC_SPINLOCK_TYPES_H
+ 
+-#ifndef __LINUX_SPINLOCK_TYPES_H
++#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
+ # error "please don't include this file directly"
+ #endif
+ 
+diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h
+index 1c8460e23583..b1653c160bab 100644
+--- a/arch/powerpc/include/asm/stackprotector.h
++++ b/arch/powerpc/include/asm/stackprotector.h
+@@ -24,7 +24,11 @@ static __always_inline void boot_init_stack_canary(void)
+ 	unsigned long canary;
+ 
+ 	/* Try to get a semi random initial value. */
++#ifdef CONFIG_PREEMPT_RT
++	canary = (unsigned long)&canary;
++#else
+ 	canary = get_random_canary();
++#endif
+ 	canary ^= mftb();
+ 	canary ^= LINUX_VERSION_CODE;
+ 	canary &= CANARY_MASK;
+diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
+index 87013ac2a640..2920ed371188 100644
+--- a/arch/powerpc/include/asm/thread_info.h
++++ b/arch/powerpc/include/asm/thread_info.h
+@@ -53,6 +53,8 @@
+ struct thread_info {
+ 	int		preempt_count;		/* 0 => preemptable,
+ 						   <0 => BUG */
++	int             preempt_lazy_count;	/* 0 => preemptable,
++						   <0 => BUG */
+ 	unsigned long	local_flags;		/* private flags for thread */
+ #ifdef CONFIG_LIVEPATCH
+ 	unsigned long *livepatch_sp;
+@@ -99,6 +101,7 @@ void arch_setup_new_exec(void);
+ #define TIF_PATCH_PENDING	6	/* pending live patching update */
+ #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
+ #define TIF_SINGLESTEP		8	/* singlestepping active */
++#define TIF_NEED_RESCHED_LAZY	9	/* lazy rescheduling necessary */
+ #define TIF_SECCOMP		10	/* secure computing */
+ #define TIF_RESTOREALL		11	/* Restore all regs (implies NOERROR) */
+ #define TIF_NOERROR		12	/* Force successful syscall return */
+@@ -114,6 +117,7 @@ void arch_setup_new_exec(void);
+ #define TIF_POLLING_NRFLAG	19	/* true if poll_idle() is polling TIF_NEED_RESCHED */
+ #define TIF_32BIT		20	/* 32 bit binary */
+ 
++
+ /* as above, but as bit values */
+ #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
+ #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
+@@ -125,6 +129,7 @@ void arch_setup_new_exec(void);
+ #define _TIF_PATCH_PENDING	(1<<TIF_PATCH_PENDING)
+ #define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
+ #define _TIF_SINGLESTEP		(1<<TIF_SINGLESTEP)
++#define _TIF_NEED_RESCHED_LAZY	(1<<TIF_NEED_RESCHED_LAZY)
+ #define _TIF_SECCOMP		(1<<TIF_SECCOMP)
+ #define _TIF_RESTOREALL		(1<<TIF_RESTOREALL)
+ #define _TIF_NOERROR		(1<<TIF_NOERROR)
+@@ -138,10 +143,12 @@ void arch_setup_new_exec(void);
+ 				 _TIF_SYSCALL_EMU)
+ 
+ #define _TIF_USER_WORK_MASK	(_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
++				 _TIF_NEED_RESCHED_LAZY | \
+ 				 _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
+ 				 _TIF_RESTORE_TM | _TIF_PATCH_PENDING | \
+ 				 _TIF_NOTIFY_SIGNAL)
+ #define _TIF_PERSYSCALL_MASK	(_TIF_RESTOREALL|_TIF_NOERROR)
++#define _TIF_NEED_RESCHED_MASK	(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
+ 
+ /* Bits in local_flags */
+ /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
+diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
+index 8703df709cce..a5157035a84e 100644
+--- a/arch/powerpc/kernel/interrupt.c
++++ b/arch/powerpc/kernel/interrupt.c
+@@ -348,7 +348,7 @@ interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs)
+ 	ti_flags = READ_ONCE(current_thread_info()->flags);
+ 	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
+ 		local_irq_enable();
+-		if (ti_flags & _TIF_NEED_RESCHED) {
++		if (ti_flags & _TIF_NEED_RESCHED_MASK) {
+ 			schedule();
+ 		} else {
+ 			/*
+@@ -554,11 +554,15 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
+ 		/* Returning to a kernel context with local irqs enabled. */
+ 		WARN_ON_ONCE(!(regs->msr & MSR_EE));
+ again:
+-		if (IS_ENABLED(CONFIG_PREEMPT)) {
++		if (IS_ENABLED(CONFIG_PREEMPTION)) {
+ 			/* Return to preemptible kernel context */
+ 			if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED)) {
+ 				if (preempt_count() == 0)
+ 					preempt_schedule_irq();
++			} else if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED_LAZY)) {
++				if ((preempt_count() == 0) &&
++				    (current_thread_info()->preempt_lazy_count == 0))
++					preempt_schedule_irq();
+ 			}
+ 		}
+ 
+diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
+index c4f1d6b7d992..02e17a57da83 100644
+--- a/arch/powerpc/kernel/irq.c
++++ b/arch/powerpc/kernel/irq.c
+@@ -690,6 +690,7 @@ static inline void check_stack_overflow(void)
+ 	}
+ }
+ 
++#ifndef CONFIG_PREEMPT_RT
+ static __always_inline void call_do_softirq(const void *sp)
+ {
+ 	/* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */
+@@ -708,6 +709,7 @@ static __always_inline void call_do_softirq(const void *sp)
+ 		   "r11", "r12"
+ 	);
+ }
++#endif
+ 
+ static __always_inline void call_do_irq(struct pt_regs *regs, void *sp)
+ {
+@@ -820,10 +822,12 @@ void *mcheckirq_ctx[NR_CPUS] __read_mostly;
+ void *softirq_ctx[NR_CPUS] __read_mostly;
+ void *hardirq_ctx[NR_CPUS] __read_mostly;
+ 
++#ifndef CONFIG_PREEMPT_RT
+ void do_softirq_own_stack(void)
+ {
+ 	call_do_softirq(softirq_ctx[smp_processor_id()]);
+ }
++#endif
+ 
+ irq_hw_number_t virq_to_hw(unsigned int virq)
+ {
+diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
+index bdee7262c080..d57d37497862 100644
+--- a/arch/powerpc/kernel/kgdb.c
++++ b/arch/powerpc/kernel/kgdb.c
+@@ -120,11 +120,19 @@ int kgdb_skipexception(int exception, struct pt_regs *regs)
+ 
+ static int kgdb_debugger_ipi(struct pt_regs *regs)
+ {
+-	kgdb_nmicallback(raw_smp_processor_id(), regs);
++	int cpu = raw_smp_processor_id();
++
++	if (!kgdb_roundup_delay(cpu))
++		kgdb_nmicallback(cpu, regs);
+ 	return 0;
+ }
+ 
+ #ifdef CONFIG_SMP
++void kgdb_roundup_cpu(unsigned int cpu)
++{
++	smp_send_debugger_break_cpu(cpu);
++}
++
+ void kgdb_roundup_cpus(void)
+ {
+ 	smp_send_debugger_break();
+diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
+index fb95f92dcfac..308765f2e7a0 100644
+--- a/arch/powerpc/kernel/smp.c
++++ b/arch/powerpc/kernel/smp.c
+@@ -590,6 +590,11 @@ static void debugger_ipi_callback(struct pt_regs *regs)
+ 	debugger_ipi(regs);
+ }
+ 
++void smp_send_debugger_break_cpu(unsigned int cpu)
++{
++	smp_send_nmi_ipi(cpu, debugger_ipi_callback, 1000000);
++}
++
+ void smp_send_debugger_break(void)
+ {
+ 	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000);
+diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
+index a08bb7cefdc5..ae34f68eedc1 100644
+--- a/arch/powerpc/kernel/traps.c
++++ b/arch/powerpc/kernel/traps.c
+@@ -260,12 +260,17 @@ static char *get_mmu_str(void)
+ 
+ static int __die(const char *str, struct pt_regs *regs, long err)
+ {
++	const char *pr = "";
++
+ 	printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
+ 
++	if (IS_ENABLED(CONFIG_PREEMPTION))
++		pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT";
++
+ 	printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n",
+ 	       IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
+ 	       PAGE_SIZE / 1024, get_mmu_str(),
+-	       IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
++	       pr,
+ 	       IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
+ 	       IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
+ 	       debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
+diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
+index ff581d70f20c..e5c84d55bdfb 100644
+--- a/arch/powerpc/kvm/Kconfig
++++ b/arch/powerpc/kvm/Kconfig
+@@ -178,6 +178,7 @@ config KVM_E500MC
+ config KVM_MPIC
+ 	bool "KVM in-kernel MPIC emulation"
+ 	depends on KVM && E500
++	depends on !PREEMPT_RT
+ 	select HAVE_KVM_IRQCHIP
+ 	select HAVE_KVM_IRQFD
+ 	select HAVE_KVM_IRQ_ROUTING
+diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
+index ec5d84b4958c..62a80ecc6735 100644
+--- a/arch/powerpc/platforms/pseries/iommu.c
++++ b/arch/powerpc/platforms/pseries/iommu.c
+@@ -24,6 +24,7 @@
+ #include <linux/of.h>
+ #include <linux/iommu.h>
+ #include <linux/rculist.h>
++#include <linux/local_lock.h>
+ #include <asm/io.h>
+ #include <asm/prom.h>
+ #include <asm/rtas.h>
+@@ -200,7 +201,13 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
+ 	return ret;
+ }
+ 
+-static DEFINE_PER_CPU(__be64 *, tce_page);
++struct tce_page {
++	__be64 * page;
++	local_lock_t lock;
++};
++static DEFINE_PER_CPU(struct tce_page, tce_page) = {
++	.lock = INIT_LOCAL_LOCK(lock),
++};
+ 
+ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
+ 				     long npages, unsigned long uaddr,
+@@ -223,9 +230,10 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
+ 		                           direction, attrs);
+ 	}
+ 
+-	local_irq_save(flags);	/* to protect tcep and the page behind it */
++	/* to protect tcep and the page behind it */
++	local_lock_irqsave(&tce_page.lock, flags);
+ 
+-	tcep = __this_cpu_read(tce_page);
++	tcep = __this_cpu_read(tce_page.page);
+ 
+ 	/* This is safe to do since interrupts are off when we're called
+ 	 * from iommu_alloc{,_sg}()
+@@ -234,12 +242,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
+ 		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
+ 		/* If allocation fails, fall back to the loop implementation */
+ 		if (!tcep) {
+-			local_irq_restore(flags);
++			local_unlock_irqrestore(&tce_page.lock, flags);
+ 			return tce_build_pSeriesLP(tbl->it_index, tcenum,
+ 					tceshift,
+ 					npages, uaddr, direction, attrs);
+ 		}
+-		__this_cpu_write(tce_page, tcep);
++		__this_cpu_write(tce_page.page, tcep);
+ 	}
+ 
+ 	rpn = __pa(uaddr) >> tceshift;
+@@ -269,7 +277,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
+ 		tcenum += limit;
+ 	} while (npages > 0 && !rc);
+ 
+-	local_irq_restore(flags);
++	local_unlock_irqrestore(&tce_page.lock, flags);
+ 
+ 	if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
+ 		ret = (int)rc;
+@@ -454,16 +462,17 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
+ 				DMA_BIDIRECTIONAL, 0);
+ 	}
+ 
+-	local_irq_disable();	/* to protect tcep and the page behind it */
+-	tcep = __this_cpu_read(tce_page);
++	/* to protect tcep and the page behind it */
++	local_lock_irq(&tce_page.lock);
++	tcep = __this_cpu_read(tce_page.page);
+ 
+ 	if (!tcep) {
+ 		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
+ 		if (!tcep) {
+-			local_irq_enable();
++			local_unlock_irq(&tce_page.lock);
+ 			return -ENOMEM;
+ 		}
+-		__this_cpu_write(tce_page, tcep);
++		__this_cpu_write(tce_page.page, tcep);
+ 	}
+ 
+ 	proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
+@@ -506,7 +515,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
+ 
+ 	/* error cleanup: caller will clear whole range */
+ 
+-	local_irq_enable();
++	local_unlock_irq(&tce_page.lock);
+ 	return rc;
+ }
+ 
+diff --git a/arch/riscv/include/asm/spinlock_types.h b/arch/riscv/include/asm/spinlock_types.h
+index f398e7638dd6..5a35a49505da 100644
+--- a/arch/riscv/include/asm/spinlock_types.h
++++ b/arch/riscv/include/asm/spinlock_types.h
+@@ -6,7 +6,7 @@
+ #ifndef _ASM_RISCV_SPINLOCK_TYPES_H
+ #define _ASM_RISCV_SPINLOCK_TYPES_H
+ 
+-#ifndef __LINUX_SPINLOCK_TYPES_H
++#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
+ # error "please don't include this file directly"
+ #endif
+ 
+diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h
+index a2bbfd7df85f..b69695e39957 100644
+--- a/arch/s390/include/asm/spinlock_types.h
++++ b/arch/s390/include/asm/spinlock_types.h
+@@ -2,7 +2,7 @@
+ #ifndef __ASM_SPINLOCK_TYPES_H
+ #define __ASM_SPINLOCK_TYPES_H
+ 
+-#ifndef __LINUX_SPINLOCK_TYPES_H
++#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
+ # error "please don't include this file directly"
+ #endif
+ 
+diff --git a/arch/sh/include/asm/spinlock_types.h b/arch/sh/include/asm/spinlock_types.h
+index e82369f286a2..907bda4b1619 100644
+--- a/arch/sh/include/asm/spinlock_types.h
++++ b/arch/sh/include/asm/spinlock_types.h
+@@ -2,7 +2,7 @@
+ #ifndef __ASM_SH_SPINLOCK_TYPES_H
+ #define __ASM_SH_SPINLOCK_TYPES_H
+ 
+-#ifndef __LINUX_SPINLOCK_TYPES_H
++#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
+ # error "please don't include this file directly"
+ #endif
+ 
+diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
+index ef0f0827cf57..2d3eca8fee01 100644
+--- a/arch/sh/kernel/irq.c
++++ b/arch/sh/kernel/irq.c
+@@ -149,6 +149,7 @@ void irq_ctx_exit(int cpu)
+ 	hardirq_ctx[cpu] = NULL;
+ }
+ 
++#ifndef CONFIG_PREEMPT_RT
+ void do_softirq_own_stack(void)
+ {
+ 	struct thread_info *curctx;
+@@ -176,6 +177,7 @@ void do_softirq_own_stack(void)
+ 		  "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr"
+ 	);
+ }
++#endif
+ #else
+ static inline void handle_one_irq(unsigned int irq)
+ {
+diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
+index c8848bb681a1..41fa1be980a3 100644
+--- a/arch/sparc/kernel/irq_64.c
++++ b/arch/sparc/kernel/irq_64.c
+@@ -855,6 +855,7 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs)
+ 	set_irq_regs(old_regs);
+ }
+ 
++#ifndef CONFIG_PREEMPT_RT
+ void do_softirq_own_stack(void)
+ {
+ 	void *orig_sp, *sp = softirq_stack[smp_processor_id()];
+@@ -869,6 +870,7 @@ void do_softirq_own_stack(void)
+ 	__asm__ __volatile__("mov %0, %%sp"
+ 			     : : "r" (orig_sp));
+ }
++#endif
+ 
+ #ifdef CONFIG_HOTPLUG_CPU
+ void fixup_irqs(void)
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index a08ce6360382..4a4498670861 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -107,6 +107,7 @@ config X86
+ 	select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP	if NR_CPUS <= 4096
+ 	select ARCH_SUPPORTS_LTO_CLANG
+ 	select ARCH_SUPPORTS_LTO_CLANG_THIN
++	select ARCH_SUPPORTS_RT
+ 	select ARCH_USE_BUILTIN_BSWAP
+ 	select ARCH_USE_MEMTEST
+ 	select ARCH_USE_QUEUED_RWLOCKS
+@@ -230,6 +231,7 @@ config X86
+ 	select HAVE_PCI
+ 	select HAVE_PERF_REGS
+ 	select HAVE_PERF_USER_STACK_DUMP
++	select HAVE_PREEMPT_LAZY
+ 	select MMU_GATHER_RCU_TABLE_FREE		if PARAVIRT
+ 	select HAVE_POSIX_CPU_TIMERS_TASK_WORK
+ 	select HAVE_REGS_AND_STACK_ACCESS_API
+diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h
+index e087cd7837c3..96cc92f63b06 100644
+--- a/arch/x86/include/asm/irq_stack.h
++++ b/arch/x86/include/asm/irq_stack.h
+@@ -202,6 +202,7 @@
+ 			      IRQ_CONSTRAINTS, regs, vector);		\
+ }
+ 
++#ifndef CONFIG_PREEMPT_RT
+ /*
+  * Macro to invoke __do_softirq on the irq stack. This is only called from
+  * task context when bottom halves are about to be reenabled and soft
+@@ -215,6 +216,8 @@
+ 	__this_cpu_write(hardirq_stack_inuse, false);			\
+ }
+ 
++#endif
++
+ #else /* CONFIG_X86_64 */
+ /* System vector handlers always run on the stack they interrupted. */
+ #define run_sysvec_on_irqstack_cond(func, regs)				\
+diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
+index fe5efbcba824..ab8cb5fc2329 100644
+--- a/arch/x86/include/asm/preempt.h
++++ b/arch/x86/include/asm/preempt.h
+@@ -90,17 +90,48 @@ static __always_inline void __preempt_count_sub(int val)
+  * a decrement which hits zero means we have no preempt_count and should
+  * reschedule.
+  */
+-static __always_inline bool __preempt_count_dec_and_test(void)
++static __always_inline bool ____preempt_count_dec_and_test(void)
+ {
+ 	return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var]));
+ }
+ 
++static __always_inline bool __preempt_count_dec_and_test(void)
++{
++	if (____preempt_count_dec_and_test())
++		return true;
++#ifdef CONFIG_PREEMPT_LAZY
++	if (preempt_count())
++		return false;
++	if (current_thread_info()->preempt_lazy_count)
++		return false;
++	return test_thread_flag(TIF_NEED_RESCHED_LAZY);
++#else
++	return false;
++#endif
++}
++
+ /*
+  * Returns true when we need to resched and can (barring IRQ state).
+  */
+ static __always_inline bool should_resched(int preempt_offset)
+ {
++#ifdef CONFIG_PREEMPT_LAZY
++	u32 tmp;
++	tmp = raw_cpu_read_4(__preempt_count);
++	if (tmp == preempt_offset)
++		return true;
++
++	/* preempt count == 0 ? */
++	tmp &= ~PREEMPT_NEED_RESCHED;
++	if (tmp != preempt_offset)
++		return false;
++	/* XXX PREEMPT_LOCK_OFFSET */
++	if (current_thread_info()->preempt_lazy_count)
++		return false;
++	return test_thread_flag(TIF_NEED_RESCHED_LAZY);
++#else
+ 	return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
++#endif
+ }
+ 
+ #ifdef CONFIG_PREEMPTION
+diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
+index 2dfb5fea13af..fc03f4f7ed84 100644
+--- a/arch/x86/include/asm/signal.h
++++ b/arch/x86/include/asm/signal.h
+@@ -28,6 +28,19 @@ typedef struct {
+ #define SA_IA32_ABI	0x02000000u
+ #define SA_X32_ABI	0x01000000u
+ 
++/*
++ * Because some traps use the IST stack, we must keep preemption
++ * disabled while calling do_trap(), but do_trap() may call
++ * force_sig_info() which will grab the signal spin_locks for the
++ * task, which in PREEMPT_RT are mutexes.  By defining
++ * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
++ * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
++ * trap.
++ */
++#if defined(CONFIG_PREEMPT_RT)
++#define ARCH_RT_DELAYS_SIGNAL_SEND
++#endif
++
+ #ifndef CONFIG_COMPAT
+ #define compat_sigset_t compat_sigset_t
+ typedef sigset_t compat_sigset_t;
+diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
+index 24a8d6c4fb18..2fc22c27df18 100644
+--- a/arch/x86/include/asm/stackprotector.h
++++ b/arch/x86/include/asm/stackprotector.h
+@@ -50,7 +50,7 @@
+  */
+ static __always_inline void boot_init_stack_canary(void)
+ {
+-	u64 canary;
++	u64 canary = 0;
+ 	u64 tsc;
+ 
+ #ifdef CONFIG_X86_64
+@@ -61,8 +61,14 @@ static __always_inline void boot_init_stack_canary(void)
+ 	 * of randomness. The TSC only matters for very early init,
+ 	 * there it already has some randomness on most systems. Later
+ 	 * on during the bootup the random pool has true entropy too.
++	 * For preempt-rt we need to weaken the randomness a bit, as
++	 * we can't call into the random generator from atomic context
++	 * due to locking constraints. We just leave canary
++	 * zero-initialized and use the TSC based randomness on top of it.
+ 	 */
++#ifndef CONFIG_PREEMPT_RT
+ 	get_random_bytes(&canary, sizeof(canary));
++#endif
+ 	tsc = rdtsc();
+ 	canary += tsc + (tsc << 32UL);
+ 	canary &= CANARY_MASK;
+diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
+index cf132663c219..75dc786e6365 100644
+--- a/arch/x86/include/asm/thread_info.h
++++ b/arch/x86/include/asm/thread_info.h
+@@ -57,11 +57,14 @@ struct thread_info {
+ 	unsigned long		flags;		/* low level flags */
+ 	unsigned long		syscall_work;	/* SYSCALL_WORK_ flags */
+ 	u32			status;		/* thread synchronous flags */
++	int			preempt_lazy_count;	/* 0 => lazy preemptable
++							   <0 => BUG */
+ };
+ 
+ #define INIT_THREAD_INFO(tsk)			\
+ {						\
+ 	.flags		= 0,			\
++	.preempt_lazy_count	= 0,		\
+ }
+ 
+ #else /* !__ASSEMBLY__ */
+@@ -90,6 +93,7 @@ struct thread_info {
+ #define TIF_NOTSC		16	/* TSC is not accessible in userland */
+ #define TIF_NOTIFY_SIGNAL	17	/* signal notifications exist */
+ #define TIF_SLD			18	/* Restore split lock detection on context switch */
++#define TIF_NEED_RESCHED_LAZY	19	/* lazy rescheduling necessary */
+ #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
+ #define TIF_POLLING_NRFLAG	21	/* idle is polling for TIF_NEED_RESCHED */
+ #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
+@@ -114,6 +118,7 @@ struct thread_info {
+ #define _TIF_NOTSC		(1 << TIF_NOTSC)
+ #define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
+ #define _TIF_SLD		(1 << TIF_SLD)
++#define _TIF_NEED_RESCHED_LAZY	(1 << TIF_NEED_RESCHED_LAZY)
+ #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
+ #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
+ #define _TIF_SPEC_FORCE_UPDATE	(1 << TIF_SPEC_FORCE_UPDATE)
+diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
+index 044902d5a3c4..e5dd6da78713 100644
+--- a/arch/x86/kernel/irq_32.c
++++ b/arch/x86/kernel/irq_32.c
+@@ -132,6 +132,7 @@ int irq_init_percpu_irqstack(unsigned int cpu)
+ 	return 0;
+ }
+ 
++#ifndef CONFIG_PREEMPT_RT
+ void do_softirq_own_stack(void)
+ {
+ 	struct irq_stack *irqstk;
+@@ -148,6 +149,7 @@ void do_softirq_own_stack(void)
+ 
+ 	call_on_stack(__do_softirq, isp);
+ }
++#endif
+ 
+ void __handle_irq(struct irq_desc *desc, struct pt_regs *regs)
+ {
+diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
+index 3a43a2dee658..37bd37cdf2b6 100644
+--- a/arch/x86/kernel/kgdb.c
++++ b/arch/x86/kernel/kgdb.c
+@@ -502,9 +502,12 @@ static int kgdb_nmi_handler(unsigned int cmd, struct pt_regs *regs)
+ 		if (atomic_read(&kgdb_active) != -1) {
+ 			/* KGDB CPU roundup */
+ 			cpu = raw_smp_processor_id();
+-			kgdb_nmicallback(cpu, regs);
+-			set_bit(cpu, was_in_debug_nmi);
+-			touch_nmi_watchdog();
++
++			if (!kgdb_roundup_delay(cpu)) {
++				kgdb_nmicallback(cpu, regs);
++				set_bit(cpu, was_in_debug_nmi);
++				touch_nmi_watchdog();
++			}
+ 
+ 			return NMI_HANDLED;
+ 		}
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 7e1e3bc74562..38639c57b462 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -8686,6 +8686,14 @@ int kvm_arch_init(void *opaque)
+ 		goto out;
+ 	}
+ 
++#ifdef CONFIG_PREEMPT_RT
++	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
++		pr_err("RT requires X86_FEATURE_CONSTANT_TSC\n");
++		r = -EOPNOTSUPP;
++		goto out;
++	}
++#endif
++
+ 	r = -ENOMEM;
+ 	x86_fpu_cache = kmem_cache_create("x86_fpu", sizeof(struct fpu),
+ 					  __alignof__(struct fpu), SLAB_ACCOUNT,
+diff --git a/arch/xtensa/include/asm/spinlock_types.h b/arch/xtensa/include/asm/spinlock_types.h
+index 64c9389254f1..797aed7df3dd 100644
+--- a/arch/xtensa/include/asm/spinlock_types.h
++++ b/arch/xtensa/include/asm/spinlock_types.h
+@@ -2,7 +2,7 @@
+ #ifndef __ASM_SPINLOCK_TYPES_H
+ #define __ASM_SPINLOCK_TYPES_H
+ 
+-#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H)
++#if !defined(__LINUX_SPINLOCK_TYPES_RAW_H) && !defined(__ASM_SPINLOCK_H)
+ # error "please don't include this file directly"
+ #endif
+ 
+diff --git a/block/blk-mq.c b/block/blk-mq.c
+index bbbbcd2c1941..0fc928de505d 100644
+--- a/block/blk-mq.c
++++ b/block/blk-mq.c
+@@ -1567,14 +1567,14 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
+ 		return;
+ 
+ 	if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
+-		int cpu = get_cpu();
++		int cpu = get_cpu_light();
+ 		if (cpumask_test_cpu(cpu, hctx->cpumask)) {
+ 			__blk_mq_run_hw_queue(hctx);
+-			put_cpu();
++			put_cpu_light();
+ 			return;
+ 		}
+ 
+-		put_cpu();
++		put_cpu_light();
+ 	}
+ 
+ 	kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,
+diff --git a/crypto/testmgr.c b/crypto/testmgr.c
+index 163a1283a866..444183fe847d 100644
+--- a/crypto/testmgr.c
++++ b/crypto/testmgr.c
+@@ -1061,14 +1061,14 @@ static void generate_random_testvec_config(struct testvec_config *cfg,
+ 
+ static void crypto_disable_simd_for_test(void)
+ {
+-	preempt_disable();
++	migrate_disable();
+ 	__this_cpu_write(crypto_simd_disabled_for_test, true);
+ }
+ 
+ static void crypto_reenable_simd_for_test(void)
+ {
+ 	__this_cpu_write(crypto_simd_disabled_for_test, false);
+-	preempt_enable();
++	migrate_enable();
+ }
+ 
+ /*
+diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
+index 6383c81ac5b3..abb695f5f5e4 100644
+--- a/drivers/block/zram/zram_drv.c
++++ b/drivers/block/zram/zram_drv.c
+@@ -59,6 +59,40 @@ static void zram_free_page(struct zram *zram, size_t index);
+ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
+ 				u32 index, int offset, struct bio *bio);
+ 
++#ifdef CONFIG_PREEMPT_RT
++static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages)
++{
++	size_t index;
++
++	for (index = 0; index < num_pages; index++)
++		spin_lock_init(&zram->table[index].lock);
++}
++
++static int zram_slot_trylock(struct zram *zram, u32 index)
++{
++	int ret;
++
++	ret = spin_trylock(&zram->table[index].lock);
++	if (ret)
++		__set_bit(ZRAM_LOCK, &zram->table[index].flags);
++	return ret;
++}
++
++static void zram_slot_lock(struct zram *zram, u32 index)
++{
++	spin_lock(&zram->table[index].lock);
++	__set_bit(ZRAM_LOCK, &zram->table[index].flags);
++}
++
++static void zram_slot_unlock(struct zram *zram, u32 index)
++{
++	__clear_bit(ZRAM_LOCK, &zram->table[index].flags);
++	spin_unlock(&zram->table[index].lock);
++}
++
++#else
++
++static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) { }
+ 
+ static int zram_slot_trylock(struct zram *zram, u32 index)
+ {
+@@ -74,6 +108,7 @@ static void zram_slot_unlock(struct zram *zram, u32 index)
+ {
+ 	bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
+ }
++#endif
+ 
+ static inline bool init_done(struct zram *zram)
+ {
+@@ -1169,6 +1204,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
+ 
+ 	if (!huge_class_size)
+ 		huge_class_size = zs_huge_class_size(zram->mem_pool);
++	zram_meta_init_table_locks(zram, num_pages);
+ 	return true;
+ }
+ 
+diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
+index 80c3b43b4828..d8f6d880f915 100644
+--- a/drivers/block/zram/zram_drv.h
++++ b/drivers/block/zram/zram_drv.h
+@@ -63,6 +63,7 @@ struct zram_table_entry {
+ 		unsigned long element;
+ 	};
+ 	unsigned long flags;
++	spinlock_t lock;
+ #ifdef CONFIG_ZRAM_MEMORY_TRACKING
+ 	ktime_t ac_time;
+ #endif
+diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
+index dfb463ee7ca1..b19c4f745ee3 100644
+--- a/drivers/char/tpm/tpm_tis.c
++++ b/drivers/char/tpm/tpm_tis.c
+@@ -50,6 +50,31 @@ static inline struct tpm_tis_tcg_phy *to_tpm_tis_tcg_phy(struct tpm_tis_data *da
+ 	return container_of(data, struct tpm_tis_tcg_phy, priv);
+ }
+ 
++#ifdef CONFIG_PREEMPT_RT
++/*
++ * Flush previous write operations to the chip (by reading TPM_ACCESS(0))
++ * so that subsequent ioread*() calls won't stall a CPU.
++ */
++static inline void tpm_tis_flush(void __iomem *iobase)
++{
++	ioread8(iobase + TPM_ACCESS(0));	/* value read is discarded */
++}
++#else /* !CONFIG_PREEMPT_RT: the flush expands to nothing */
++#define tpm_tis_flush(iobase) do { } while (0)
++#endif /* CONFIG_PREEMPT_RT */
++
++static inline void tpm_tis_iowrite8(u8 b, void __iomem *iobase, u32 addr)
++{
++	iowrite8(b, iobase + addr);	/* 8-bit write at iobase + addr */
++	tpm_tis_flush(iobase);	/* read-back on PREEMPT_RT, no-op otherwise */
++}
++
++static inline void tpm_tis_iowrite32(u32 b, void __iomem *iobase, u32 addr)
++{
++	iowrite32(b, iobase + addr);	/* 32-bit write at iobase + addr */
++	tpm_tis_flush(iobase);	/* read-back on PREEMPT_RT, no-op otherwise */
++}
++
+ static int interrupts = -1;
+ module_param(interrupts, int, 0444);
+ MODULE_PARM_DESC(interrupts, "Enable interrupts");
+@@ -186,7 +211,7 @@ static int tpm_tcg_write_bytes(struct tpm_tis_data *data, u32 addr, u16 len,
+ 	struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data);
+ 
+ 	while (len--)
+-		iowrite8(*value++, phy->iobase + addr);
++		tpm_tis_iowrite8(*value++, phy->iobase, addr);
+ 
+ 	return 0;
+ }
+@@ -213,7 +238,7 @@ static int tpm_tcg_write32(struct tpm_tis_data *data, u32 addr, u32 value)
+ {
+ 	struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data);
+ 
+-	iowrite32(value, phy->iobase + addr);
++	tpm_tis_iowrite32(value, phy->iobase, addr);
+ 
+ 	return 0;
+ }
+diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
+index 332739f3eded..8589df0e8c1f 100644
+--- a/drivers/firmware/efi/efi.c
++++ b/drivers/firmware/efi/efi.c
+@@ -66,7 +66,7 @@ struct mm_struct efi_mm = {
+ 
+ struct workqueue_struct *efi_rts_wq;
+ 
+-static bool disable_runtime;
++static bool disable_runtime = IS_ENABLED(CONFIG_PREEMPT_RT);
+ static int __init setup_noefi(char *arg)
+ {
+ 	disable_runtime = true;
+@@ -97,6 +97,9 @@ static int __init parse_efi_cmdline(char *str)
+ 	if (parse_option_str(str, "noruntime"))
+ 		disable_runtime = true;
+ 
++	if (parse_option_str(str, "runtime"))
++		disable_runtime = false;
++
+ 	if (parse_option_str(str, "nosoftreserve"))
+ 		set_bit(EFI_MEM_NO_SOFT_RESERVE, &efi.flags);
+ 
+diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c
+index 254e67141a77..7a39029b083f 100644
+--- a/drivers/gpu/drm/i915/display/intel_crtc.c
++++ b/drivers/gpu/drm/i915/display/intel_crtc.c
+@@ -425,7 +425,8 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state)
+ 	 */
+ 	intel_psr_wait_for_idle(new_crtc_state);
+ 
+-	local_irq_disable();
++	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++		local_irq_disable();
+ 
+ 	crtc->debug.min_vbl = min;
+ 	crtc->debug.max_vbl = max;
+@@ -450,11 +451,13 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state)
+ 			break;
+ 		}
+ 
+-		local_irq_enable();
++		if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++			local_irq_enable();
+ 
+ 		timeout = schedule_timeout(timeout);
+ 
+-		local_irq_disable();
++		if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++			local_irq_disable();
+ 	}
+ 
+ 	finish_wait(wq, &wait);
+@@ -487,7 +490,8 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state)
+ 	return;
+ 
+ irq_disable:
+-	local_irq_disable();
++	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++		local_irq_disable();
+ }
+ 
+ #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE)
+@@ -566,7 +570,8 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state)
+ 		new_crtc_state->uapi.event = NULL;
+ 	}
+ 
+-	local_irq_enable();
++	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++		local_irq_enable();
+ 
+ 	/* Send VRR Push to terminate Vblank */
+ 	intel_vrr_send_push(new_crtc_state);
+diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+index 209cf265bf74..6e1b9068d944 100644
+--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
++++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+@@ -311,10 +311,9 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
+ 	/* Kick the work once more to drain the signalers, and disarm the irq */
+ 	irq_work_sync(&b->irq_work);
+ 	while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
+-		local_irq_disable();
+-		signal_irq_work(&b->irq_work);
+-		local_irq_enable();
++		irq_work_queue(&b->irq_work);
+ 		cond_resched();
++		irq_work_sync(&b->irq_work);
+ 	}
+ }
+ 
+diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
+index c41098950746..601274ba86e4 100644
+--- a/drivers/gpu/drm/i915/gt/intel_context.h
++++ b/drivers/gpu/drm/i915/gt/intel_context.h
+@@ -163,7 +163,8 @@ static inline void intel_context_enter(struct intel_context *ce)
+ 
+ static inline void intel_context_mark_active(struct intel_context *ce)
+ {
+-	lockdep_assert_held(&ce->timeline->mutex);
++	lockdep_assert(lockdep_is_held(&ce->timeline->mutex) ||
++		       test_bit(CONTEXT_IS_PARKED, &ce->flags));
+ 	++ce->active_count;
+ }
+ 
+diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
+index a63631ea0ec4..314457fb9db5 100644
+--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
++++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
+@@ -112,6 +112,7 @@ struct intel_context {
+ #define CONTEXT_FORCE_SINGLE_SUBMISSION	7
+ #define CONTEXT_NOPREEMPT		8
+ #define CONTEXT_LRCA_DIRTY		9
++#define CONTEXT_IS_PARKED		10
+ 
+ 	struct {
+ 		u64 timeout_us;
+diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+index dacd62773735..73e96ca024df 100644
+--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
++++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+@@ -80,39 +80,6 @@ static int __engine_unpark(struct intel_wakeref *wf)
+ 	return 0;
+ }
+ 
+-#if IS_ENABLED(CONFIG_LOCKDEP)
+-
+-static unsigned long __timeline_mark_lock(struct intel_context *ce)
+-{
+-	unsigned long flags;
+-
+-	local_irq_save(flags);
+-	mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);
+-
+-	return flags;
+-}
+-
+-static void __timeline_mark_unlock(struct intel_context *ce,
+-				   unsigned long flags)
+-{
+-	mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
+-	local_irq_restore(flags);
+-}
+-
+-#else
+-
+-static unsigned long __timeline_mark_lock(struct intel_context *ce)
+-{
+-	return 0;
+-}
+-
+-static void __timeline_mark_unlock(struct intel_context *ce,
+-				   unsigned long flags)
+-{
+-}
+-
+-#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
+-
+ static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
+ {
+ 	struct i915_request *rq = to_request(fence);
+@@ -159,7 +126,6 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
+ {
+ 	struct intel_context *ce = engine->kernel_context;
+ 	struct i915_request *rq;
+-	unsigned long flags;
+ 	bool result = true;
+ 
+ 	/* GPU is pointing to the void, as good as in the kernel context. */
+@@ -201,7 +167,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
+ 	 * engine->wakeref.count, we may see the request completion and retire
+ 	 * it causing an underflow of the engine->wakeref.
+ 	 */
+-	flags = __timeline_mark_lock(ce);
++	set_bit(CONTEXT_IS_PARKED, &ce->flags);
+ 	GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
+ 
+ 	rq = __i915_request_create(ce, GFP_NOWAIT);
+@@ -233,7 +199,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
+ 
+ 	result = false;
+ out_unlock:
+-	__timeline_mark_unlock(ce, flags);
++	clear_bit(CONTEXT_IS_PARKED, &ce->flags);
+ 	return result;
+ }
+ 
+diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+index 773ff5121833..f330457209d5 100644
+--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
++++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+@@ -1286,7 +1286,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
+ 	 * and context switches) submission.
+ 	 */
+ 
+-	spin_lock(&sched_engine->lock);
++	spin_lock_irq(&sched_engine->lock);
+ 
+ 	/*
+ 	 * If the queue is higher priority than the last
+@@ -1386,7 +1386,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
+ 				 * Even if ELSP[1] is occupied and not worthy
+ 				 * of timeslices, our queue might be.
+ 				 */
+-				spin_unlock(&sched_engine->lock);
++				spin_unlock_irq(&sched_engine->lock);
+ 				return;
+ 			}
+ 		}
+@@ -1412,7 +1412,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
+ 
+ 		if (last && !can_merge_rq(last, rq)) {
+ 			spin_unlock(&ve->base.sched_engine->lock);
+-			spin_unlock(&engine->sched_engine->lock);
++			spin_unlock_irq(&engine->sched_engine->lock);
+ 			return; /* leave this for another sibling */
+ 		}
+ 
+@@ -1574,7 +1574,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
+ 	 */
+ 	sched_engine->queue_priority_hint = queue_prio(sched_engine);
+ 	i915_sched_engine_reset_on_empty(sched_engine);
+-	spin_unlock(&sched_engine->lock);
++	spin_unlock_irq(&sched_engine->lock);
+ 
+ 	/*
+ 	 * We can skip poking the HW if we ended up with exactly the same set
+@@ -1600,13 +1600,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
+ 	}
+ }
+ 
+-static void execlists_dequeue_irq(struct intel_engine_cs *engine)
+-{
+-	local_irq_disable(); /* Suspend interrupts across request submission */
+-	execlists_dequeue(engine);
+-	local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */
+-}
+-
+ static void clear_ports(struct i915_request **ports, int count)
+ {
+ 	memset_p((void **)ports, NULL, count);
+@@ -2442,7 +2435,7 @@ static void execlists_submission_tasklet(struct tasklet_struct *t)
+ 	}
+ 
+ 	if (!engine->execlists.pending[0]) {
+-		execlists_dequeue_irq(engine);
++		execlists_dequeue(engine);
+ 		start_timeslice(engine);
+ 	}
+ 
+diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
+index 9bc4f4a8e12e..547347241a47 100644
+--- a/drivers/gpu/drm/i915/i915_irq.c
++++ b/drivers/gpu/drm/i915/i915_irq.c
+@@ -886,7 +886,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
+ 	 */
+ 	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+ 
+-	/* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
++	if (IS_ENABLED(CONFIG_PREEMPT_RT))
++		preempt_disable();
+ 
+ 	/* Get optional system timestamp before query. */
+ 	if (stime)
+@@ -950,7 +951,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc,
+ 	if (etime)
+ 		*etime = ktime_get();
+ 
+-	/* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
++	if (IS_ENABLED(CONFIG_PREEMPT_RT))
++		preempt_enable();
+ 
+ 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+ 
+diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
+index 79da5eca60af..b9dd6100c6d1 100644
+--- a/drivers/gpu/drm/i915/i915_request.c
++++ b/drivers/gpu/drm/i915/i915_request.c
+@@ -559,7 +559,6 @@ bool __i915_request_submit(struct i915_request *request)
+ 
+ 	RQ_TRACE(request, "\n");
+ 
+-	GEM_BUG_ON(!irqs_disabled());
+ 	lockdep_assert_held(&engine->sched_engine->lock);
+ 
+ 	/*
+@@ -668,7 +667,6 @@ void __i915_request_unsubmit(struct i915_request *request)
+ 	 */
+ 	RQ_TRACE(request, "\n");
+ 
+-	GEM_BUG_ON(!irqs_disabled());
+ 	lockdep_assert_held(&engine->sched_engine->lock);
+ 
+ 	/*
+diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
+index 1bc1349ba3c2..a2f713b4ac2f 100644
+--- a/drivers/gpu/drm/i915/i915_request.h
++++ b/drivers/gpu/drm/i915/i915_request.h
+@@ -609,7 +609,8 @@ i915_request_timeline(const struct i915_request *rq)
+ {
+ 	/* Valid only while the request is being constructed (or retired). */
+ 	return rcu_dereference_protected(rq->timeline,
+-					 lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex));
++					 lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex) ||
++					 test_bit(CONTEXT_IS_PARKED, &rq->context->flags));
+ }
+ 
+ static inline struct i915_gem_context *
+diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
+index 63fec1c3c132..f345a0f12bf6 100644
+--- a/drivers/gpu/drm/i915/i915_trace.h
++++ b/drivers/gpu/drm/i915/i915_trace.h
+@@ -2,6 +2,10 @@
+ #if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+ #define _I915_TRACE_H_
+ 
++#ifdef CONFIG_PREEMPT_RT
++#define NOTRACE
++#endif
++
+ #include <linux/stringify.h>
+ #include <linux/types.h>
+ #include <linux/tracepoint.h>
+@@ -819,7 +823,7 @@ DEFINE_EVENT(i915_request, i915_request_add,
+ 	     TP_ARGS(rq)
+ );
+ 
+-#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS)
++#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) && !defined(NOTRACE)
+ DEFINE_EVENT(i915_request, i915_request_guc_submit,
+ 	     TP_PROTO(struct i915_request *rq),
+ 	     TP_ARGS(rq)
+diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
+index 5259edacde38..b36b27c09049 100644
+--- a/drivers/gpu/drm/i915/i915_utils.h
++++ b/drivers/gpu/drm/i915/i915_utils.h
+@@ -343,7 +343,7 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
+ #define wait_for(COND, MS)		_wait_for((COND), (MS) * 1000, 10, 1000)
+ 
+ /* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */
+-#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT)
++#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT)
+ # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic())
+ #else
+ # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0)
+diff --git a/drivers/i2c/busses/i2c-cht-wc.c b/drivers/i2c/busses/i2c-cht-wc.c
+index 1cf68f85b2e1..8ccf0c928bb4 100644
+--- a/drivers/i2c/busses/i2c-cht-wc.c
++++ b/drivers/i2c/busses/i2c-cht-wc.c
+@@ -99,15 +99,8 @@ static irqreturn_t cht_wc_i2c_adap_thread_handler(int id, void *data)
+ 	 * interrupt handler as well, so running the client irq handler from
+ 	 * this thread will cause things to lock up.
+ 	 */
+-	if (reg & CHT_WC_EXTCHGRIRQ_CLIENT_IRQ) {
+-		/*
+-		 * generic_handle_irq expects local IRQs to be disabled
+-		 * as normally it is called from interrupt context.
+-		 */
+-		local_irq_disable();
+-		generic_handle_irq(adap->client_irq);
+-		local_irq_enable();
+-	}
++	if (reg & CHT_WC_EXTCHGRIRQ_CLIENT_IRQ)
++		generic_handle_irq_safe(adap->client_irq);
+ 
+ 	return IRQ_HANDLED;
+ }
+diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
+index 8fb065caf30b..c232535ca8f4 100644
+--- a/drivers/i2c/i2c-core-base.c
++++ b/drivers/i2c/i2c-core-base.c
+@@ -1422,7 +1422,7 @@ int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr)
+ 	if (irq <= 0)
+ 		return -ENXIO;
+ 
+-	generic_handle_irq(irq);
++	generic_handle_irq_safe(irq);
+ 
+ 	return 0;
+ }
+diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig
+index 1f1d57288085..dc6816d36d06 100644
+--- a/drivers/leds/trigger/Kconfig
++++ b/drivers/leds/trigger/Kconfig
+@@ -64,6 +64,7 @@ config LEDS_TRIGGER_BACKLIGHT
+ 
+ config LEDS_TRIGGER_CPU
+ 	bool "LED CPU Trigger"
++	depends on !PREEMPT_RT
+ 	help
+ 	  This allows LEDs to be controlled by active CPUs. This shows
+ 	  the active CPUs across an array of LEDs so you can see which
+diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
+index c2a42486f985..451a22641b5a 100644
+--- a/drivers/md/raid5.c
++++ b/drivers/md/raid5.c
+@@ -2218,8 +2218,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
+ 	struct raid5_percpu *percpu;
+ 	unsigned long cpu;
+ 
+-	cpu = get_cpu();
++	cpu = get_cpu_light();
+ 	percpu = per_cpu_ptr(conf->percpu, cpu);
++	spin_lock(&percpu->lock);
+ 	if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
+ 		ops_run_biofill(sh);
+ 		overlap_clear++;
+@@ -2278,7 +2279,8 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
+ 			if (test_and_clear_bit(R5_Overlap, &dev->flags))
+ 				wake_up(&sh->raid_conf->wait_for_overlap);
+ 		}
+-	put_cpu();
++	spin_unlock(&percpu->lock);
++	put_cpu_light();
+ }
+ 
+ static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh)
+@@ -7110,6 +7112,7 @@ static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node)
+ 			__func__, cpu);
+ 		return -ENOMEM;
+ 	}
++	spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock);
+ 	return 0;
+ }
+ 
+diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
+index 5c05acf20e1f..665fe138ab4f 100644
+--- a/drivers/md/raid5.h
++++ b/drivers/md/raid5.h
+@@ -635,6 +635,7 @@ struct r5conf {
+ 	int			recovery_disabled;
+ 	/* per cpu variables */
+ 	struct raid5_percpu {
++		spinlock_t	lock;		/* Protection for -RT */
+ 		struct page	*spare_page; /* Used when checking P/Q in raid6 */
+ 		void		*scribble;  /* space for constructing buffer
+ 					     * lists and performing address
+diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c
+index 70fa18b04ad2..b14d3f98e1eb 100644
+--- a/drivers/mfd/ezx-pcap.c
++++ b/drivers/mfd/ezx-pcap.c
+@@ -193,13 +193,11 @@ static void pcap_isr_work(struct work_struct *work)
+ 		ezx_pcap_write(pcap, PCAP_REG_MSR, isr | msr);
+ 		ezx_pcap_write(pcap, PCAP_REG_ISR, isr);
+ 
+-		local_irq_disable();
+ 		service = isr & ~msr;
+ 		for (irq = pcap->irq_base; service; service >>= 1, irq++) {
+ 			if (service & 1)
+-				generic_handle_irq(irq);
++				generic_handle_irq_safe(irq);
+ 		}
+-		local_irq_enable();
+ 		ezx_pcap_write(pcap, PCAP_REG_MSR, pcap->msr);
+ 	} while (gpio_get_value(pdata->gpio));
+ }
+diff --git a/drivers/misc/hi6421v600-irq.c b/drivers/misc/hi6421v600-irq.c
+index 08535e97ff43..0585a5821d05 100644
+--- a/drivers/misc/hi6421v600-irq.c
++++ b/drivers/misc/hi6421v600-irq.c
+@@ -118,8 +118,8 @@ static irqreturn_t hi6421v600_irq_handler(int irq, void *__priv)
+ 			 * If both powerkey down and up IRQs are received,
+ 			 * handle them at the right order
+ 			 */
+-			generic_handle_irq(priv->irqs[POWERKEY_DOWN]);
+-			generic_handle_irq(priv->irqs[POWERKEY_UP]);
++			generic_handle_irq_safe(priv->irqs[POWERKEY_DOWN]);
++			generic_handle_irq_safe(priv->irqs[POWERKEY_UP]);
+ 			pending &= ~HISI_IRQ_POWERKEY_UP_DOWN;
+ 		}
+ 
+@@ -127,7 +127,7 @@ static irqreturn_t hi6421v600_irq_handler(int irq, void *__priv)
+ 			continue;
+ 
+ 		for_each_set_bit(offset, &pending, BITS_PER_BYTE) {
+-			generic_handle_irq(priv->irqs[offset + i * BITS_PER_BYTE]);
++			generic_handle_irq_safe(priv->irqs[offset + i * BITS_PER_BYTE]);
+ 		}
+ 	}
+ 
+diff --git a/drivers/net/ethernet/netronome/nfp/abm/qdisc.c b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
+index 2473fb5f75e5..2a5cc64227e9 100644
+--- a/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
++++ b/drivers/net/ethernet/netronome/nfp/abm/qdisc.c
+@@ -458,7 +458,7 @@ nfp_abm_qdisc_graft(struct nfp_abm_link *alink, u32 handle, u32 child_handle,
+ static void
+ nfp_abm_stats_calculate(struct nfp_alink_stats *new,
+ 			struct nfp_alink_stats *old,
+-			struct gnet_stats_basic_packed *bstats,
++			struct gnet_stats_basic_sync *bstats,
+ 			struct gnet_stats_queue *qstats)
+ {
+ 	_bstats_update(bstats, new->tx_bytes - old->tx_bytes,
+diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
+index 5700c9d20a3e..be3330a1c922 100644
+--- a/drivers/net/usb/lan78xx.c
++++ b/drivers/net/usb/lan78xx.c
+@@ -1367,11 +1367,8 @@ static void lan78xx_status(struct lan78xx_net *dev, struct urb *urb)
+ 		netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata);
+ 		lan78xx_defer_kevent(dev, EVENT_LINK_RESET);
+ 
+-		if (dev->domain_data.phyirq > 0) {
+-			local_irq_disable();
+-			generic_handle_irq(dev->domain_data.phyirq);
+-			local_irq_enable();
+-		}
++		if (dev->domain_data.phyirq > 0)
++			generic_handle_irq_safe(dev->domain_data.phyirq);
+ 	} else {
+ 		netdev_warn(dev->net,
+ 			    "unexpected interrupt: 0x%08x\n", intdata);
+diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
+index 76dbdae0e987..967431858dcd 100644
+--- a/drivers/scsi/fcoe/fcoe.c
++++ b/drivers/scsi/fcoe/fcoe.c
+@@ -1450,11 +1450,11 @@ static int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev,
+ static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen)
+ {
+ 	struct fcoe_percpu_s *fps;
+-	int rc;
++	int rc, cpu = get_cpu_light();
+ 
+-	fps = &get_cpu_var(fcoe_percpu);
++	fps = &per_cpu(fcoe_percpu, cpu);
+ 	rc = fcoe_get_paged_crc_eof(skb, tlen, fps);
+-	put_cpu_var(fcoe_percpu);
++	put_cpu_light();
+ 
+ 	return rc;
+ }
+@@ -1639,11 +1639,11 @@ static inline int fcoe_filter_frames(struct fc_lport *lport,
+ 		return 0;
+ 	}
+ 
+-	stats = per_cpu_ptr(lport->stats, get_cpu());
++	stats = per_cpu_ptr(lport->stats, get_cpu_light());
+ 	stats->InvalidCRCCount++;
+ 	if (stats->InvalidCRCCount < 5)
+ 		printk(KERN_WARNING "fcoe: dropping frame with CRC error\n");
+-	put_cpu();
++	put_cpu_light();
+ 	return -EINVAL;
+ }
+ 
+@@ -1684,7 +1684,7 @@ static void fcoe_recv_frame(struct sk_buff *skb)
+ 	 */
+ 	hp = (struct fcoe_hdr *) skb_network_header(skb);
+ 
+-	stats = per_cpu_ptr(lport->stats, get_cpu());
++	stats = per_cpu_ptr(lport->stats, get_cpu_light());
+ 	if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) {
+ 		if (stats->ErrorFrames < 5)
+ 			printk(KERN_WARNING "fcoe: FCoE version "
+@@ -1716,13 +1716,13 @@ static void fcoe_recv_frame(struct sk_buff *skb)
+ 		goto drop;
+ 
+ 	if (!fcoe_filter_frames(lport, fp)) {
+-		put_cpu();
++		put_cpu_light();
+ 		fc_exch_recv(lport, fp);
+ 		return;
+ 	}
+ drop:
+ 	stats->ErrorFrames++;
+-	put_cpu();
++	put_cpu_light();
+ 	kfree_skb(skb);
+ }
+ 
+diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
+index 558f3f4e1859..f08feaa4f398 100644
+--- a/drivers/scsi/fcoe/fcoe_ctlr.c
++++ b/drivers/scsi/fcoe/fcoe_ctlr.c
+@@ -828,7 +828,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
+ 
+ 	INIT_LIST_HEAD(&del_list);
+ 
+-	stats = per_cpu_ptr(fip->lp->stats, get_cpu());
++	stats = per_cpu_ptr(fip->lp->stats, get_cpu_light());
+ 
+ 	list_for_each_entry_safe(fcf, next, &fip->fcfs, list) {
+ 		deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2;
+@@ -864,7 +864,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
+ 				sel_time = fcf->time;
+ 		}
+ 	}
+-	put_cpu();
++	put_cpu_light();
+ 
+ 	list_for_each_entry_safe(fcf, next, &del_list, list) {
+ 		/* Removes fcf from current list */
+diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
+index aa223db4cf53..0ceb93800704 100644
+--- a/drivers/scsi/libfc/fc_exch.c
++++ b/drivers/scsi/libfc/fc_exch.c
+@@ -825,10 +825,10 @@ static struct fc_exch *fc_exch_em_alloc(struct fc_lport *lport,
+ 	}
+ 	memset(ep, 0, sizeof(*ep));
+ 
+-	cpu = get_cpu();
++	cpu = get_cpu_light();
+ 	pool = per_cpu_ptr(mp->pool, cpu);
+ 	spin_lock_bh(&pool->lock);
+-	put_cpu();
++	put_cpu_light();
+ 
+ 	/* peek cache of free slot */
+ 	if (pool->left != FC_XID_UNKNOWN) {
+diff --git a/drivers/staging/greybus/gpio.c b/drivers/staging/greybus/gpio.c
+index 7e6347fe93f9..8a7cf1d0e968 100644
+--- a/drivers/staging/greybus/gpio.c
++++ b/drivers/staging/greybus/gpio.c
+@@ -391,10 +391,7 @@ static int gb_gpio_request_handler(struct gb_operation *op)
+ 		return -EINVAL;
+ 	}
+ 
+-	local_irq_disable();
+-	ret = generic_handle_irq(irq);
+-	local_irq_enable();
+-
++	ret = generic_handle_irq_safe(irq);
+ 	if (ret)
+ 		dev_err(dev, "failed to invoke irq handler\n");
+ 
+diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h
+index bb1a98c97adf..8639210a89c7 100644
+--- a/drivers/tty/serial/8250/8250.h
++++ b/drivers/tty/serial/8250/8250.h
+@@ -156,12 +156,55 @@ static inline void serial_dl_write(struct uart_8250_port *up, int value)
+ 	up->dl_write(up, value);
+ }
+ 
++static inline void serial8250_set_IER(struct uart_8250_port *up,
++				      unsigned char ier)
++{
++	/* Only a console port takes the atomic console lock around IER. */
++	const bool on_console = uart_console(&up->port);
++	unsigned long irqflags;
++
++	if (!on_console) {
++		serial_out(up, UART_IER, ier);
++		return;
++	}
++
++	/* Console port: write IER inside the atomic console lock. */
++	console_atomic_lock(irqflags);
++	serial_out(up, UART_IER, ier);
++	console_atomic_unlock(irqflags);
++}
++
++static inline unsigned char serial8250_clear_IER(struct uart_8250_port *up)
++{
++	struct uart_port *port = &up->port;
++	const bool on_console = uart_console(port);
++	unsigned long irqflags;
++	unsigned int saved;
++	unsigned int keep;
++
++	/* Leave only UART_IER_UUE set, and only if UART_CAP_UUE is present. */
++	keep = (up->capabilities & UART_CAP_UUE) ? UART_IER_UUE : 0;
++
++	/* On a console port, read and rewrite IER under the atomic lock. */
++	if (on_console)
++		console_atomic_lock(irqflags);
++
++	saved = serial_port_in(port, UART_IER);
++	serial_port_out(port, UART_IER, keep);
++
++	if (on_console)
++		console_atomic_unlock(irqflags);
++
++	/* Hand back the IER value read before it was overwritten. */
++	return saved;
++}
++
+ static inline bool serial8250_set_THRI(struct uart_8250_port *up)
+ {
+ 	if (up->ier & UART_IER_THRI)
+ 		return false;
+ 	up->ier |= UART_IER_THRI;
+-	serial_out(up, UART_IER, up->ier);
++	serial8250_set_IER(up, up->ier);
+ 	return true;
+ }
+ 
+@@ -170,7 +213,7 @@ static inline bool serial8250_clear_THRI(struct uart_8250_port *up)
+ 	if (!(up->ier & UART_IER_THRI))
+ 		return false;
+ 	up->ier &= ~UART_IER_THRI;
+-	serial_out(up, UART_IER, up->ier);
++	serial8250_set_IER(up, up->ier);
+ 	return true;
+ }
+ 
+diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
+index 1890f342f090..16d5d450b32f 100644
+--- a/drivers/tty/serial/8250/8250_core.c
++++ b/drivers/tty/serial/8250/8250_core.c
+@@ -265,10 +265,8 @@ static void serial8250_backup_timeout(struct timer_list *t)
+ 	 * Must disable interrupts or else we risk racing with the interrupt
+ 	 * based handler.
+ 	 */
+-	if (up->port.irq) {
+-		ier = serial_in(up, UART_IER);
+-		serial_out(up, UART_IER, 0);
+-	}
++	if (up->port.irq)
++		ier = serial8250_clear_IER(up);
+ 
+ 	iir = serial_in(up, UART_IIR);
+ 
+@@ -291,7 +289,7 @@ static void serial8250_backup_timeout(struct timer_list *t)
+ 		serial8250_tx_chars(up);
+ 
+ 	if (up->port.irq)
+-		serial_out(up, UART_IER, ier);
++		serial8250_set_IER(up, ier);
+ 
+ 	spin_unlock_irqrestore(&up->port.lock, flags);
+ 
+@@ -578,6 +576,14 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev)
+ 
+ #ifdef CONFIG_SERIAL_8250_CONSOLE
+ 
++static void univ8250_console_write_atomic(struct console *co, const char *s,
++					  unsigned int count)
++{
++	/* co->index selects the matching entry of serial8250_ports[]. */
++	serial8250_console_write_atomic(&serial8250_ports[co->index],
++					s, count);
++}
++
+ static void univ8250_console_write(struct console *co, const char *s,
+ 				   unsigned int count)
+ {
+@@ -671,6 +677,7 @@ static int univ8250_console_match(struct console *co, char *name, int idx,
+ 
+ static struct console univ8250_console = {
+ 	.name		= "ttyS",
++	.write_atomic	= univ8250_console_write_atomic,
+ 	.write		= univ8250_console_write,
+ 	.device		= uart_console_device,
+ 	.setup		= univ8250_console_setup,
+diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c
+index 6a22f3a970f3..a6c02140eff0 100644
+--- a/drivers/tty/serial/8250/8250_fsl.c
++++ b/drivers/tty/serial/8250/8250_fsl.c
+@@ -60,9 +60,18 @@ int fsl8250_handle_irq(struct uart_port *port)
+ 
+ 	/* Stop processing interrupts on input overrun */
+ 	if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) {
++		unsigned long flags;
+ 		unsigned long delay;
++		bool is_console;
+ 
++		is_console = uart_console(port);
++
++		if (is_console)
++			console_atomic_lock(flags);
+ 		up->ier = port->serial_in(port, UART_IER);
++		if (is_console)
++			console_atomic_unlock(flags);
++
+ 		if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) {
+ 			port->ops->stop_rx(port);
+ 		} else {
+diff --git a/drivers/tty/serial/8250/8250_ingenic.c b/drivers/tty/serial/8250/8250_ingenic.c
+index 65402d05eff9..8122645ab05c 100644
+--- a/drivers/tty/serial/8250/8250_ingenic.c
++++ b/drivers/tty/serial/8250/8250_ingenic.c
+@@ -146,6 +146,8 @@ OF_EARLYCON_DECLARE(x1000_uart, "ingenic,x1000-uart",
+ 
+ static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value)
+ {
++	unsigned long flags;
++	bool is_console;
+ 	int ier;
+ 
+ 	switch (offset) {
+@@ -167,7 +169,12 @@ static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value)
+ 		 * If we have enabled modem status IRQs we should enable
+ 		 * modem mode.
+ 		 */
++		is_console = uart_console(p);
++		if (is_console)
++			console_atomic_lock(flags);
+ 		ier = p->serial_in(p, UART_IER);
++		if (is_console)
++			console_atomic_unlock(flags);
+ 
+ 		if (ier & UART_IER_MSI)
+ 			value |= UART_MCR_MDCE | UART_MCR_FCM;
+diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c
+index de48a58460f4..364ee950f21a 100644
+--- a/drivers/tty/serial/8250/8250_mtk.c
++++ b/drivers/tty/serial/8250/8250_mtk.c
+@@ -222,12 +222,37 @@ static void mtk8250_shutdown(struct uart_port *port)
+ 
+ static void mtk8250_disable_intrs(struct uart_8250_port *up, int mask)
+ {
+-	serial_out(up, UART_IER, serial_in(up, UART_IER) & (~mask));
++	/*
++	 * Clear the @mask bits in IER. On a console port the read-modify-write
++	 * runs with the atomic console lock held.
++	 */
++	const bool on_console = uart_console(&up->port);
++	unsigned long irqflags;
++	unsigned int cur;
++
++	if (on_console)
++		console_atomic_lock(irqflags);
++
++	cur = serial_in(up, UART_IER);
++	serial_out(up, UART_IER, cur & ~mask);
++	if (on_console)
++		console_atomic_unlock(irqflags);
+ }
+ 
+ static void mtk8250_enable_intrs(struct uart_8250_port *up, int mask)
+ {
+-	serial_out(up, UART_IER, serial_in(up, UART_IER) | mask);
++	/* Set @mask in IER; uart_console() is sampled once so lock/unlock pair. */
++	const bool is_console = uart_console(&up->port);
++	unsigned long flags;
++	unsigned int ier;
++
++	if (is_console)
++		console_atomic_lock(flags);
++
++	ier = serial_in(up, UART_IER);
++	serial_out(up, UART_IER, ier | mask);
++	if (is_console)
++		console_atomic_unlock(flags);
+ }
+ 
+ static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
+diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
+index bfdd9ecc2baf..479b94b3238a 100644
+--- a/drivers/tty/serial/8250/8250_port.c
++++ b/drivers/tty/serial/8250/8250_port.c
+@@ -752,7 +752,7 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep)
+ 			serial_out(p, UART_EFR, UART_EFR_ECB);
+ 			serial_out(p, UART_LCR, 0);
+ 		}
+-		serial_out(p, UART_IER, sleep ? UART_IERX_SLEEP : 0);
++		serial8250_set_IER(p, sleep ? UART_IERX_SLEEP : 0);
+ 		if (p->capabilities & UART_CAP_EFR) {
+ 			serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B);
+ 			serial_out(p, UART_EFR, efr);
+@@ -1427,7 +1427,7 @@ static void serial8250_stop_rx(struct uart_port *port)
+ 
+ 	up->ier &= ~(UART_IER_RLSI | UART_IER_RDI);
+ 	up->port.read_status_mask &= ~UART_LSR_DR;
+-	serial_port_out(port, UART_IER, up->ier);
++	serial8250_set_IER(up, up->ier);
+ 
+ 	serial8250_rpm_put(up);
+ }
+@@ -1457,7 +1457,7 @@ void serial8250_em485_stop_tx(struct uart_8250_port *p)
+ 		serial8250_clear_and_reinit_fifos(p);
+ 
+ 		p->ier |= UART_IER_RLSI | UART_IER_RDI;
+-		serial_port_out(&p->port, UART_IER, p->ier);
++		serial8250_set_IER(p, p->ier);
+ 	}
+ }
+ EXPORT_SYMBOL_GPL(serial8250_em485_stop_tx);
+@@ -1693,7 +1693,7 @@ static void serial8250_disable_ms(struct uart_port *port)
+ 	mctrl_gpio_disable_ms(up->gpios);
+ 
+ 	up->ier &= ~UART_IER_MSI;
+-	serial_port_out(port, UART_IER, up->ier);
++	serial8250_set_IER(up, up->ier);
+ }
+ 
+ static void serial8250_enable_ms(struct uart_port *port)
+@@ -1709,7 +1709,7 @@ static void serial8250_enable_ms(struct uart_port *port)
+ 	up->ier |= UART_IER_MSI;
+ 
+ 	serial8250_rpm_get(up);
+-	serial_port_out(port, UART_IER, up->ier);
++	serial8250_set_IER(up, up->ier);
+ 	serial8250_rpm_put(up);
+ }
+ 
+@@ -2143,14 +2143,7 @@ static void serial8250_put_poll_char(struct uart_port *port,
+ 	struct uart_8250_port *up = up_to_u8250p(port);
+ 
+ 	serial8250_rpm_get(up);
+-	/*
+-	 *	First save the IER then disable the interrupts
+-	 */
+-	ier = serial_port_in(port, UART_IER);
+-	if (up->capabilities & UART_CAP_UUE)
+-		serial_port_out(port, UART_IER, UART_IER_UUE);
+-	else
+-		serial_port_out(port, UART_IER, 0);
++	ier = serial8250_clear_IER(up);
+ 
+ 	wait_for_xmitr(up, BOTH_EMPTY);
+ 	/*
+@@ -2163,7 +2156,7 @@ static void serial8250_put_poll_char(struct uart_port *port,
+ 	 *	and restore the IER
+ 	 */
+ 	wait_for_xmitr(up, BOTH_EMPTY);
+-	serial_port_out(port, UART_IER, ier);
++	serial8250_set_IER(up, ier);
+ 	serial8250_rpm_put(up);
+ }
+ 
+@@ -2468,7 +2461,7 @@ void serial8250_do_shutdown(struct uart_port *port)
+ 	 */
+ 	spin_lock_irqsave(&port->lock, flags);
+ 	up->ier = 0;
+-	serial_port_out(port, UART_IER, 0);
++	serial8250_set_IER(up, 0);
+ 	spin_unlock_irqrestore(&port->lock, flags);
+ 
+ 	synchronize_irq(port->irq);
+@@ -2850,7 +2843,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios,
+ 	if (up->capabilities & UART_CAP_RTOIE)
+ 		up->ier |= UART_IER_RTOIE;
+ 
+-	serial_port_out(port, UART_IER, up->ier);
++	serial8250_set_IER(up, up->ier);
+ 
+ 	if (up->capabilities & UART_CAP_EFR) {
+ 		unsigned char efr = 0;
+@@ -3315,7 +3308,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_defaults);
+ 
+ #ifdef CONFIG_SERIAL_8250_CONSOLE
+ 
+-static void serial8250_console_putchar(struct uart_port *port, int ch)
++static void serial8250_console_putchar_locked(struct uart_port *port, int ch)
+ {
+ 	struct uart_8250_port *up = up_to_u8250p(port);
+ 
+@@ -3323,6 +3316,18 @@ static void serial8250_console_putchar(struct uart_port *port, int ch)
+ 	serial_port_out(port, UART_TX, ch);
+ }
+ 
++static void serial8250_console_putchar(struct uart_port *port, int ch)
++{
++	struct uart_8250_port *up = up_to_u8250p(port);
++	unsigned long flags;
++
++	wait_for_xmitr(up, UART_LSR_THRE);
++
++	console_atomic_lock(flags);
++	serial8250_console_putchar_locked(port, ch);
++	console_atomic_unlock(flags);
++}
++
+ /*
+  *	Restore serial console when h/w power-off detected
+  */
+@@ -3349,6 +3354,32 @@ static void serial8250_console_restore(struct uart_8250_port *up)
+ 	serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS);
+ }
+ 
++void serial8250_console_write_atomic(struct uart_8250_port *up,
++				     const char *s, unsigned int count)
++{
++	struct uart_port *port = &up->port;
++	unsigned long flags;
++	unsigned int ier;
++
++	console_atomic_lock(flags);
++
++	touch_nmi_watchdog();
++
++	ier = serial8250_clear_IER(up);
++
++	if (atomic_fetch_inc(&up->console_printing)) {
++		uart_console_write(port, "\n", 1,
++				   serial8250_console_putchar_locked);
++	}
++	uart_console_write(port, s, count, serial8250_console_putchar_locked);
++	atomic_dec(&up->console_printing);
++
++	wait_for_xmitr(up, BOTH_EMPTY);
++	serial8250_set_IER(up, ier);
++
++	console_atomic_unlock(flags);
++}
++
+ /*
+  *	Print a string to the serial port trying not to disturb
+  *	any possible real use of the port...
+@@ -3365,24 +3396,12 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
+ 	struct uart_port *port = &up->port;
+ 	unsigned long flags;
+ 	unsigned int ier;
+-	int locked = 1;
+ 
+ 	touch_nmi_watchdog();
+ 
+-	if (oops_in_progress)
+-		locked = spin_trylock_irqsave(&port->lock, flags);
+-	else
+-		spin_lock_irqsave(&port->lock, flags);
+-
+-	/*
+-	 *	First save the IER then disable the interrupts
+-	 */
+-	ier = serial_port_in(port, UART_IER);
++	spin_lock_irqsave(&port->lock, flags);
+ 
+-	if (up->capabilities & UART_CAP_UUE)
+-		serial_port_out(port, UART_IER, UART_IER_UUE);
+-	else
+-		serial_port_out(port, UART_IER, 0);
++	ier = serial8250_clear_IER(up);
+ 
+ 	/* check scratch reg to see if port powered off during system sleep */
+ 	if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) {
+@@ -3396,7 +3415,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
+ 		mdelay(port->rs485.delay_rts_before_send);
+ 	}
+ 
++	atomic_inc(&up->console_printing);
+ 	uart_console_write(port, s, count, serial8250_console_putchar);
++	atomic_dec(&up->console_printing);
+ 
+ 	/*
+ 	 *	Finally, wait for transmitter to become empty
+@@ -3409,8 +3430,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
+ 		if (em485->tx_stopped)
+ 			up->rs485_stop_tx(up);
+ 	}
+-
+-	serial_port_out(port, UART_IER, ier);
++	serial8250_set_IER(up, ier);
+ 
+ 	/*
+ 	 *	The receive handling will happen properly because the
+@@ -3422,8 +3442,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
+ 	if (up->msr_saved_flags)
+ 		serial8250_modem_status(up);
+ 
+-	if (locked)
+-		spin_unlock_irqrestore(&port->lock, flags);
++	spin_unlock_irqrestore(&port->lock, flags);
+ }
+ 
+ static unsigned int probe_baud(struct uart_port *port)
+@@ -3443,6 +3462,7 @@ static unsigned int probe_baud(struct uart_port *port)
+ 
+ int serial8250_console_setup(struct uart_port *port, char *options, bool probe)
+ {
++	struct uart_8250_port *up = up_to_u8250p(port);
+ 	int baud = 9600;
+ 	int bits = 8;
+ 	int parity = 'n';
+@@ -3452,6 +3472,8 @@ int serial8250_console_setup(struct uart_port *port, char *options, bool probe)
+ 	if (!port->iobase && !port->membase)
+ 		return -ENODEV;
+ 
++	atomic_set(&up->console_printing, 0);
++
+ 	if (options)
+ 		uart_parse_options(options, &baud, &parity, &bits, &flow);
+ 	else if (probe)
+diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
+index b91fe25a64a1..5986658e130b 100644
+--- a/drivers/tty/serial/amba-pl011.c
++++ b/drivers/tty/serial/amba-pl011.c
+@@ -2340,18 +2340,24 @@ pl011_console_write(struct console *co, const char *s, unsigned int count)
+ {
+ 	struct uart_amba_port *uap = amba_ports[co->index];
+ 	unsigned int old_cr = 0, new_cr;
+-	unsigned long flags;
++	unsigned long flags = 0;
+ 	int locked = 1;
+ 
+ 	clk_enable(uap->clk);
+ 
+-	local_irq_save(flags);
++	/*
++	 * local_irq_save(flags);
++	 *
++	 * This local_irq_save() is nonsense. If we come in via sysrq
++	 * handling then interrupts are already disabled. Aside of
++	 * that the port.sysrq check is racy on SMP regardless.
++	 */
+ 	if (uap->port.sysrq)
+ 		locked = 0;
+ 	else if (oops_in_progress)
+-		locked = spin_trylock(&uap->port.lock);
++		locked = spin_trylock_irqsave(&uap->port.lock, flags);
+ 	else
+-		spin_lock(&uap->port.lock);
++		spin_lock_irqsave(&uap->port.lock, flags);
+ 
+ 	/*
+ 	 *	First save the CR then disable the interrupts
+@@ -2377,8 +2383,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count)
+ 		pl011_write(old_cr, uap, REG_CR);
+ 
+ 	if (locked)
+-		spin_unlock(&uap->port.lock);
+-	local_irq_restore(flags);
++		spin_unlock_irqrestore(&uap->port.lock, flags);
+ 
+ 	clk_disable(uap->clk);
+ }
+diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
+index 0862941862c8..10970632f0e4 100644
+--- a/drivers/tty/serial/omap-serial.c
++++ b/drivers/tty/serial/omap-serial.c
+@@ -1255,13 +1255,10 @@ serial_omap_console_write(struct console *co, const char *s,
+ 	unsigned int ier;
+ 	int locked = 1;
+ 
+-	local_irq_save(flags);
+-	if (up->port.sysrq)
+-		locked = 0;
+-	else if (oops_in_progress)
+-		locked = spin_trylock(&up->port.lock);
++	if (up->port.sysrq || oops_in_progress)
++		locked = spin_trylock_irqsave(&up->port.lock, flags);
+ 	else
+-		spin_lock(&up->port.lock);
++		spin_lock_irqsave(&up->port.lock, flags);
+ 
+ 	/*
+ 	 * First save the IER then disable the interrupts
+@@ -1288,8 +1285,7 @@ serial_omap_console_write(struct console *co, const char *s,
+ 		check_modem_status(up);
+ 
+ 	if (locked)
+-		spin_unlock(&up->port.lock);
+-	local_irq_restore(flags);
++		spin_unlock_irqrestore(&up->port.lock, flags);
+ }
+ 
+ static int __init
+diff --git a/drivers/virt/acrn/irqfd.c b/drivers/virt/acrn/irqfd.c
+index df5184979b28..d4ad211dce7a 100644
+--- a/drivers/virt/acrn/irqfd.c
++++ b/drivers/virt/acrn/irqfd.c
+@@ -17,7 +17,6 @@
+ #include "acrn_drv.h"
+ 
+ static LIST_HEAD(acrn_irqfd_clients);
+-static DEFINE_MUTEX(acrn_irqfds_mutex);
+ 
+ /**
+  * struct hsm_irqfd - Properties of HSM irqfd
+diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
+index 45cfd50a9521..502b56597f10 100644
+--- a/fs/afs/dir_silly.c
++++ b/fs/afs/dir_silly.c
+@@ -239,7 +239,7 @@ int afs_silly_iput(struct dentry *dentry, struct inode *inode)
+ 	struct dentry *alias;
+ 	int ret;
+ 
+-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
++	DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
+ 
+ 	_enter("%p{%pd},%llx", dentry, dentry, vnode->fid.vnode);
+ 
+diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
+index 1929e80c09ee..48eb8c30c6db 100644
+--- a/fs/cifs/readdir.c
++++ b/fs/cifs/readdir.c
+@@ -69,7 +69,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
+ 	struct inode *inode;
+ 	struct super_block *sb = parent->d_sb;
+ 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
+-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
++	DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
+ 
+ 	cifs_dbg(FYI, "%s: for %s\n", __func__, name->name);
+ 
+diff --git a/fs/dcache.c b/fs/dcache.c
+index cf871a81f4fd..02db80f2817f 100644
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -2537,7 +2537,13 @@ EXPORT_SYMBOL(d_rehash);
+ 
+ static inline unsigned start_dir_add(struct inode *dir)
+ {
+-
++	/*
++	 * The caller has a spinlock_t (dentry::d_lock) acquired which disables
++	 * preemption on !PREEMPT_RT. On PREEMPT_RT the lock does not disable
++	 * preemption and it has to be done explicitly.
++	 */
++	if (IS_ENABLED(CONFIG_PREEMPT_RT))
++		preempt_disable();
+ 	for (;;) {
+ 		unsigned n = dir->i_dir_seq;
+ 		if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
+@@ -2549,25 +2555,30 @@ static inline unsigned start_dir_add(struct inode *dir)
+ static inline void end_dir_add(struct inode *dir, unsigned n)
+ {
+ 	smp_store_release(&dir->i_dir_seq, n + 2);
++	if (IS_ENABLED(CONFIG_PREEMPT_RT))
++		preempt_enable();
+ }
+ 
+ static void d_wait_lookup(struct dentry *dentry)
+ {
+-	if (d_in_lookup(dentry)) {
+-		DECLARE_WAITQUEUE(wait, current);
+-		add_wait_queue(dentry->d_wait, &wait);
+-		do {
+-			set_current_state(TASK_UNINTERRUPTIBLE);
+-			spin_unlock(&dentry->d_lock);
+-			schedule();
+-			spin_lock(&dentry->d_lock);
+-		} while (d_in_lookup(dentry));
+-	}
++	struct swait_queue __wait;
++
++	if (!d_in_lookup(dentry))
++		return;
++
++	INIT_LIST_HEAD(&__wait.task_list);
++	do {
++		prepare_to_swait_exclusive(dentry->d_wait, &__wait, TASK_UNINTERRUPTIBLE);
++		spin_unlock(&dentry->d_lock);
++		schedule();
++		spin_lock(&dentry->d_lock);
++	} while (d_in_lookup(dentry));
++	finish_swait(dentry->d_wait, &__wait);
+ }
+ 
+ struct dentry *d_alloc_parallel(struct dentry *parent,
+ 				const struct qstr *name,
+-				wait_queue_head_t *wq)
++				struct swait_queue_head *wq)
+ {
+ 	unsigned int hash = name->hash;
+ 	struct hlist_bl_head *b = in_lookup_hash(parent, hash);
+@@ -2682,7 +2693,7 @@ void __d_lookup_done(struct dentry *dentry)
+ 	hlist_bl_lock(b);
+ 	dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
+ 	__hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
+-	wake_up_all(dentry->d_wait);
++	swake_up_all(dentry->d_wait);
+ 	dentry->d_wait = NULL;
+ 	hlist_bl_unlock(b);
+ 	INIT_HLIST_NODE(&dentry->d_u.d_alias);
+diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
+index c3e4804b8fcb..9edb87e11680 100644
+--- a/fs/fscache/internal.h
++++ b/fs/fscache/internal.h
+@@ -81,7 +81,6 @@ extern unsigned fscache_debug;
+ extern struct kobject *fscache_root;
+ extern struct workqueue_struct *fscache_object_wq;
+ extern struct workqueue_struct *fscache_op_wq;
+-DECLARE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
+ 
+ extern unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n);
+ 
+diff --git a/fs/fscache/main.c b/fs/fscache/main.c
+index 4207f98e405f..85f8cf3a323d 100644
+--- a/fs/fscache/main.c
++++ b/fs/fscache/main.c
+@@ -41,8 +41,6 @@ struct kobject *fscache_root;
+ struct workqueue_struct *fscache_object_wq;
+ struct workqueue_struct *fscache_op_wq;
+ 
+-DEFINE_PER_CPU(wait_queue_head_t, fscache_object_cong_wait);
+-
+ /* these values serve as lower bounds, will be adjusted in fscache_init() */
+ static unsigned fscache_object_max_active = 4;
+ static unsigned fscache_op_max_active = 2;
+@@ -138,7 +136,6 @@ unsigned int fscache_hash(unsigned int salt, unsigned int *data, unsigned int n)
+ static int __init fscache_init(void)
+ {
+ 	unsigned int nr_cpus = num_possible_cpus();
+-	unsigned int cpu;
+ 	int ret;
+ 
+ 	fscache_object_max_active =
+@@ -161,9 +158,6 @@ static int __init fscache_init(void)
+ 	if (!fscache_op_wq)
+ 		goto error_op_wq;
+ 
+-	for_each_possible_cpu(cpu)
+-		init_waitqueue_head(&per_cpu(fscache_object_cong_wait, cpu));
+-
+ 	ret = fscache_proc_init();
+ 	if (ret < 0)
+ 		goto error_proc;
+diff --git a/fs/fscache/object.c b/fs/fscache/object.c
+index 6a675652129b..7a972d144b54 100644
+--- a/fs/fscache/object.c
++++ b/fs/fscache/object.c
+@@ -798,6 +798,8 @@ void fscache_object_destroy(struct fscache_object *object)
+ }
+ EXPORT_SYMBOL(fscache_object_destroy);
+ 
++static DECLARE_WAIT_QUEUE_HEAD(fscache_object_cong_wait);
++
+ /*
+  * enqueue an object for metadata-type processing
+  */
+@@ -806,16 +808,12 @@ void fscache_enqueue_object(struct fscache_object *object)
+ 	_enter("{OBJ%x}", object->debug_id);
+ 
+ 	if (fscache_get_object(object, fscache_obj_get_queue) >= 0) {
+-		wait_queue_head_t *cong_wq =
+-			&get_cpu_var(fscache_object_cong_wait);
+ 
+ 		if (queue_work(fscache_object_wq, &object->work)) {
+ 			if (fscache_object_congested())
+-				wake_up(cong_wq);
++				wake_up(&fscache_object_cong_wait);
+ 		} else
+ 			fscache_put_object(object, fscache_obj_put_queue);
+-
+-		put_cpu_var(fscache_object_cong_wait);
+ 	}
+ }
+ 
+@@ -833,16 +831,15 @@ void fscache_enqueue_object(struct fscache_object *object)
+  */
+ bool fscache_object_sleep_till_congested(signed long *timeoutp)
+ {
+-	wait_queue_head_t *cong_wq = this_cpu_ptr(&fscache_object_cong_wait);
+ 	DEFINE_WAIT(wait);
+ 
+ 	if (fscache_object_congested())
+ 		return true;
+ 
+-	add_wait_queue_exclusive(cong_wq, &wait);
++	add_wait_queue_exclusive(&fscache_object_cong_wait, &wait);
+ 	if (!fscache_object_congested())
+ 		*timeoutp = schedule_timeout(*timeoutp);
+-	finish_wait(cong_wq, &wait);
++	finish_wait(&fscache_object_cong_wait, &wait);
+ 
+ 	return fscache_object_congested();
+ }
+diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
+index d5294e663df5..ee8846818b34 100644
+--- a/fs/fuse/readdir.c
++++ b/fs/fuse/readdir.c
+@@ -160,7 +160,7 @@ static int fuse_direntplus_link(struct file *file,
+ 	struct inode *dir = d_inode(parent);
+ 	struct fuse_conn *fc;
+ 	struct inode *inode;
+-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
++	DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
+ 
+ 	if (!o->nodeid) {
+ 		/*
+diff --git a/fs/namei.c b/fs/namei.c
+index 02e99606c65b..c1d11a2e7fa3 100644
+--- a/fs/namei.c
++++ b/fs/namei.c
+@@ -1635,7 +1635,7 @@ static struct dentry *__lookup_slow(const struct qstr *name,
+ {
+ 	struct dentry *dentry, *old;
+ 	struct inode *inode = dir->d_inode;
+-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
++	DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
+ 
+ 	/* Don't go there if it's already dead */
+ 	if (unlikely(IS_DEADDIR(inode)))
+@@ -3305,7 +3305,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
+ 	struct dentry *dentry;
+ 	int error, create_error = 0;
+ 	umode_t mode = op->mode;
+-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
++	DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
+ 
+ 	if (unlikely(IS_DEADDIR(dir_inode)))
+ 		return ERR_PTR(-ENOENT);
+diff --git a/fs/namespace.c b/fs/namespace.c
+index 1a9df6afb90b..373b0e738997 100644
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -344,8 +344,24 @@ int __mnt_want_write(struct vfsmount *m)
+ 	 * incremented count after it has set MNT_WRITE_HOLD.
+ 	 */
+ 	smp_mb();
+-	while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
+-		cpu_relax();
++	might_lock(&mount_lock.lock);
++	while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
++		if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
++			cpu_relax();
++		} else {
++			/*
++			 * This prevents priority inversion, if the task
++			 * setting MNT_WRITE_HOLD got preempted on a remote
++			 * CPU, and it prevents livelock if the task setting
++			 * MNT_WRITE_HOLD has a lower priority and is bound to
++			 * the same CPU as the task that is spinning here.
++			 */
++			preempt_enable();
++			lock_mount_hash();
++			unlock_mount_hash();
++			preempt_disable();
++		}
++	}
+ 	/*
+ 	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
+ 	 * be set to match its requirements. So we must not load that until
+diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
+index 32c3d0c454b1..b8ff452317e6 100644
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -637,7 +637,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
+ 		unsigned long dir_verifier)
+ {
+ 	struct qstr filename = QSTR_INIT(entry->name, entry->len);
+-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
++	DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
+ 	struct dentry *dentry;
+ 	struct dentry *alias;
+ 	struct inode *inode;
+@@ -1873,7 +1873,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
+ 		    struct file *file, unsigned open_flags,
+ 		    umode_t mode)
+ {
+-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
++	DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
+ 	struct nfs_open_context *ctx;
+ 	struct dentry *res;
+ 	struct iattr attr = { .ia_valid = ATTR_OPEN };
+diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
+index d5ccf095b2a7..0944c068f5cb 100644
+--- a/fs/nfs/unlink.c
++++ b/fs/nfs/unlink.c
+@@ -13,7 +13,7 @@
+ #include <linux/sunrpc/clnt.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/sched.h>
+-#include <linux/wait.h>
++#include <linux/swait.h>
+ #include <linux/namei.h>
+ #include <linux/fsnotify.h>
+ 
+@@ -184,7 +184,7 @@ nfs_async_unlink(struct dentry *dentry, const struct qstr *name)
+ 
+ 	data->cred = get_current_cred();
+ 	data->res.dir_attr = &data->dir_attr;
+-	init_waitqueue_head(&data->wq);
++	init_swait_queue_head(&data->wq);
+ 
+ 	status = -EBUSY;
+ 	spin_lock(&dentry->d_lock);
+diff --git a/fs/proc/base.c b/fs/proc/base.c
+index 300d53ee7040..6ab25d4d4037 100644
+--- a/fs/proc/base.c
++++ b/fs/proc/base.c
+@@ -96,6 +96,7 @@
+ #include <linux/posix-timers.h>
+ #include <linux/time_namespace.h>
+ #include <linux/resctrl.h>
++#include <linux/swait.h>
+ #include <linux/cn_proc.h>
+ #include <trace/events/oom.h>
+ #include "internal.h"
+@@ -2071,7 +2072,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx,
+ 
+ 	child = d_hash_and_lookup(dir, &qname);
+ 	if (!child) {
+-		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
++		DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
+ 		child = d_alloc_parallel(dir, &qname, &wq);
+ 		if (IS_ERR(child))
+ 			goto end_instantiate;
+diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
+index 0b7a00ed6c49..a7828fce675a 100644
+--- a/fs/proc/proc_sysctl.c
++++ b/fs/proc/proc_sysctl.c
+@@ -679,7 +679,7 @@ static bool proc_sys_fill_cache(struct file *file,
+ 
+ 	child = d_lookup(dir, &qname);
+ 	if (!child) {
+-		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
++		DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq);
+ 		child = d_alloc_parallel(dir, &qname, &wq);
+ 		if (IS_ERR(child))
+ 			return false;
+diff --git a/include/asm-generic/softirq_stack.h b/include/asm-generic/softirq_stack.h
+index eceeecf6a5bd..d3e2d81656e0 100644
+--- a/include/asm-generic/softirq_stack.h
++++ b/include/asm-generic/softirq_stack.h
+@@ -2,7 +2,7 @@
+ #ifndef __ASM_GENERIC_SOFTIRQ_STACK_H
+ #define __ASM_GENERIC_SOFTIRQ_STACK_H
+ 
+-#ifdef CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK
++#if defined(CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK) && !defined(CONFIG_PREEMPT_RT)
+ void do_softirq_own_stack(void);
+ #else
+ static inline void do_softirq_own_stack(void)
+diff --git a/include/linux/console.h b/include/linux/console.h
+index a97f277cfdfa..487a4266ab2c 100644
+--- a/include/linux/console.h
++++ b/include/linux/console.h
+@@ -16,6 +16,13 @@
+ 
+ #include <linux/atomic.h>
+ #include <linux/types.h>
++#include <linux/printk.h>
++#include <linux/seqlock.h>
++
++struct latched_seq {
++	seqcount_latch_t	latch;
++	u64			val[2];
++};
+ 
+ struct vc_data;
+ struct console_font_op;
+@@ -136,10 +143,12 @@ static inline int con_debug_leave(void)
+ #define CON_ANYTIME	(16) /* Safe to call when cpu is offline */
+ #define CON_BRL		(32) /* Used for a braille device */
+ #define CON_EXTENDED	(64) /* Use the extended output format a la /dev/kmsg */
++#define CON_HANDOVER	(128) /* Device was previously a boot console. */
+ 
+ struct console {
+ 	char	name[16];
+ 	void	(*write)(struct console *, const char *, unsigned);
++	void	(*write_atomic)(struct console *co, const char *s, unsigned int count);
+ 	int	(*read)(struct console *, char *, unsigned);
+ 	struct tty_driver *(*device)(struct console *, int *);
+ 	void	(*unblank)(void);
+@@ -149,6 +158,16 @@ struct console {
+ 	short	flags;
+ 	short	index;
+ 	int	cflag;
++#ifdef CONFIG_PRINTK
++	char	sync_buf[CONSOLE_LOG_MAX];
++	struct latched_seq printk_seq;
++	struct latched_seq printk_sync_seq;
++#ifdef CONFIG_HAVE_NMI
++	struct latched_seq printk_sync_nmi_seq;
++#endif
++#endif /* CONFIG_PRINTK */
++
++	struct task_struct *thread;
+ 	uint	ispeed;
+ 	uint	ospeed;
+ 	void	*data;
+diff --git a/include/linux/dcache.h b/include/linux/dcache.h
+index 9e23d33bb6f1..9f89d4887e35 100644
+--- a/include/linux/dcache.h
++++ b/include/linux/dcache.h
+@@ -108,7 +108,7 @@ struct dentry {
+ 
+ 	union {
+ 		struct list_head d_lru;		/* LRU list */
+-		wait_queue_head_t *d_wait;	/* in-lookup ones only */
++		struct swait_queue_head *d_wait;	/* in-lookup ones only */
+ 	};
+ 	struct list_head d_child;	/* child of parent list */
+ 	struct list_head d_subdirs;	/* our children */
+@@ -240,7 +240,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op
+ extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
+ extern struct dentry * d_alloc_anon(struct super_block *);
+ extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *,
+-					wait_queue_head_t *);
++					struct swait_queue_head *);
+ extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
+ extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
+ extern struct dentry * d_exact_alias(struct dentry *, struct inode *);
+diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
+index 2e2b8d6140ed..71064a2c2caf 100644
+--- a/include/linux/entry-common.h
++++ b/include/linux/entry-common.h
+@@ -57,9 +57,15 @@
+ # define ARCH_EXIT_TO_USER_MODE_WORK		(0)
+ #endif
+ 
++#ifdef CONFIG_PREEMPT_LAZY
++# define _TIF_NEED_RESCHED_MASK	(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
++#else
++# define _TIF_NEED_RESCHED_MASK	(_TIF_NEED_RESCHED)
++#endif
++
+ #define EXIT_TO_USER_MODE_WORK						\
+ 	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |		\
+-	 _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL |	\
++	 _TIF_NEED_RESCHED_MASK | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL |	\
+ 	 ARCH_EXIT_TO_USER_MODE_WORK)
+ 
+ /**
+diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
+index ec2a47a81e42..8cd11a223260 100644
+--- a/include/linux/irq_work.h
++++ b/include/linux/irq_work.h
+@@ -3,6 +3,7 @@
+ #define _LINUX_IRQ_WORK_H
+ 
+ #include <linux/smp_types.h>
++#include <linux/rcuwait.h>
+ 
+ /*
+  * An entry can be in one of four states:
+@@ -16,11 +17,13 @@
+ struct irq_work {
+ 	struct __call_single_node node;
+ 	void (*func)(struct irq_work *);
++	struct rcuwait irqwait;
+ };
+ 
+ #define __IRQ_WORK_INIT(_func, _flags) (struct irq_work){	\
+ 	.node = { .u_flags = (_flags), },			\
+ 	.func = (_func),					\
++	.irqwait = __RCUWAIT_INITIALIZER(irqwait),		\
+ }
+ 
+ #define IRQ_WORK_INIT(_func) __IRQ_WORK_INIT(_func, 0)
+@@ -46,6 +49,11 @@ static inline bool irq_work_is_busy(struct irq_work *work)
+ 	return atomic_read(&work->node.a_flags) & IRQ_WORK_BUSY;
+ }
+ 
++static inline bool irq_work_is_hard(struct irq_work *work)
++{
++	return atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ;
++}
++
+ bool irq_work_queue(struct irq_work *work);
+ bool irq_work_queue_on(struct irq_work *work, int cpu);
+ 
+diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
+index 59aea39785bf..d69b819b53e0 100644
+--- a/include/linux/irqdesc.h
++++ b/include/linux/irqdesc.h
+@@ -160,6 +160,7 @@ static inline void generic_handle_irq_desc(struct irq_desc *desc)
+ 
+ int handle_irq_desc(struct irq_desc *desc);
+ int generic_handle_irq(unsigned int irq);
++int generic_handle_irq_safe(unsigned int irq);
+ 
+ #ifdef CONFIG_IRQ_DOMAIN
+ /*
+diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
+index 747f40e0c326..5ec0fa71399e 100644
+--- a/include/linux/irqflags.h
++++ b/include/linux/irqflags.h
+@@ -71,14 +71,6 @@ do {						\
+ do {						\
+ 	__this_cpu_dec(hardirq_context);	\
+ } while (0)
+-# define lockdep_softirq_enter()		\
+-do {						\
+-	current->softirq_context++;		\
+-} while (0)
+-# define lockdep_softirq_exit()			\
+-do {						\
+-	current->softirq_context--;		\
+-} while (0)
+ 
+ # define lockdep_hrtimer_enter(__hrtimer)		\
+ ({							\
+@@ -140,6 +132,21 @@ do {						\
+ # define lockdep_irq_work_exit(__work)		do { } while (0)
+ #endif
+ 
++#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT)
++# define lockdep_softirq_enter()		\
++do {						\
++	current->softirq_context++;		\
++} while (0)
++# define lockdep_softirq_exit()			\
++do {						\
++	current->softirq_context--;		\
++} while (0)
++
++#else
++# define lockdep_softirq_enter()		do { } while (0)
++# define lockdep_softirq_exit()			do { } while (0)
++#endif
++
+ #if defined(CONFIG_IRQSOFF_TRACER) || \
+ 	defined(CONFIG_PREEMPT_TRACER)
+  extern void stop_critical_timings(void);
+diff --git a/include/linux/kernel.h b/include/linux/kernel.h
+index f56cd8879a59..49f1e924b6e6 100644
+--- a/include/linux/kernel.h
++++ b/include/linux/kernel.h
+@@ -111,8 +111,8 @@ static __always_inline void might_resched(void)
+ #endif /* CONFIG_PREEMPT_* */
+ 
+ #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+-extern void ___might_sleep(const char *file, int line, int preempt_offset);
+-extern void __might_sleep(const char *file, int line, int preempt_offset);
++extern void __might_resched(const char *file, int line, unsigned int offsets);
++extern void __might_sleep(const char *file, int line);
+ extern void __cant_sleep(const char *file, int line, int preempt_offset);
+ extern void __cant_migrate(const char *file, int line);
+ 
+@@ -129,7 +129,7 @@ extern void __cant_migrate(const char *file, int line);
+  * supposed to.
+  */
+ # define might_sleep() \
+-	do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
++	do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0)
+ /**
+  * cant_sleep - annotation for functions that cannot sleep
+  *
+@@ -168,10 +168,9 @@ extern void __cant_migrate(const char *file, int line);
+  */
+ # define non_block_end() WARN_ON(current->non_block_count-- == 0)
+ #else
+-  static inline void ___might_sleep(const char *file, int line,
+-				   int preempt_offset) { }
+-  static inline void __might_sleep(const char *file, int line,
+-				   int preempt_offset) { }
++  static inline void __might_resched(const char *file, int line,
++				     unsigned int offsets) { }
++static inline void __might_sleep(const char *file, int line) { }
+ # define might_sleep() do { might_resched(); } while (0)
+ # define cant_sleep() do { } while (0)
+ # define cant_migrate()		do { } while (0)
+diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
+index 258cdde8d356..9bca0d98db5a 100644
+--- a/include/linux/kgdb.h
++++ b/include/linux/kgdb.h
+@@ -212,6 +212,8 @@ extern void kgdb_call_nmi_hook(void *ignored);
+  */
+ extern void kgdb_roundup_cpus(void);
+ 
++extern void kgdb_roundup_cpu(unsigned int cpu);
++
+ /**
+  *	kgdb_arch_set_pc - Generic call back to the program counter
+  *	@regs: Current &struct pt_regs.
+@@ -365,5 +367,6 @@ extern void kgdb_free_init_mem(void);
+ #define dbg_late_init()
+ static inline void kgdb_panic(const char *msg) {}
+ static inline void kgdb_free_init_mem(void) { }
++static inline void kgdb_roundup_cpu(unsigned int cpu) {}
+ #endif /* ! CONFIG_KGDB */
+ #endif /* _KGDB_H_ */
+diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
+index 7f8ee09c711f..e9672de22cf2 100644
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -12,6 +12,7 @@
+ #include <linux/completion.h>
+ #include <linux/cpumask.h>
+ #include <linux/uprobes.h>
++#include <linux/rcupdate.h>
+ #include <linux/page-flags-layout.h>
+ #include <linux/workqueue.h>
+ #include <linux/seqlock.h>
+@@ -572,6 +573,9 @@ struct mm_struct {
+ 		bool tlb_flush_batched;
+ #endif
+ 		struct uprobes_state uprobes_state;
++#ifdef CONFIG_PREEMPT_RT
++		struct rcu_head delayed_drop;
++#endif
+ #ifdef CONFIG_HUGETLB_PAGE
+ 		atomic_long_t hugetlb_usage;
+ #endif
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index c0a4589ab706..0f9dadec3b46 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -1941,7 +1941,6 @@ enum netdev_ml_priv_type {
+  *	@sfp_bus:	attached &struct sfp_bus structure.
+  *
+  *	@qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock
+- *	@qdisc_running_key: lockdep class annotating Qdisc->running seqcount
+  *
+  *	@proto_down:	protocol port state information can be sent to the
+  *			switch driver and used to set the phys state of the
+@@ -2272,7 +2271,6 @@ struct net_device {
+ 	struct phy_device	*phydev;
+ 	struct sfp_bus		*sfp_bus;
+ 	struct lock_class_key	*qdisc_tx_busylock;
+-	struct lock_class_key	*qdisc_running_key;
+ 	bool			proto_down;
+ 	unsigned		wol_enabled:1;
+ 	unsigned		threaded:1;
+@@ -2382,13 +2380,11 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
+ #define netdev_lockdep_set_classes(dev)				\
+ {								\
+ 	static struct lock_class_key qdisc_tx_busylock_key;	\
+-	static struct lock_class_key qdisc_running_key;		\
+ 	static struct lock_class_key qdisc_xmit_lock_key;	\
+ 	static struct lock_class_key dev_addr_list_lock_key;	\
+ 	unsigned int i;						\
+ 								\
+ 	(dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key;	\
+-	(dev)->qdisc_running_key = &qdisc_running_key;		\
+ 	lockdep_set_class(&(dev)->addr_list_lock,		\
+ 			  &dev_addr_list_lock_key);		\
+ 	for (i = 0; i < (dev)->num_tx_queues; i++)		\
+diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
+index 7fcd56c6ded6..2ccb84f15aa3 100644
+--- a/include/linux/nfs_xdr.h
++++ b/include/linux/nfs_xdr.h
+@@ -1692,7 +1692,7 @@ struct nfs_unlinkdata {
+ 	struct nfs_removeargs args;
+ 	struct nfs_removeres res;
+ 	struct dentry *dentry;
+-	wait_queue_head_t wq;
++	struct swait_queue_head wq;
+ 	const struct cred *cred;
+ 	struct nfs_fattr dir_attr;
+ 	long timeout;
+diff --git a/include/linux/preempt.h b/include/linux/preempt.h
+index 4d244e295e85..3da73c968211 100644
+--- a/include/linux/preempt.h
++++ b/include/linux/preempt.h
+@@ -122,9 +122,10 @@
+  * The preempt_count offset after spin_lock()
+  */
+ #if !defined(CONFIG_PREEMPT_RT)
+-#define PREEMPT_LOCK_OFFSET	PREEMPT_DISABLE_OFFSET
++#define PREEMPT_LOCK_OFFSET		PREEMPT_DISABLE_OFFSET
+ #else
+-#define PREEMPT_LOCK_OFFSET	0
++/* Locks on RT do not disable preemption */
++#define PREEMPT_LOCK_OFFSET		0
+ #endif
+ 
+ /*
+@@ -174,6 +175,20 @@ extern void preempt_count_sub(int val);
+ #define preempt_count_inc() preempt_count_add(1)
+ #define preempt_count_dec() preempt_count_sub(1)
+ 
++#ifdef CONFIG_PREEMPT_LAZY
++#define add_preempt_lazy_count(val)	do { preempt_lazy_count() += (val); } while (0)
++#define sub_preempt_lazy_count(val)	do { preempt_lazy_count() -= (val); } while (0)
++#define inc_preempt_lazy_count()	add_preempt_lazy_count(1)
++#define dec_preempt_lazy_count()	sub_preempt_lazy_count(1)
++#define preempt_lazy_count()		(current_thread_info()->preempt_lazy_count)
++#else
++#define add_preempt_lazy_count(val)	do { } while (0)
++#define sub_preempt_lazy_count(val)	do { } while (0)
++#define inc_preempt_lazy_count()	do { } while (0)
++#define dec_preempt_lazy_count()	do { } while (0)
++#define preempt_lazy_count()		(0)
++#endif
++
+ #ifdef CONFIG_PREEMPT_COUNT
+ 
+ #define preempt_disable() \
+@@ -182,13 +197,25 @@ do { \
+ 	barrier(); \
+ } while (0)
+ 
++#define preempt_lazy_disable() \
++do { \
++	inc_preempt_lazy_count(); \
++	barrier(); \
++} while (0)
++
+ #define sched_preempt_enable_no_resched() \
+ do { \
+ 	barrier(); \
+ 	preempt_count_dec(); \
+ } while (0)
+ 
+-#define preempt_enable_no_resched() sched_preempt_enable_no_resched()
++#ifndef CONFIG_PREEMPT_RT
++# define preempt_enable_no_resched() sched_preempt_enable_no_resched()
++# define preempt_check_resched_rt() barrier()
++#else
++# define preempt_enable_no_resched() preempt_enable()
++# define preempt_check_resched_rt() preempt_check_resched()
++#endif
+ 
+ #define preemptible()	(preempt_count() == 0 && !irqs_disabled())
+ 
+@@ -213,6 +240,18 @@ do { \
+ 		__preempt_schedule(); \
+ } while (0)
+ 
++/*
++ * open code preempt_check_resched() because it is not exported to modules and
++ * used by local_unlock() or bpf_enable_instrumentation().
++ */
++#define preempt_lazy_enable() \
++do { \
++	dec_preempt_lazy_count(); \
++	barrier(); \
++	if (should_resched(0)) \
++		__preempt_schedule(); \
++} while (0)
++
+ #else /* !CONFIG_PREEMPTION */
+ #define preempt_enable() \
+ do { \
+@@ -220,6 +259,12 @@ do { \
+ 	preempt_count_dec(); \
+ } while (0)
+ 
++#define preempt_lazy_enable() \
++do { \
++	dec_preempt_lazy_count(); \
++	barrier(); \
++} while (0)
++
+ #define preempt_enable_notrace() \
+ do { \
+ 	barrier(); \
+@@ -258,8 +303,12 @@ do { \
+ #define preempt_disable_notrace()		barrier()
+ #define preempt_enable_no_resched_notrace()	barrier()
+ #define preempt_enable_notrace()		barrier()
++#define preempt_check_resched_rt()		barrier()
+ #define preemptible()				0
+ 
++#define preempt_lazy_disable()			barrier()
++#define preempt_lazy_enable()			barrier()
++
+ #endif /* CONFIG_PREEMPT_COUNT */
+ 
+ #ifdef MODULE
+@@ -278,7 +327,7 @@ do { \
+ } while (0)
+ #define preempt_fold_need_resched() \
+ do { \
+-	if (tif_need_resched()) \
++	if (tif_need_resched_now()) \
+ 		set_preempt_need_resched(); \
+ } while (0)
+ 
+@@ -394,8 +443,15 @@ extern void migrate_enable(void);
+ 
+ #else
+ 
+-static inline void migrate_disable(void) { }
+-static inline void migrate_enable(void) { }
++static inline void migrate_disable(void)
++{
++	preempt_lazy_disable();
++}
++
++static inline void migrate_enable(void)
++{
++	preempt_lazy_enable();
++}
+ 
+ #endif /* CONFIG_SMP */
+ 
+diff --git a/include/linux/printk.h b/include/linux/printk.h
+index 9497f6b98339..eddfc5de6ee7 100644
+--- a/include/linux/printk.h
++++ b/include/linux/printk.h
+@@ -47,6 +47,12 @@ static inline const char *printk_skip_headers(const char *buffer)
+ 
+ #define CONSOLE_EXT_LOG_MAX	8192
+ 
++/*
++ * The maximum size of a record formatted for console printing
++ * (i.e. with the prefix prepended to every line).
++ */
++#define CONSOLE_LOG_MAX		1024
++
+ /* printk's without a loglevel use this.. */
+ #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT
+ 
+@@ -155,6 +161,8 @@ int vprintk(const char *fmt, va_list args);
+ asmlinkage __printf(1, 2) __cold
+ int _printk(const char *fmt, ...);
+ 
++bool pr_flush(int timeout_ms, bool reset_on_progress);
++
+ /*
+  * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ !
+  */
+@@ -224,6 +232,11 @@ static inline void printk_deferred_exit(void)
+ {
+ }
+ 
++static inline bool pr_flush(int timeout_ms, bool reset_on_progress)
++{
++	return true;
++}
++
+ static inline int printk_ratelimit(void)
+ {
+ 	return 0;
+@@ -284,17 +297,30 @@ static inline void printk_trigger_flush(void)
+ extern int __printk_cpu_trylock(void);
+ extern void __printk_wait_on_cpu_lock(void);
+ extern void __printk_cpu_unlock(void);
++extern bool kgdb_roundup_delay(unsigned int cpu);
++
++#else
++
++#define __printk_cpu_trylock()		1
++#define __printk_wait_on_cpu_lock()
++#define __printk_cpu_unlock()
++
++static inline bool kgdb_roundup_delay(unsigned int cpu)
++{
++	return false;
++}
++#endif /* CONFIG_SMP */
+ 
+ /**
+- * printk_cpu_lock_irqsave() - Acquire the printk cpu-reentrant spinning
+- *                             lock and disable interrupts.
++ * raw_printk_cpu_lock_irqsave() - Acquire the printk cpu-reentrant spinning
++ *                                 lock and disable interrupts.
+  * @flags: Stack-allocated storage for saving local interrupt state,
+- *         to be passed to printk_cpu_unlock_irqrestore().
++ *         to be passed to raw_printk_cpu_unlock_irqrestore().
+  *
+  * If the lock is owned by another CPU, spin until it becomes available.
+  * Interrupts are restored while spinning.
+  */
+-#define printk_cpu_lock_irqsave(flags)		\
++#define raw_printk_cpu_lock_irqsave(flags)	\
+ 	for (;;) {				\
+ 		local_irq_save(flags);		\
+ 		if (__printk_cpu_trylock())	\
+@@ -304,22 +330,30 @@ extern void __printk_cpu_unlock(void);
+ 	}
+ 
+ /**
+- * printk_cpu_unlock_irqrestore() - Release the printk cpu-reentrant spinning
+- *                                  lock and restore interrupts.
+- * @flags: Caller's saved interrupt state, from printk_cpu_lock_irqsave().
++ * raw_printk_cpu_unlock_irqrestore() - Release the printk cpu-reentrant
++ *                                      spinning lock and restore interrupts.
++ * @flags: Caller's saved interrupt state from raw_printk_cpu_lock_irqsave().
+  */
+-#define printk_cpu_unlock_irqrestore(flags)	\
++#define raw_printk_cpu_unlock_irqrestore(flags)	\
+ 	do {					\
+ 		__printk_cpu_unlock();		\
+ 		local_irq_restore(flags);	\
+-	} while (0)				\
+-
+-#else
++	} while (0)
+ 
+-#define printk_cpu_lock_irqsave(flags) ((void)flags)
+-#define printk_cpu_unlock_irqrestore(flags) ((void)flags)
++/*
++ * Used to synchronize atomic consoles.
++ *
++ * The same as raw_printk_cpu_lock_irqsave() except that hardware interrupts
++ * are _not_ restored while spinning.
++ */
++#define console_atomic_lock(flags)		\
++	do {					\
++		local_irq_save(flags);		\
++		while (!__printk_cpu_trylock())	\
++			cpu_relax();		\
++	} while (0)
+ 
+-#endif /* CONFIG_SMP */
++#define console_atomic_unlock raw_printk_cpu_unlock_irqrestore
+ 
+ extern int kptr_restrict;
+ 
+diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h
+index f0e535f199be..002266693e50 100644
+--- a/include/linux/ratelimit_types.h
++++ b/include/linux/ratelimit_types.h
+@@ -4,7 +4,7 @@
+ 
+ #include <linux/bits.h>
+ #include <linux/param.h>
+-#include <linux/spinlock_types.h>
++#include <linux/spinlock_types_raw.h>
+ 
+ #define DEFAULT_RATELIMIT_INTERVAL	(5 * HZ)
+ #define DEFAULT_RATELIMIT_BURST		10
+diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
+index 13bddb841ceb..e33445348eb0 100644
+--- a/include/linux/rcupdate.h
++++ b/include/linux/rcupdate.h
+@@ -94,6 +94,13 @@ void rcu_init_tasks_generic(void);
+ static inline void rcu_init_tasks_generic(void) { }
+ #endif
+ 
++#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TASKS_RCU_GENERIC)
++void rcu_tasks_initiate_self_tests(void);
++#else
++static inline void rcu_tasks_initiate_self_tests(void) {}
++#endif
++
++
+ #ifdef CONFIG_RCU_STALL_COMMON
+ void rcu_sysrq_start(void);
+ void rcu_sysrq_end(void);
+diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
+index 9deedfeec2b1..7d049883a08a 100644
+--- a/include/linux/rtmutex.h
++++ b/include/linux/rtmutex.h
+@@ -99,13 +99,22 @@ extern void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock
+ 
+ #ifdef CONFIG_DEBUG_LOCK_ALLOC
+ extern void rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass);
++extern void _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock);
+ #define rt_mutex_lock(lock) rt_mutex_lock_nested(lock, 0)
++#define rt_mutex_lock_nest_lock(lock, nest_lock)			\
++	do {								\
++		typecheck(struct lockdep_map *, &(nest_lock)->dep_map);	\
++		_rt_mutex_lock_nest_lock(lock, &(nest_lock)->dep_map);	\
++	} while (0)
++
+ #else
+ extern void rt_mutex_lock(struct rt_mutex *lock);
+ #define rt_mutex_lock_nested(lock, subclass) rt_mutex_lock(lock)
++#define rt_mutex_lock_nest_lock(lock, nest_lock) rt_mutex_lock(lock)
+ #endif
+ 
+ extern int rt_mutex_lock_interruptible(struct rt_mutex *lock);
++extern int rt_mutex_lock_killable(struct rt_mutex *lock);
+ extern int rt_mutex_trylock(struct rt_mutex *lock);
+ 
+ extern void rt_mutex_unlock(struct rt_mutex *lock);
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 7c17742d359c..2cdeb099d3c9 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -118,12 +118,8 @@ struct task_group;
+ 
+ #define task_is_running(task)		(READ_ONCE((task)->__state) == TASK_RUNNING)
+ 
+-#define task_is_traced(task)		((READ_ONCE(task->__state) & __TASK_TRACED) != 0)
+-
+ #define task_is_stopped(task)		((READ_ONCE(task->__state) & __TASK_STOPPED) != 0)
+ 
+-#define task_is_stopped_or_traced(task)	((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0)
+-
+ /*
+  * Special states are those that do not use the normal wait-loop pattern. See
+  * the comment with set_special_state().
+@@ -1084,6 +1080,10 @@ struct task_struct {
+ 	/* Restored if set_restore_sigmask() was used: */
+ 	sigset_t			saved_sigmask;
+ 	struct sigpending		pending;
++#ifdef CONFIG_PREEMPT_RT
++	/* TODO: move me into ->restart_block ? */
++	struct				kernel_siginfo forced_info;
++#endif
+ 	unsigned long			sas_ss_sp;
+ 	size_t				sas_ss_size;
+ 	unsigned int			sas_ss_flags;
+@@ -1738,6 +1738,16 @@ static __always_inline bool is_percpu_thread(void)
+ #endif
+ }
+ 
++/* May the current task be migrated away from its current CPU? */
++static inline bool is_migratable(void)
++{
++#ifdef CONFIG_SMP
++	return preemptible() && !current->migration_disabled;
++#else
++	return false;
++#endif
++}
++
+ /* Per-process atomic flags. */
+ #define PFA_NO_NEW_PRIVS		0	/* May not gain new privileges. */
+ #define PFA_SPREAD_PAGE			1	/* Spread page cache over cpuset */
+@@ -2013,6 +2023,118 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
+ 	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
+ }
+ 
++#ifdef CONFIG_PREEMPT_LAZY
++static inline void set_tsk_need_resched_lazy(struct task_struct *tsk)
++{
++	set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
++}
++
++static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk)
++{
++	clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
++}
++
++static inline int test_tsk_need_resched_lazy(struct task_struct *tsk)
++{
++	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY));
++}
++
++static inline int need_resched_lazy(void)
++{
++	return test_thread_flag(TIF_NEED_RESCHED_LAZY);
++}
++
++static inline int need_resched_now(void)
++{
++	return test_thread_flag(TIF_NEED_RESCHED);
++}
++
++#else
++static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { }
++static inline int need_resched_lazy(void) { return 0; }
++
++static inline int need_resched_now(void)
++{
++	return test_thread_flag(TIF_NEED_RESCHED);
++}
++
++#endif
++
++#ifdef CONFIG_PREEMPT_RT
++static inline bool task_match_saved_state(struct task_struct *p, long match_state)
++{
++	return p->saved_state == match_state;
++}
++
++static inline bool task_is_traced(struct task_struct *task)
++{
++	bool traced = false;
++
++	/* in case the task is sleeping on tasklist_lock */
++	raw_spin_lock_irq(&task->pi_lock);
++	if (READ_ONCE(task->__state) & __TASK_TRACED)
++		traced = true;
++	else if (task->saved_state & __TASK_TRACED)
++		traced = true;
++	raw_spin_unlock_irq(&task->pi_lock);
++	return traced;
++}
++
++static inline bool task_is_stopped_or_traced(struct task_struct *task)
++{
++	bool traced_stopped = false;
++	unsigned long flags;
++
++	raw_spin_lock_irqsave(&task->pi_lock, flags);
++
++	if (READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED))
++		traced_stopped = true;
++	else if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED))
++		traced_stopped = true;
++
++	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
++	return traced_stopped;
++}
++
++#else
++
++static inline bool task_match_saved_state(struct task_struct *p, long match_state)
++{
++	return false;
++}
++
++static inline bool task_is_traced(struct task_struct *task)
++{
++	return READ_ONCE(task->__state) & __TASK_TRACED;
++}
++
++static inline bool task_is_stopped_or_traced(struct task_struct *task)
++{
++	return READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED);
++}
++#endif
++
++static inline bool task_match_state_or_saved(struct task_struct *p,
++					     long match_state)
++{
++	if (READ_ONCE(p->__state) == match_state)
++		return true;
++
++	return task_match_saved_state(p, match_state);
++}
++
++static inline bool task_match_state_lock(struct task_struct *p,
++					 long match_state)
++{
++	bool match;
++
++	raw_spin_lock_irq(&p->pi_lock);
++	match = task_match_state_or_saved(p, match_state);
++	raw_spin_unlock_irq(&p->pi_lock);
++
++	return match;
++}
++
+ /*
+  * cond_resched() and cond_resched_lock(): latency reduction via
+  * explicit rescheduling in places that are safe. The return
+@@ -2047,7 +2169,7 @@ static inline int _cond_resched(void) { return 0; }
+ #endif /* !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) */
+ 
+ #define cond_resched() ({			\
+-	___might_sleep(__FILE__, __LINE__, 0);	\
++	__might_resched(__FILE__, __LINE__, 0);	\
+ 	_cond_resched();			\
+ })
+ 
+@@ -2055,19 +2177,38 @@ extern int __cond_resched_lock(spinlock_t *lock);
+ extern int __cond_resched_rwlock_read(rwlock_t *lock);
+ extern int __cond_resched_rwlock_write(rwlock_t *lock);
+ 
+-#define cond_resched_lock(lock) ({				\
+-	___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
+-	__cond_resched_lock(lock);				\
++#define MIGHT_RESCHED_RCU_SHIFT		8
++#define MIGHT_RESCHED_PREEMPT_MASK	((1U << MIGHT_RESCHED_RCU_SHIFT) - 1)
++
++#ifndef CONFIG_PREEMPT_RT
++/*
++ * Non RT kernels have an elevated preempt count due to the held lock,
++ * but are not allowed to be inside a RCU read side critical section
++ */
++# define PREEMPT_LOCK_RESCHED_OFFSETS	PREEMPT_LOCK_OFFSET
++#else
++/*
++ * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in
++ * cond_resched*lock() has to take that into account because it checks for
++ * preempt_count() and rcu_preempt_depth().
++ */
++# define PREEMPT_LOCK_RESCHED_OFFSETS	\
++	(PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT))
++#endif
++
++#define cond_resched_lock(lock) ({						\
++	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
++	__cond_resched_lock(lock);						\
+ })
+ 
+-#define cond_resched_rwlock_read(lock) ({			\
+-	__might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);	\
+-	__cond_resched_rwlock_read(lock);			\
++#define cond_resched_rwlock_read(lock) ({					\
++	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
++	__cond_resched_rwlock_read(lock);					\
+ })
+ 
+-#define cond_resched_rwlock_write(lock) ({			\
+-	__might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);	\
+-	__cond_resched_rwlock_write(lock);			\
++#define cond_resched_rwlock_write(lock) ({					\
++	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
++	__cond_resched_rwlock_write(lock);					\
+ })
+ 
+ static inline void cond_resched_rcu(void)
+diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
+index 95fb7aaaec8d..28e9cc60f47e 100644
+--- a/include/linux/sched/mm.h
++++ b/include/linux/sched/mm.h
+@@ -49,6 +49,26 @@ static inline void mmdrop(struct mm_struct *mm)
+ 		__mmdrop(mm);
+ }
+ 
++#ifdef CONFIG_PREEMPT_RT
++extern void __mmdrop_delayed(struct rcu_head *rhp);
++
++/*
++ * Invoked from finish_task_switch(). Delegates the heavy lifting on RT
++ * kernels via RCU.
++ */
++static inline void mmdrop_sched(struct mm_struct *mm)
++{
++	/* Provides a full memory barrier. See mmdrop() */
++	if (atomic_dec_and_test(&mm->mm_count))
++		call_rcu(&mm->delayed_drop, __mmdrop_delayed);
++}
++#else
++static inline void mmdrop_sched(struct mm_struct *mm)
++{
++	mmdrop(mm);
++}
++#endif
++
+ /**
+  * mmget() - Pin the address space associated with a &struct mm_struct.
+  * @mm: The address space to pin.
+diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
+index 68abc6bdd891..dfe81e08e143 100644
+--- a/include/linux/serial_8250.h
++++ b/include/linux/serial_8250.h
+@@ -7,6 +7,7 @@
+ #ifndef _LINUX_SERIAL_8250_H
+ #define _LINUX_SERIAL_8250_H
+ 
++#include <linux/atomic.h>
+ #include <linux/serial_core.h>
+ #include <linux/serial_reg.h>
+ #include <linux/platform_device.h>
+@@ -126,6 +127,8 @@ struct uart_8250_port {
+ #define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA
+ 	unsigned char		msr_saved_flags;
+ 
++	atomic_t		console_printing;
++
+ 	struct uart_8250_dma	*dma;
+ 	const struct uart_8250_ops *ops;
+ 
+@@ -181,6 +184,8 @@ void serial8250_init_port(struct uart_8250_port *up);
+ void serial8250_set_defaults(struct uart_8250_port *up);
+ void serial8250_console_write(struct uart_8250_port *up, const char *s,
+ 			      unsigned int count);
++void serial8250_console_write_atomic(struct uart_8250_port *up, const char *s,
++				     unsigned int count);
+ int serial8250_console_setup(struct uart_port *port, char *options, bool probe);
+ int serial8250_console_exit(struct uart_port *port);
+ 
+diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
+index 7ed1d4472c0c..6ac2df270a97 100644
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -301,6 +301,7 @@ struct sk_buff_head {
+ 
+ 	__u32		qlen;
+ 	spinlock_t	lock;
++	raw_spinlock_t	raw_lock;
+ };
+ 
+ struct sk_buff;
+@@ -1993,6 +1994,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list)
+ 	__skb_queue_head_init(list);
+ }
+ 
++static inline void skb_queue_head_init_raw(struct sk_buff_head *list)
++{
++	raw_spin_lock_init(&list->raw_lock);
++	__skb_queue_head_init(list);
++}
++
+ static inline void skb_queue_head_init_class(struct sk_buff_head *list,
+ 		struct lock_class_key *class)
+ {
+diff --git a/include/linux/smp.h b/include/linux/smp.h
+index 510519e8a1eb..7ac9fdb5ad09 100644
+--- a/include/linux/smp.h
++++ b/include/linux/smp.h
+@@ -268,6 +268,9 @@ static inline int get_boot_cpu_id(void)
+ #define get_cpu()		({ preempt_disable(); __smp_processor_id(); })
+ #define put_cpu()		preempt_enable()
+ 
++#define get_cpu_light()		({ migrate_disable(); __smp_processor_id(); })
++#define put_cpu_light()		migrate_enable()
++
+ /*
+  * Callback to arch code if there's nosmp or maxcpus=0 on the
+  * boot command line:
+diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h
+index c09b6407ae1b..7f86a2016ac5 100644
+--- a/include/linux/spinlock_types_up.h
++++ b/include/linux/spinlock_types_up.h
+@@ -1,7 +1,7 @@
+ #ifndef __LINUX_SPINLOCK_TYPES_UP_H
+ #define __LINUX_SPINLOCK_TYPES_UP_H
+ 
+-#ifndef __LINUX_SPINLOCK_TYPES_H
++#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
+ # error "please don't include this file directly"
+ #endif
+ 
+diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
+index 9a073535c0bd..0536fbba7f69 100644
+--- a/include/linux/thread_info.h
++++ b/include/linux/thread_info.h
+@@ -177,7 +177,17 @@ static __always_inline unsigned long read_ti_thread_flags(struct thread_info *ti
+ 	clear_ti_thread_flag(task_thread_info(t), TIF_##fl)
+ #endif /* !CONFIG_GENERIC_ENTRY */
+ 
+-#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
++#ifdef CONFIG_PREEMPT_LAZY
++#define tif_need_resched()	(test_thread_flag(TIF_NEED_RESCHED) || \
++				 test_thread_flag(TIF_NEED_RESCHED_LAZY))
++#define tif_need_resched_now()	(test_thread_flag(TIF_NEED_RESCHED))
++#define tif_need_resched_lazy()	test_thread_flag(TIF_NEED_RESCHED_LAZY)
++
++#else
++#define tif_need_resched()	test_thread_flag(TIF_NEED_RESCHED)
++#define tif_need_resched_now()	test_thread_flag(TIF_NEED_RESCHED)
++#define tif_need_resched_lazy()	0
++#endif
+ 
+ #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
+ static inline int arch_within_stack_frames(const void * const stack,
+diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
+index ff137179e0c3..54fe3b1a638d 100644
+--- a/include/linux/trace_events.h
++++ b/include/linux/trace_events.h
+@@ -69,6 +69,7 @@ struct trace_entry {
+ 	unsigned char		flags;
+ 	unsigned char		preempt_count;
+ 	int			pid;
++	unsigned char		preempt_lazy_count;
+ };
+ 
+ #define TRACE_EVENT_TYPE_MAX						\
+@@ -158,9 +159,10 @@ static inline void tracing_generic_entry_update(struct trace_entry *entry,
+ 						unsigned int trace_ctx)
+ {
+ 	entry->preempt_count		= trace_ctx & 0xff;
++	entry->preempt_lazy_count	= (trace_ctx >> 16) & 0xff;
+ 	entry->pid			= current->pid;
+ 	entry->type			= type;
+-	entry->flags =			trace_ctx >> 16;
++	entry->flags			= trace_ctx >> 24;
+ }
+ 
+ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status);
+@@ -173,6 +175,7 @@ enum trace_flag_type {
+ 	TRACE_FLAG_SOFTIRQ		= 0x10,
+ 	TRACE_FLAG_PREEMPT_RESCHED	= 0x20,
+ 	TRACE_FLAG_NMI			= 0x40,
++	TRACE_FLAG_NEED_RESCHED_LAZY	= 0x80,
+ };
+ 
+ #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
+diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
+index e81856c0ba13..81dc1f5e181a 100644
+--- a/include/linux/u64_stats_sync.h
++++ b/include/linux/u64_stats_sync.h
+@@ -66,7 +66,7 @@
+ #include <linux/seqlock.h>
+ 
+ struct u64_stats_sync {
+-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
++#if BITS_PER_LONG==32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
+ 	seqcount_t	seq;
+ #endif
+ };
+@@ -83,6 +83,11 @@ static inline u64 u64_stats_read(const u64_stats_t *p)
+ 	return local64_read(&p->v);
+ }
+ 
++static inline void u64_stats_set(u64_stats_t *p, u64 val)
++{
++	local64_set(&p->v, val);
++}
++
+ static inline void u64_stats_add(u64_stats_t *p, unsigned long val)
+ {
+ 	local64_add(val, &p->v);
+@@ -104,6 +109,11 @@ static inline u64 u64_stats_read(const u64_stats_t *p)
+ 	return p->v;
+ }
+ 
++static inline void u64_stats_set(u64_stats_t *p, u64 val)
++{
++	p->v = val;
++}
++
+ static inline void u64_stats_add(u64_stats_t *p, unsigned long val)
+ {
+ 	p->v += val;
+@@ -115,7 +125,7 @@ static inline void u64_stats_inc(u64_stats_t *p)
+ }
+ #endif
+ 
+-#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
+ #define u64_stats_init(syncp)	seqcount_init(&(syncp)->seq)
+ #else
+ static inline void u64_stats_init(struct u64_stats_sync *syncp)
+@@ -125,15 +135,19 @@ static inline void u64_stats_init(struct u64_stats_sync *syncp)
+ 
+ static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
+ {
+-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
++	if (IS_ENABLED(CONFIG_PREEMPT_RT))
++		preempt_disable();
+ 	write_seqcount_begin(&syncp->seq);
+ #endif
+ }
+ 
+ static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
+ {
+-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
+ 	write_seqcount_end(&syncp->seq);
++	if (IS_ENABLED(CONFIG_PREEMPT_RT))
++		preempt_enable();
+ #endif
+ }
+ 
+@@ -142,8 +156,11 @@ u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp)
+ {
+ 	unsigned long flags = 0;
+ 
+-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+-	local_irq_save(flags);
++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
++	if (IS_ENABLED(CONFIG_PREEMPT_RT))
++		preempt_disable();
++	else
++		local_irq_save(flags);
+ 	write_seqcount_begin(&syncp->seq);
+ #endif
+ 	return flags;
+@@ -153,15 +170,18 @@ static inline void
+ u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
+ 				unsigned long flags)
+ {
+-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
+ 	write_seqcount_end(&syncp->seq);
+-	local_irq_restore(flags);
++	if (IS_ENABLED(CONFIG_PREEMPT_RT))
++		preempt_enable();
++	else
++		local_irq_restore(flags);
+ #endif
+ }
+ 
+ static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+ {
+-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
+ 	return read_seqcount_begin(&syncp->seq);
+ #else
+ 	return 0;
+@@ -170,7 +190,7 @@ static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *
+ 
+ static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+ {
+-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
++#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
+ 	preempt_disable();
+ #endif
+ 	return __u64_stats_fetch_begin(syncp);
+@@ -179,7 +199,7 @@ static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *sy
+ static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+ 					 unsigned int start)
+ {
+-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
+ 	return read_seqcount_retry(&syncp->seq, start);
+ #else
+ 	return false;
+@@ -189,7 +209,7 @@ static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+ static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+ 					 unsigned int start)
+ {
+-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
++#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
+ 	preempt_enable();
+ #endif
+ 	return __u64_stats_fetch_retry(syncp, start);
+@@ -203,7 +223,9 @@ static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+  */
+ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp)
+ {
+-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
++#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
++	preempt_disable();
++#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
+ 	local_irq_disable();
+ #endif
+ 	return __u64_stats_fetch_begin(syncp);
+@@ -212,7 +234,9 @@ static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync
+ static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp,
+ 					     unsigned int start)
+ {
+-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
++#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
++	preempt_enable();
++#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
+ 	local_irq_enable();
+ #endif
+ 	return __u64_stats_fetch_retry(syncp, start);
+diff --git a/include/net/act_api.h b/include/net/act_api.h
+index f19f7f4a463c..b5b624c7e488 100644
+--- a/include/net/act_api.h
++++ b/include/net/act_api.h
+@@ -30,13 +30,13 @@ struct tc_action {
+ 	atomic_t			tcfa_bindcnt;
+ 	int				tcfa_action;
+ 	struct tcf_t			tcfa_tm;
+-	struct gnet_stats_basic_packed	tcfa_bstats;
+-	struct gnet_stats_basic_packed	tcfa_bstats_hw;
++	struct gnet_stats_basic_sync	tcfa_bstats;
++	struct gnet_stats_basic_sync	tcfa_bstats_hw;
+ 	struct gnet_stats_queue		tcfa_qstats;
+ 	struct net_rate_estimator __rcu *tcfa_rate_est;
+ 	spinlock_t			tcfa_lock;
+-	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+-	struct gnet_stats_basic_cpu __percpu *cpu_bstats_hw;
++	struct gnet_stats_basic_sync __percpu *cpu_bstats;
++	struct gnet_stats_basic_sync __percpu *cpu_bstats_hw;
+ 	struct gnet_stats_queue __percpu *cpu_qstats;
+ 	struct tc_cookie	__rcu *act_cookie;
+ 	struct tcf_chain	__rcu *goto_chain;
+@@ -206,7 +206,7 @@ static inline void tcf_action_update_bstats(struct tc_action *a,
+ 					    struct sk_buff *skb)
+ {
+ 	if (likely(a->cpu_bstats)) {
+-		bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), skb);
++		bstats_update(this_cpu_ptr(a->cpu_bstats), skb);
+ 		return;
+ 	}
+ 	spin_lock(&a->tcfa_lock);
+diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
+index 1424e02cef90..7aa2b8e1fb29 100644
+--- a/include/net/gen_stats.h
++++ b/include/net/gen_stats.h
+@@ -7,14 +7,17 @@
+ #include <linux/rtnetlink.h>
+ #include <linux/pkt_sched.h>
+ 
+-/* Note: this used to be in include/uapi/linux/gen_stats.h */
+-struct gnet_stats_basic_packed {
+-	__u64	bytes;
+-	__u64	packets;
+-};
+-
+-struct gnet_stats_basic_cpu {
+-	struct gnet_stats_basic_packed bstats;
++/* Throughput stats.
++ * Must be initialized beforehand with gnet_stats_basic_sync_init().
++ *
++ * If no reads can ever occur parallel to writes (e.g. stack-allocated
++ * bstats), then the internal stat values can be written to and read
++ * from directly. Otherwise, use _bstats_set/update() for writes and
++ * gnet_stats_add_basic() for reads.
++ */
++struct gnet_stats_basic_sync {
++	u64_stats_t bytes;
++	u64_stats_t packets;
+ 	struct u64_stats_sync syncp;
+ } __aligned(2 * sizeof(u64));
+ 
+@@ -34,6 +37,7 @@ struct gnet_dump {
+ 	struct tc_stats   tc_stats;
+ };
+ 
++void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b);
+ int gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
+ 			  struct gnet_dump *d, int padattr);
+ 
+@@ -42,41 +46,38 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
+ 				 spinlock_t *lock, struct gnet_dump *d,
+ 				 int padattr);
+ 
+-int gnet_stats_copy_basic(const seqcount_t *running,
+-			  struct gnet_dump *d,
+-			  struct gnet_stats_basic_cpu __percpu *cpu,
+-			  struct gnet_stats_basic_packed *b);
+-void __gnet_stats_copy_basic(const seqcount_t *running,
+-			     struct gnet_stats_basic_packed *bstats,
+-			     struct gnet_stats_basic_cpu __percpu *cpu,
+-			     struct gnet_stats_basic_packed *b);
+-int gnet_stats_copy_basic_hw(const seqcount_t *running,
+-			     struct gnet_dump *d,
+-			     struct gnet_stats_basic_cpu __percpu *cpu,
+-			     struct gnet_stats_basic_packed *b);
++int gnet_stats_copy_basic(struct gnet_dump *d,
++			  struct gnet_stats_basic_sync __percpu *cpu,
++			  struct gnet_stats_basic_sync *b, bool running);
++void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
++			  struct gnet_stats_basic_sync __percpu *cpu,
++			  struct gnet_stats_basic_sync *b, bool running);
++int gnet_stats_copy_basic_hw(struct gnet_dump *d,
++			     struct gnet_stats_basic_sync __percpu *cpu,
++			     struct gnet_stats_basic_sync *b, bool running);
+ int gnet_stats_copy_rate_est(struct gnet_dump *d,
+ 			     struct net_rate_estimator __rcu **ptr);
+ int gnet_stats_copy_queue(struct gnet_dump *d,
+ 			  struct gnet_stats_queue __percpu *cpu_q,
+ 			  struct gnet_stats_queue *q, __u32 qlen);
+-void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
+-			     const struct gnet_stats_queue __percpu *cpu_q,
+-			     const struct gnet_stats_queue *q, __u32 qlen);
++void gnet_stats_add_queue(struct gnet_stats_queue *qstats,
++			  const struct gnet_stats_queue __percpu *cpu_q,
++			  const struct gnet_stats_queue *q);
+ int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len);
+ 
+ int gnet_stats_finish_copy(struct gnet_dump *d);
+ 
+-int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
+-		      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
++int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
++		      struct gnet_stats_basic_sync __percpu *cpu_bstats,
+ 		      struct net_rate_estimator __rcu **rate_est,
+ 		      spinlock_t *lock,
+-		      seqcount_t *running, struct nlattr *opt);
++		      bool running, struct nlattr *opt);
+ void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
+-int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
+-			  struct gnet_stats_basic_cpu __percpu *cpu_bstats,
++int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
++			  struct gnet_stats_basic_sync __percpu *cpu_bstats,
+ 			  struct net_rate_estimator __rcu **ptr,
+ 			  spinlock_t *lock,
+-			  seqcount_t *running, struct nlattr *opt);
++			  bool running, struct nlattr *opt);
+ bool gen_estimator_active(struct net_rate_estimator __rcu **ptr);
+ bool gen_estimator_read(struct net_rate_estimator __rcu **ptr,
+ 			struct gnet_stats_rate_est64 *sample);
+diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h
+index 832ab69efda5..4c3809e141f4 100644
+--- a/include/net/netfilter/xt_rateest.h
++++ b/include/net/netfilter/xt_rateest.h
+@@ -6,7 +6,7 @@
+ 
+ struct xt_rateest {
+ 	/* keep lock and bstats on same cache line to speedup xt_rateest_tg() */
+-	struct gnet_stats_basic_packed	bstats;
++	struct gnet_stats_basic_sync	bstats;
+ 	spinlock_t			lock;
+ 
+ 
+diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
+index 83a6d0792180..4a5833108083 100644
+--- a/include/net/pkt_cls.h
++++ b/include/net/pkt_cls.h
+@@ -765,7 +765,7 @@ struct tc_cookie {
+ };
+ 
+ struct tc_qopt_offload_stats {
+-	struct gnet_stats_basic_packed *bstats;
++	struct gnet_stats_basic_sync *bstats;
+ 	struct gnet_stats_queue *qstats;
+ };
+ 
+@@ -885,7 +885,7 @@ struct tc_gred_qopt_offload_params {
+ };
+ 
+ struct tc_gred_qopt_offload_stats {
+-	struct gnet_stats_basic_packed bstats[MAX_DPs];
++	struct gnet_stats_basic_sync bstats[MAX_DPs];
+ 	struct gnet_stats_queue qstats[MAX_DPs];
+ 	struct red_stats *xstats[MAX_DPs];
+ };
+diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
+index 6906da5c733e..e9fe7a613eba 100644
+--- a/include/net/sch_generic.h
++++ b/include/net/sch_generic.h
+@@ -40,6 +40,13 @@ enum qdisc_state_t {
+ 	__QDISC_STATE_DRAINING,
+ };
+ 
++enum qdisc_state2_t {
++	/* Only for !TCQ_F_NOLOCK qdisc. Never access it directly.
++	 * Use qdisc_run_begin/end() or qdisc_is_running() instead.
++	 */
++	__QDISC_STATE2_RUNNING,
++};
++
+ #define QDISC_STATE_MISSED	BIT(__QDISC_STATE_MISSED)
+ #define QDISC_STATE_DRAINING	BIT(__QDISC_STATE_DRAINING)
+ 
+@@ -97,7 +104,7 @@ struct Qdisc {
+ 	struct netdev_queue	*dev_queue;
+ 
+ 	struct net_rate_estimator __rcu *rate_est;
+-	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
++	struct gnet_stats_basic_sync __percpu *cpu_bstats;
+ 	struct gnet_stats_queue	__percpu *cpu_qstats;
+ 	int			pad;
+ 	refcount_t		refcnt;
+@@ -107,10 +114,10 @@ struct Qdisc {
+ 	 */
+ 	struct sk_buff_head	gso_skb ____cacheline_aligned_in_smp;
+ 	struct qdisc_skb_head	q;
+-	struct gnet_stats_basic_packed bstats;
+-	seqcount_t		running;
++	struct gnet_stats_basic_sync bstats;
+ 	struct gnet_stats_queue	qstats;
+ 	unsigned long		state;
++	unsigned long		state2; /* must be written under qdisc spinlock */
+ 	struct Qdisc            *next_sched;
+ 	struct sk_buff_head	skb_bad_txq;
+ 
+@@ -143,11 +150,15 @@ static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc)
+ 	return NULL;
+ }
+ 
++/* For !TCQ_F_NOLOCK qdisc: callers must either call this within a qdisc
++ * root_lock section, or provide their own memory barriers -- ordering
++ * against qdisc_run_begin/end() atomic bit operations.
++ */
+ static inline bool qdisc_is_running(struct Qdisc *qdisc)
+ {
+ 	if (qdisc->flags & TCQ_F_NOLOCK)
+ 		return spin_is_locked(&qdisc->seqlock);
+-	return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
++	return test_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
+ }
+ 
+ static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc)
+@@ -167,6 +178,9 @@ static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
+ 	return !READ_ONCE(qdisc->q.qlen);
+ }
+ 
++/* For !TCQ_F_NOLOCK qdisc, qdisc_run_begin/end() must be invoked with
++ * the qdisc root lock acquired.
++ */
+ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
+ {
+ 	if (qdisc->flags & TCQ_F_NOLOCK) {
+@@ -186,15 +200,8 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
+ 		 * when testing it in qdisc_run_end()
+ 		 */
+ 		return spin_trylock(&qdisc->seqlock);
+-	} else if (qdisc_is_running(qdisc)) {
+-		return false;
+ 	}
+-	/* Variant of write_seqcount_begin() telling lockdep a trylock
+-	 * was attempted.
+-	 */
+-	raw_write_seqcount_begin(&qdisc->running);
+-	seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_);
+-	return true;
++	return !__test_and_set_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
+ }
+ 
+ static inline void qdisc_run_end(struct Qdisc *qdisc)
+@@ -212,7 +219,7 @@ static inline void qdisc_run_end(struct Qdisc *qdisc)
+ 				      &qdisc->state)))
+ 			__netif_schedule(qdisc);
+ 	} else {
+-		write_seqcount_end(&qdisc->running);
++		__clear_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
+ 	}
+ }
+ 
+@@ -576,14 +583,6 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc)
+ 	return qdisc_lock(root);
+ }
+ 
+-static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
+-{
+-	struct Qdisc *root = qdisc_root_sleeping(qdisc);
+-
+-	ASSERT_RTNL();
+-	return &root->running;
+-}
+-
+ static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc)
+ {
+ 	return qdisc->dev_queue->dev;
+@@ -833,14 +832,16 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ 	return sch->enqueue(skb, sch, to_free);
+ }
+ 
+-static inline void _bstats_update(struct gnet_stats_basic_packed *bstats,
++static inline void _bstats_update(struct gnet_stats_basic_sync *bstats,
+ 				  __u64 bytes, __u32 packets)
+ {
+-	bstats->bytes += bytes;
+-	bstats->packets += packets;
++	u64_stats_update_begin(&bstats->syncp);
++	u64_stats_add(&bstats->bytes, bytes);
++	u64_stats_add(&bstats->packets, packets);
++	u64_stats_update_end(&bstats->syncp);
+ }
+ 
+-static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
++static inline void bstats_update(struct gnet_stats_basic_sync *bstats,
+ 				 const struct sk_buff *skb)
+ {
+ 	_bstats_update(bstats,
+@@ -848,26 +849,10 @@ static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
+ 		       skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1);
+ }
+ 
+-static inline void _bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
+-				      __u64 bytes, __u32 packets)
+-{
+-	u64_stats_update_begin(&bstats->syncp);
+-	_bstats_update(&bstats->bstats, bytes, packets);
+-	u64_stats_update_end(&bstats->syncp);
+-}
+-
+-static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
+-				     const struct sk_buff *skb)
+-{
+-	u64_stats_update_begin(&bstats->syncp);
+-	bstats_update(&bstats->bstats, skb);
+-	u64_stats_update_end(&bstats->syncp);
+-}
+-
+ static inline void qdisc_bstats_cpu_update(struct Qdisc *sch,
+ 					   const struct sk_buff *skb)
+ {
+-	bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb);
++	bstats_update(this_cpu_ptr(sch->cpu_bstats), skb);
+ }
+ 
+ static inline void qdisc_bstats_update(struct Qdisc *sch,
+@@ -956,10 +941,9 @@ static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch,  __u32 *qlen,
+ 					     __u32 *backlog)
+ {
+ 	struct gnet_stats_queue qstats = { 0 };
+-	__u32 len = qdisc_qlen_sum(sch);
+ 
+-	__gnet_stats_copy_queue(&qstats, sch->cpu_qstats, &sch->qstats, len);
+-	*qlen = qstats.qlen;
++	gnet_stats_add_queue(&qstats, sch->cpu_qstats, &sch->qstats);
++	*qlen = qstats.qlen + qdisc_qlen(sch);
+ 	*backlog = qstats.backlog;
+ }
+ 
+@@ -1304,7 +1288,7 @@ void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64);
+ struct mini_Qdisc {
+ 	struct tcf_proto *filter_list;
+ 	struct tcf_block *block;
+-	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
++	struct gnet_stats_basic_sync __percpu *cpu_bstats;
+ 	struct gnet_stats_queue	__percpu *cpu_qstats;
+ 	struct rcu_head rcu;
+ };
+@@ -1312,7 +1296,7 @@ struct mini_Qdisc {
+ static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq,
+ 						const struct sk_buff *skb)
+ {
+-	bstats_cpu_update(this_cpu_ptr(miniq->cpu_bstats), skb);
++	bstats_update(this_cpu_ptr(miniq->cpu_bstats), skb);
+ }
+ 
+ static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq)
+diff --git a/init/Kconfig b/init/Kconfig
+index dafc3ba6fa7a..cd852df4e7d4 100644
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -910,7 +910,7 @@ config NUMA_BALANCING
+ 	bool "Memory placement aware NUMA scheduler"
+ 	depends on ARCH_SUPPORTS_NUMA_BALANCING
+ 	depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY
+-	depends on SMP && NUMA && MIGRATION
++	depends on SMP && NUMA && MIGRATION && !PREEMPT_RT
+ 	help
+ 	  This option adds support for automatic NUMA aware memory/task placement.
+ 	  The mechanism is quite primitive and is based on migrating memory when
+@@ -1905,6 +1905,7 @@ choice
+ 
+ config SLAB
+ 	bool "SLAB"
++	depends on !PREEMPT_RT
+ 	select HAVE_HARDENED_USERCOPY_ALLOCATOR
+ 	help
+ 	  The regular slab allocator that is established and known to work
+@@ -1925,6 +1926,7 @@ config SLUB
+ config SLOB
+ 	depends on EXPERT
+ 	bool "SLOB (Simple Allocator)"
++	depends on !PREEMPT_RT
+ 	help
+ 	   SLOB replaces the stock allocator with a drastically simpler
+ 	   allocator. SLOB is generally more space efficient but
+diff --git a/init/main.c b/init/main.c
+index 649d9e4201a8..ee92d608ffc4 100644
+--- a/init/main.c
++++ b/init/main.c
+@@ -1606,6 +1606,7 @@ static noinline void __init kernel_init_freeable(void)
+ 
+ 	rcu_init_tasks_generic();
+ 	do_pre_smp_initcalls();
++	rcu_tasks_initiate_self_tests();
+ 	lockup_detector_init();
+ 
+ 	smp_init();
+diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
+index 5876e30c5740..5df0776264c2 100644
+--- a/kernel/Kconfig.preempt
++++ b/kernel/Kconfig.preempt
+@@ -1,5 +1,11 @@
+ # SPDX-License-Identifier: GPL-2.0-only
+ 
++config HAVE_PREEMPT_LAZY
++	bool
++
++config PREEMPT_LAZY
++	def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT
++
+ choice
+ 	prompt "Preemption Model"
+ 	default PREEMPT_NONE
+diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
+index 1486768f2318..bb3b805436c4 100644
+--- a/kernel/cgroup/rstat.c
++++ b/kernel/cgroup/rstat.c
+@@ -156,8 +156,9 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
+ 		raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock,
+ 						       cpu);
+ 		struct cgroup *pos = NULL;
++		unsigned long flags;
+ 
+-		raw_spin_lock(cpu_lock);
++		raw_spin_lock_irqsave(cpu_lock, flags);
+ 		while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) {
+ 			struct cgroup_subsys_state *css;
+ 
+@@ -169,7 +170,7 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
+ 				css->ss->css_rstat_flush(css, cpu);
+ 			rcu_read_unlock();
+ 		}
+-		raw_spin_unlock(cpu_lock);
++		raw_spin_unlock_irqrestore(cpu_lock, flags);
+ 
+ 		/* if @may_sleep, play nice and yield if necessary */
+ 		if (may_sleep && (need_resched() ||
+diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
+index 7beceb447211..28497c00e63b 100644
+--- a/kernel/debug/debug_core.c
++++ b/kernel/debug/debug_core.c
+@@ -239,35 +239,42 @@ NOKPROBE_SYMBOL(kgdb_call_nmi_hook);
+ static DEFINE_PER_CPU(call_single_data_t, kgdb_roundup_csd) =
+ 	CSD_INIT(kgdb_call_nmi_hook, NULL);
+ 
+-void __weak kgdb_roundup_cpus(void)
++void __weak kgdb_roundup_cpu(unsigned int cpu)
+ {
+ 	call_single_data_t *csd;
++	int ret;
++
++	csd = &per_cpu(kgdb_roundup_csd, cpu);
++
++	/*
++	 * If it didn't round up last time, don't try again
++	 * since smp_call_function_single_async() will block.
++	 *
++	 * If rounding_up is false then we know that the
++	 * previous call must have at least started and that
++	 * means smp_call_function_single_async() won't block.
++	 */
++	if (kgdb_info[cpu].rounding_up)
++		return;
++	kgdb_info[cpu].rounding_up = true;
++
++	ret = smp_call_function_single_async(cpu, csd);
++	if (ret)
++		kgdb_info[cpu].rounding_up = false;
++}
++NOKPROBE_SYMBOL(kgdb_roundup_cpu);
++
++void __weak kgdb_roundup_cpus(void)
++{
+ 	int this_cpu = raw_smp_processor_id();
+ 	int cpu;
+-	int ret;
+ 
+ 	for_each_online_cpu(cpu) {
+ 		/* No need to roundup ourselves */
+ 		if (cpu == this_cpu)
+ 			continue;
+ 
+-		csd = &per_cpu(kgdb_roundup_csd, cpu);
+-
+-		/*
+-		 * If it didn't round up last time, don't try again
+-		 * since smp_call_function_single_async() will block.
+-		 *
+-		 * If rounding_up is false then we know that the
+-		 * previous call must have at least started and that
+-		 * means smp_call_function_single_async() won't block.
+-		 */
+-		if (kgdb_info[cpu].rounding_up)
+-			continue;
+-		kgdb_info[cpu].rounding_up = true;
+-
+-		ret = smp_call_function_single_async(cpu, csd);
+-		if (ret)
+-			kgdb_info[cpu].rounding_up = false;
++		kgdb_roundup_cpu(cpu);
+ 	}
+ }
+ NOKPROBE_SYMBOL(kgdb_roundup_cpus);
+diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
+index 6735ac36b718..539a2f0dc89d 100644
+--- a/kernel/debug/kdb/kdb_io.c
++++ b/kernel/debug/kdb/kdb_io.c
+@@ -559,23 +559,17 @@ static void kdb_msg_write(const char *msg, int msg_len)
+ 		cp++;
+ 	}
+ 
++	/* mirror output on atomic consoles */
+ 	for_each_console(c) {
+ 		if (!(c->flags & CON_ENABLED))
+ 			continue;
+ 		if (c == dbg_io_ops->cons)
+ 			continue;
+-		/*
+-		 * Set oops_in_progress to encourage the console drivers to
+-		 * disregard their internal spin locks: in the current calling
+-		 * context the risk of deadlock is a bigger problem than risks
+-		 * due to re-entering the console driver. We operate directly on
+-		 * oops_in_progress rather than using bust_spinlocks() because
+-		 * the calls bust_spinlocks() makes on exit are not appropriate
+-		 * for this calling context.
+-		 */
+-		++oops_in_progress;
+-		c->write(c, msg, msg_len);
+-		--oops_in_progress;
++
++		if (!c->write_atomic)
++			continue;
++		c->write_atomic(c, msg, msg_len);
++
+ 		touch_nmi_watchdog();
+ 	}
+ }
+diff --git a/kernel/entry/common.c b/kernel/entry/common.c
+index e002bea6b4be..51ddfdacfc1f 100644
+--- a/kernel/entry/common.c
++++ b/kernel/entry/common.c
+@@ -159,9 +159,17 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
+ 
+ 		local_irq_enable_exit_to_user(ti_work);
+ 
+-		if (ti_work & _TIF_NEED_RESCHED)
++		if (ti_work & _TIF_NEED_RESCHED_MASK)
+ 			schedule();
+ 
++#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
++		if (unlikely(current->forced_info.si_signo)) {
++			struct task_struct *t = current;
++			force_sig_info(&t->forced_info);
++			t->forced_info.si_signo = 0;
++		}
++#endif
++
+ 		if (ti_work & _TIF_UPROBE)
+ 			uprobe_notify_resume(regs);
+ 
+@@ -388,7 +396,7 @@ void irqentry_exit_cond_resched(void)
+ 		rcu_irq_exit_check_preempt();
+ 		if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
+ 			WARN_ON_ONCE(!on_thread_stack());
+-		if (need_resched())
++		if (should_resched(0))
+ 			preempt_schedule_irq();
+ 	}
+ }
+diff --git a/kernel/exit.c b/kernel/exit.c
+index 80efdfda6662..6ff17e977392 100644
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -64,6 +64,7 @@
+ #include <linux/rcuwait.h>
+ #include <linux/compat.h>
+ #include <linux/io_uring.h>
++#include <linux/kprobes.h>
+ #include <linux/sysfs.h>
+ 
+ #include <linux/uaccess.h>
+@@ -215,8 +216,14 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
+ {
+ 	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+ 
++	kprobe_flush_task(tsk);
+ 	perf_event_delayed_put(tsk);
+ 	trace_sched_process_free(tsk);
++
++	/* RT enabled kernels delay freeing the VMAP'ed task stack */
++	if (IS_ENABLED(CONFIG_PREEMPT_RT))
++		put_task_stack(tsk);
++
+ 	put_task_struct(tsk);
+ }
+ 
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 1906230a000e..47f647e6a6c3 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -289,7 +289,10 @@ static inline void free_thread_stack(struct task_struct *tsk)
+ 			return;
+ 		}
+ 
+-		vfree_atomic(tsk->stack);
++		if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++			vfree_atomic(tsk->stack);
++		else
++			vfree(tsk->stack);
+ 		return;
+ 	}
+ #endif
+@@ -709,6 +712,19 @@ void __mmdrop(struct mm_struct *mm)
+ }
+ EXPORT_SYMBOL_GPL(__mmdrop);
+ 
++#ifdef CONFIG_PREEMPT_RT
++/*
++ * RCU callback for delayed mm drop. Not strictly RCU, but call_rcu() is
++ * by far the least expensive way to do that.
++ */
++void __mmdrop_delayed(struct rcu_head *rhp)
++{
++	struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);
++
++	__mmdrop(mm);
++}
++#endif
++
+ static void mmdrop_async_fn(struct work_struct *work)
+ {
+ 	struct mm_struct *mm;
+diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
+index 7a45fd593245..23dc888e0885 100644
+--- a/kernel/irq/irqdesc.c
++++ b/kernel/irq/irqdesc.c
+@@ -664,6 +664,29 @@ int generic_handle_irq(unsigned int irq)
+ }
+ EXPORT_SYMBOL_GPL(generic_handle_irq);
+ 
++/**
++ * generic_handle_irq_safe - Invoke the handler for a particular irq from any
++ *			     context.
++ * @irq:	The irq number to handle
++ *
++ * Returns:	0 on success, a negative value on error.
++ *
++ * This function can be called from any context (IRQ or process context). It
++ * will report an error if not invoked from IRQ context and the irq has been
++ * marked to enforce IRQ-context only.
++ */
++int generic_handle_irq_safe(unsigned int irq)
++{
++	unsigned long flags;
++	int ret;
++
++	local_irq_save(flags);
++	ret = handle_irq_desc(irq_to_desc(irq));
++	local_irq_restore(flags);
++	return ret;
++}
++EXPORT_SYMBOL_GPL(generic_handle_irq_safe);
++
+ #ifdef CONFIG_IRQ_DOMAIN
+ /**
+  * generic_handle_domain_irq - Invoke the handler for a HW irq belonging
+diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
+index 9862372e0f01..78d90ac0528c 100644
+--- a/kernel/irq/manage.c
++++ b/kernel/irq/manage.c
+@@ -1301,6 +1301,8 @@ static int irq_thread(void *data)
+ 
+ 	irq_thread_set_ready(desc, action);
+ 
++	sched_set_fifo(current);
++
+ 	if (force_irqthreads() && test_bit(IRQTF_FORCED_THREAD,
+ 					   &action->thread_flags))
+ 		handler_fn = irq_forced_thread_fn;
+@@ -1466,8 +1468,6 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary)
+ 	if (IS_ERR(t))
+ 		return PTR_ERR(t);
+ 
+-	sched_set_fifo(t);
+-
+ 	/*
+ 	 * We keep the reference to the task struct even if
+ 	 * the thread dies to avoid that the interrupt code
+@@ -2861,7 +2861,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state);
+  *	This call sets the internal irqchip state of an interrupt,
+  *	depending on the value of @which.
+  *
+- *	This function should be called with preemption disabled if the
++ *	This function should be called with migration disabled if the
+  *	interrupt controller has per-cpu registers.
+  */
+ int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
+diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
+index c481d8458325..02b2daf07441 100644
+--- a/kernel/irq/spurious.c
++++ b/kernel/irq/spurious.c
+@@ -447,6 +447,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true");
+ 
+ static int __init irqfixup_setup(char *str)
+ {
++	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
++		pr_warn("irqfixup boot option not supported with PREEMPT_RT\n");
++		return 1;
++	}
+ 	irqfixup = 1;
+ 	printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
+ 	printk(KERN_WARNING "This may impact system performance.\n");
+@@ -459,6 +463,10 @@ module_param(irqfixup, int, 0644);
+ 
+ static int __init irqpoll_setup(char *str)
+ {
++	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
++		pr_warn("irqpoll boot option not supported with PREEMPT_RT\n");
++		return 1;
++	}
+ 	irqfixup = 2;
+ 	printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
+ 				"enabled\n");
+diff --git a/kernel/irq_work.c b/kernel/irq_work.c
+index db8c248ebc8c..f7df715ec28e 100644
+--- a/kernel/irq_work.c
++++ b/kernel/irq_work.c
+@@ -18,11 +18,36 @@
+ #include <linux/cpu.h>
+ #include <linux/notifier.h>
+ #include <linux/smp.h>
++#include <linux/smpboot.h>
+ #include <asm/processor.h>
+ #include <linux/kasan.h>
+ 
+ static DEFINE_PER_CPU(struct llist_head, raised_list);
+ static DEFINE_PER_CPU(struct llist_head, lazy_list);
++static DEFINE_PER_CPU(struct task_struct *, irq_workd);
++
++static void wake_irq_workd(void)
++{
++	struct task_struct *tsk = __this_cpu_read(irq_workd);
++
++	if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk)
++		wake_up_process(tsk);
++}
++
++#ifdef CONFIG_SMP
++static void irq_work_wake(struct irq_work *entry)
++{
++	wake_irq_workd();
++}
++
++static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) =
++	IRQ_WORK_INIT_HARD(irq_work_wake);
++#endif
++
++static int irq_workd_should_run(unsigned int cpu)
++{
++	return !llist_empty(this_cpu_ptr(&lazy_list));
++}
+ 
+ /*
+  * Claim the entry so that no one else will poke at it.
+@@ -52,15 +77,29 @@ void __weak arch_irq_work_raise(void)
+ /* Enqueue on current CPU, work must already be claimed and preempt disabled */
+ static void __irq_work_queue_local(struct irq_work *work)
+ {
++	struct llist_head *list;
++	bool rt_lazy_work = false;
++	bool lazy_work = false;
++	int work_flags;
++
++	work_flags = atomic_read(&work->node.a_flags);
++	if (work_flags & IRQ_WORK_LAZY)
++		lazy_work = true;
++	else if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
++		 !(work_flags & IRQ_WORK_HARD_IRQ))
++		rt_lazy_work = true;
++
++	if (lazy_work || rt_lazy_work)
++		list = this_cpu_ptr(&lazy_list);
++	else
++		list = this_cpu_ptr(&raised_list);
++
++	if (!llist_add(&work->node.llist, list))
++		return;
++
+ 	/* If the work is "lazy", handle it from next tick if any */
+-	if (atomic_read(&work->node.a_flags) & IRQ_WORK_LAZY) {
+-		if (llist_add(&work->node.llist, this_cpu_ptr(&lazy_list)) &&
+-		    tick_nohz_tick_stopped())
+-			arch_irq_work_raise();
+-	} else {
+-		if (llist_add(&work->node.llist, this_cpu_ptr(&raised_list)))
+-			arch_irq_work_raise();
+-	}
++	if (!lazy_work || tick_nohz_tick_stopped())
++		arch_irq_work_raise();
+ }
+ 
+ /* Enqueue the irq work @work on the current CPU */
+@@ -104,17 +143,34 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
+ 	if (cpu != smp_processor_id()) {
+ 		/* Arch remote IPI send/receive backend aren't NMI safe */
+ 		WARN_ON_ONCE(in_nmi());
++
++		/*
++		 * On PREEMPT_RT the items which are not marked as
++		 * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work
++		 * item is used on the remote CPU to wake the thread.
++		 */
++		if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
++		    !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) {
++
++			if (!llist_add(&work->node.llist, &per_cpu(lazy_list, cpu)))
++				goto out;
++
++			work = &per_cpu(irq_work_wakeup, cpu);
++			if (!irq_work_claim(work))
++				goto out;
++		}
++
+ 		__smp_call_single_queue(cpu, &work->node.llist);
+ 	} else {
+ 		__irq_work_queue_local(work);
+ 	}
++out:
+ 	preempt_enable();
+ 
+ 	return true;
+ #endif /* CONFIG_SMP */
+ }
+ 
+-
+ bool irq_work_needs_cpu(void)
+ {
+ 	struct llist_head *raised, *lazy;
+@@ -160,6 +216,10 @@ void irq_work_single(void *arg)
+ 	 * else claimed it meanwhile.
+ 	 */
+ 	(void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY);
++
++	if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
++	    !arch_irq_work_has_interrupt())
++		rcuwait_wake_up(&work->irqwait);
+ }
+ 
+ static void irq_work_run_list(struct llist_head *list)
+@@ -167,7 +227,12 @@ static void irq_work_run_list(struct llist_head *list)
+ 	struct irq_work *work, *tmp;
+ 	struct llist_node *llnode;
+ 
+-	BUG_ON(!irqs_disabled());
++	/*
++	 * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed
++	 * in a per-CPU thread in preemptible context. Only the items which are
++	 * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context.
++	 */
++	BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT));
+ 
+ 	if (llist_empty(list))
+ 		return;
+@@ -184,7 +249,10 @@ static void irq_work_run_list(struct llist_head *list)
+ void irq_work_run(void)
+ {
+ 	irq_work_run_list(this_cpu_ptr(&raised_list));
+-	irq_work_run_list(this_cpu_ptr(&lazy_list));
++	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++		irq_work_run_list(this_cpu_ptr(&lazy_list));
++	else
++		wake_irq_workd();
+ }
+ EXPORT_SYMBOL_GPL(irq_work_run);
+ 
+@@ -194,7 +262,11 @@ void irq_work_tick(void)
+ 
+ 	if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
+ 		irq_work_run_list(raised);
+-	irq_work_run_list(this_cpu_ptr(&lazy_list));
++
++	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++		irq_work_run_list(this_cpu_ptr(&lazy_list));
++	else
++		wake_irq_workd();
+ }
+ 
+ /*
+@@ -204,8 +276,42 @@ void irq_work_tick(void)
+ void irq_work_sync(struct irq_work *work)
+ {
+ 	lockdep_assert_irqs_enabled();
++	might_sleep();
++
++	if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
++	    !arch_irq_work_has_interrupt()) {
++		rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work),
++				   TASK_UNINTERRUPTIBLE);
++		return;
++	}
+ 
+ 	while (irq_work_is_busy(work))
+ 		cpu_relax();
+ }
+ EXPORT_SYMBOL_GPL(irq_work_sync);
++
++static void run_irq_workd(unsigned int cpu)
++{
++	irq_work_run_list(this_cpu_ptr(&lazy_list));
++}
++
++static void irq_workd_setup(unsigned int cpu)
++{
++	sched_set_fifo_low(current);
++}
++
++static struct smp_hotplug_thread irqwork_threads = {
++	.store                  = &irq_workd,
++	.setup			= irq_workd_setup,
++	.thread_should_run      = irq_workd_should_run,
++	.thread_fn              = run_irq_workd,
++	.thread_comm            = "irq_work/%u",
++};
++
++static __init int irq_work_init_threads(void)
++{
++	if (IS_ENABLED(CONFIG_PREEMPT_RT))
++		BUG_ON(smpboot_register_percpu_thread(&irqwork_threads));
++	return 0;
++}
++early_initcall(irq_work_init_threads);
+diff --git a/kernel/kcov.c b/kernel/kcov.c
+index 80bfe71bbe13..36ca640c4f8e 100644
+--- a/kernel/kcov.c
++++ b/kernel/kcov.c
+@@ -88,6 +88,7 @@ static struct list_head kcov_remote_areas = LIST_HEAD_INIT(kcov_remote_areas);
+ 
+ struct kcov_percpu_data {
+ 	void			*irq_area;
++	local_lock_t		lock;
+ 
+ 	unsigned int		saved_mode;
+ 	unsigned int		saved_size;
+@@ -96,7 +97,9 @@ struct kcov_percpu_data {
+ 	int			saved_sequence;
+ };
+ 
+-static DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data);
++static DEFINE_PER_CPU(struct kcov_percpu_data, kcov_percpu_data) = {
++	.lock = INIT_LOCAL_LOCK(lock),
++};
+ 
+ /* Must be called with kcov_remote_lock locked. */
+ static struct kcov_remote *kcov_remote_find(u64 handle)
+@@ -824,7 +827,7 @@ void kcov_remote_start(u64 handle)
+ 	if (!in_task() && !in_serving_softirq())
+ 		return;
+ 
+-	local_irq_save(flags);
++	local_lock_irqsave(&kcov_percpu_data.lock, flags);
+ 
+ 	/*
+ 	 * Check that kcov_remote_start() is not called twice in background
+@@ -832,7 +835,7 @@ void kcov_remote_start(u64 handle)
+ 	 */
+ 	mode = READ_ONCE(t->kcov_mode);
+ 	if (WARN_ON(in_task() && kcov_mode_enabled(mode))) {
+-		local_irq_restore(flags);
++		local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
+ 		return;
+ 	}
+ 	/*
+@@ -841,14 +844,15 @@ void kcov_remote_start(u64 handle)
+ 	 * happened while collecting coverage from a background thread.
+ 	 */
+ 	if (WARN_ON(in_serving_softirq() && t->kcov_softirq)) {
+-		local_irq_restore(flags);
++		local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
+ 		return;
+ 	}
+ 
+ 	spin_lock(&kcov_remote_lock);
+ 	remote = kcov_remote_find(handle);
+ 	if (!remote) {
+-		spin_unlock_irqrestore(&kcov_remote_lock, flags);
++		spin_unlock(&kcov_remote_lock);
++		local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
+ 		return;
+ 	}
+ 	kcov_debug("handle = %llx, context: %s\n", handle,
+@@ -869,19 +873,19 @@ void kcov_remote_start(u64 handle)
+ 		size = CONFIG_KCOV_IRQ_AREA_SIZE;
+ 		area = this_cpu_ptr(&kcov_percpu_data)->irq_area;
+ 	}
+-	spin_unlock_irqrestore(&kcov_remote_lock, flags);
++	spin_unlock(&kcov_remote_lock);
+ 
+ 	/* Can only happen when in_task(). */
+ 	if (!area) {
++		local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
+ 		area = vmalloc(size * sizeof(unsigned long));
+ 		if (!area) {
+ 			kcov_put(kcov);
+ 			return;
+ 		}
++		local_lock_irqsave(&kcov_percpu_data.lock, flags);
+ 	}
+ 
+-	local_irq_save(flags);
+-
+ 	/* Reset coverage size. */
+ 	*(u64 *)area = 0;
+ 
+@@ -891,7 +895,7 @@ void kcov_remote_start(u64 handle)
+ 	}
+ 	kcov_start(t, kcov, size, area, mode, sequence);
+ 
+-	local_irq_restore(flags);
++	local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
+ 
+ }
+ EXPORT_SYMBOL(kcov_remote_start);
+@@ -965,12 +969,12 @@ void kcov_remote_stop(void)
+ 	if (!in_task() && !in_serving_softirq())
+ 		return;
+ 
+-	local_irq_save(flags);
++	local_lock_irqsave(&kcov_percpu_data.lock, flags);
+ 
+ 	mode = READ_ONCE(t->kcov_mode);
+ 	barrier();
+ 	if (!kcov_mode_enabled(mode)) {
+-		local_irq_restore(flags);
++		local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
+ 		return;
+ 	}
+ 	/*
+@@ -978,12 +982,12 @@ void kcov_remote_stop(void)
+ 	 * actually found the remote handle and started collecting coverage.
+ 	 */
+ 	if (in_serving_softirq() && !t->kcov_softirq) {
+-		local_irq_restore(flags);
++		local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
+ 		return;
+ 	}
+ 	/* Make sure that kcov_softirq is only set when in softirq. */
+ 	if (WARN_ON(!in_serving_softirq() && t->kcov_softirq)) {
+-		local_irq_restore(flags);
++		local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
+ 		return;
+ 	}
+ 
+@@ -1013,7 +1017,7 @@ void kcov_remote_stop(void)
+ 		spin_unlock(&kcov_remote_lock);
+ 	}
+ 
+-	local_irq_restore(flags);
++	local_unlock_irqrestore(&kcov_percpu_data.lock, flags);
+ 
+ 	/* Get in kcov_remote_start(). */
+ 	kcov_put(kcov);
+@@ -1034,8 +1038,8 @@ static int __init kcov_init(void)
+ 	int cpu;
+ 
+ 	for_each_possible_cpu(cpu) {
+-		void *area = vmalloc(CONFIG_KCOV_IRQ_AREA_SIZE *
+-				sizeof(unsigned long));
++		void *area = vmalloc_node(CONFIG_KCOV_IRQ_AREA_SIZE *
++				sizeof(unsigned long), cpu_to_node(cpu));
+ 		if (!area)
+ 			return -ENOMEM;
+ 		per_cpu_ptr(&kcov_percpu_data, cpu)->irq_area = area;
+diff --git a/kernel/kprobes.c b/kernel/kprobes.c
+index 7e9fa1b7ff67..d83e818ffbdb 100644
+--- a/kernel/kprobes.c
++++ b/kernel/kprobes.c
+@@ -1248,10 +1248,10 @@ void kprobe_busy_end(void)
+ }
+ 
+ /*
+- * This function is called from finish_task_switch when task tk becomes dead,
+- * so that we can recycle any function-return probe instances associated
+- * with this task. These left over instances represent probed functions
+- * that have been called but will never return.
++ * This function is called from delayed_put_task_struct() when a task is
++ * dead and cleaned up to recycle any function-return probe instances
++ * associated with this task. These left over instances represent probed
++ * functions that have been called but will never return.
+  */
+ void kprobe_flush_task(struct task_struct *tk)
+ {
+diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
+index e20c19e3ba49..777168d58f02 100644
+--- a/kernel/ksysfs.c
++++ b/kernel/ksysfs.c
+@@ -143,6 +143,15 @@ KERNEL_ATTR_RO(vmcoreinfo);
+ 
+ #endif /* CONFIG_CRASH_CORE */
+ 
++#if defined(CONFIG_PREEMPT_RT)
++static ssize_t realtime_show(struct kobject *kobj,
++			     struct kobj_attribute *attr, char *buf)
++{
++	return sprintf(buf, "%d\n", 1);
++}
++KERNEL_ATTR_RO(realtime);
++#endif
++
+ /* whether file capabilities are enabled */
+ static ssize_t fscaps_show(struct kobject *kobj,
+ 				  struct kobj_attribute *attr, char *buf)
+@@ -233,6 +242,9 @@ static struct attribute * kernel_attrs[] = {
+ #ifndef CONFIG_TINY_RCU
+ 	&rcu_expedited_attr.attr,
+ 	&rcu_normal_attr.attr,
++#endif
++#ifdef CONFIG_PREEMPT_RT
++	&realtime_attr.attr,
+ #endif
+ 	NULL
+ };
+diff --git a/kernel/kthread.c b/kernel/kthread.c
+index e319a1b62586..c3870b2a150d 100644
+--- a/kernel/kthread.c
++++ b/kernel/kthread.c
+@@ -270,6 +270,7 @@ EXPORT_SYMBOL_GPL(kthread_parkme);
+ 
+ static int kthread(void *_create)
+ {
++	static const struct sched_param param = { .sched_priority = 0 };
+ 	/* Copy data: it's on kthread's stack */
+ 	struct kthread_create_info *create = _create;
+ 	int (*threadfn)(void *data) = create->threadfn;
+@@ -300,6 +301,13 @@ static int kthread(void *_create)
+ 	init_completion(&self->parked);
+ 	current->vfork_done = &self->exited;
+ 
++	/*
++	 * The new thread inherited kthreadd's priority and CPU mask. Reset
++	 * back to default in case they have been changed.
++	 */
++	sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
++	set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_KTHREAD));
++
+ 	/* OK, tell user we're spawned, wait for stop or wakeup */
+ 	__set_current_state(TASK_UNINTERRUPTIBLE);
+ 	create->result = current;
+@@ -397,7 +405,6 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
+ 	}
+ 	task = create->result;
+ 	if (!IS_ERR(task)) {
+-		static const struct sched_param param = { .sched_priority = 0 };
+ 		char name[TASK_COMM_LEN];
+ 
+ 		/*
+@@ -406,13 +413,6 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
+ 		 */
+ 		vsnprintf(name, sizeof(name), namefmt, args);
+ 		set_task_comm(task, name);
+-		/*
+-		 * root may have changed our (kthreadd's) priority or CPU mask.
+-		 * The kernel thread should not inherit these properties.
+-		 */
+-		sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
+-		set_cpus_allowed_ptr(task,
+-				     housekeeping_cpumask(HK_FLAG_KTHREAD));
+ 	}
+ 	kfree(create);
+ 	return task;
+diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
+index e6a282bc1665..ce3c8a4a5506 100644
+--- a/kernel/locking/lockdep.c
++++ b/kernel/locking/lockdep.c
+@@ -5470,6 +5470,7 @@ static noinstr void check_flags(unsigned long flags)
+ 		}
+ 	}
+ 
++#ifndef CONFIG_PREEMPT_RT
+ 	/*
+ 	 * We dont accurately track softirq state in e.g.
+ 	 * hardirq contexts (such as on 4KSTACKS), so only
+@@ -5484,6 +5485,7 @@ static noinstr void check_flags(unsigned long flags)
+ 			DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
+ 		}
+ 	}
++#endif
+ 
+ 	if (!debug_locks)
+ 		print_irqtrace_events(current);
+diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
+index b7fa3ee3aa1d..108b963a783b 100644
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1135,8 +1135,26 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
+ 	 * which is wrong, as the other waiter is not in a deadlock
+ 	 * situation.
+ 	 */
+-	if (owner == task)
++	if (owner == task) {
++#if defined(DEBUG_WW_MUTEXES) && defined(CONFIG_DEBUG_LOCKING_API_SELFTESTS)
++		/*
++		 * The lockdep selftest for ww-mutex assumes in a few cases
++		 * the ww_ctx->contending_lock assignment via
++		 * __ww_mutex_check_kill() which does not happen if the rtmutex
++		 * detects the deadlock early.
++		 */
++		if (build_ww_mutex() && ww_ctx) {
++			struct rt_mutex *rtm;
++
++			/* Check whether the waiter should backout immediately */
++			rtm = container_of(lock, struct rt_mutex, rtmutex);
++
++			__ww_mutex_add_waiter(waiter, rtm, ww_ctx);
++			__ww_mutex_check_kill(rtm, waiter, ww_ctx);
++		}
++#endif
+ 		return -EDEADLK;
++	}
+ 
+ 	raw_spin_lock(&task->pi_lock);
+ 	waiter->task = task;
+diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c
+index a461be2f873d..cb9fdff76a8a 100644
+--- a/kernel/locking/rtmutex_api.c
++++ b/kernel/locking/rtmutex_api.c
+@@ -21,12 +21,13 @@ int max_lock_depth = 1024;
+  */
+ static __always_inline int __rt_mutex_lock_common(struct rt_mutex *lock,
+ 						  unsigned int state,
++						  struct lockdep_map *nest_lock,
+ 						  unsigned int subclass)
+ {
+ 	int ret;
+ 
+ 	might_sleep();
+-	mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
++	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, _RET_IP_);
+ 	ret = __rt_mutex_lock(&lock->rtmutex, state);
+ 	if (ret)
+ 		mutex_release(&lock->dep_map, _RET_IP_);
+@@ -48,10 +49,16 @@ EXPORT_SYMBOL(rt_mutex_base_init);
+  */
+ void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass)
+ {
+-	__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass);
++	__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, subclass);
+ }
+ EXPORT_SYMBOL_GPL(rt_mutex_lock_nested);
+ 
++void __sched _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock)
++{
++	__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, nest_lock, 0);
++}
++EXPORT_SYMBOL_GPL(_rt_mutex_lock_nest_lock);
++
+ #else /* !CONFIG_DEBUG_LOCK_ALLOC */
+ 
+ /**
+@@ -61,7 +68,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock_nested);
+  */
+ void __sched rt_mutex_lock(struct rt_mutex *lock)
+ {
+-	__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0);
++	__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, 0);
+ }
+ EXPORT_SYMBOL_GPL(rt_mutex_lock);
+ #endif
+@@ -77,10 +84,25 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock);
+  */
+ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
+ {
+-	return __rt_mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0);
++	return __rt_mutex_lock_common(lock, TASK_INTERRUPTIBLE, NULL, 0);
+ }
+ EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
+ 
++/**
++ * rt_mutex_lock_killable - lock a rt_mutex killable
++ *
++ * @lock:		the rt_mutex to be locked
++ *
++ * Returns:
++ *  0		on success
++ * -EINTR	when interrupted by a fatal signal
++ */
++int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
++{
++	return __rt_mutex_lock_common(lock, TASK_KILLABLE, NULL, 0);
++}
++EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
++
+ /**
+  * rt_mutex_trylock - try to lock a rt_mutex
+  *
+diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c
+index d2912e44d61f..9e396a09fe0f 100644
+--- a/kernel/locking/spinlock_rt.c
++++ b/kernel/locking/spinlock_rt.c
+@@ -24,6 +24,17 @@
+ #define RT_MUTEX_BUILD_SPINLOCKS
+ #include "rtmutex.c"
+ 
++/*
++ * __might_resched() skips the state check as rtlocks are state
++ * preserving. Take RCU nesting into account as spin/read/write_lock() can
++ * legitimately nest into an RCU read side critical section.
++ */
++#define RTLOCK_RESCHED_OFFSETS						\
++	(rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT)
++
++#define rtlock_might_resched()						\
++	__might_resched(__FILE__, __LINE__, RTLOCK_RESCHED_OFFSETS)
++
+ static __always_inline void rtlock_lock(struct rt_mutex_base *rtm)
+ {
+ 	if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current)))
+@@ -32,7 +43,7 @@ static __always_inline void rtlock_lock(struct rt_mutex_base *rtm)
+ 
+ static __always_inline void __rt_spin_lock(spinlock_t *lock)
+ {
+-	___might_sleep(__FILE__, __LINE__, 0);
++	rtlock_might_resched();
+ 	rtlock_lock(&lock->lock);
+ 	rcu_read_lock();
+ 	migrate_disable();
+@@ -210,7 +221,7 @@ EXPORT_SYMBOL(rt_write_trylock);
+ 
+ void __sched rt_read_lock(rwlock_t *rwlock)
+ {
+-	___might_sleep(__FILE__, __LINE__, 0);
++	rtlock_might_resched();
+ 	rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_);
+ 	rwbase_read_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
+ 	rcu_read_lock();
+@@ -220,7 +231,7 @@ EXPORT_SYMBOL(rt_read_lock);
+ 
+ void __sched rt_write_lock(rwlock_t *rwlock)
+ {
+-	___might_sleep(__FILE__, __LINE__, 0);
++	rtlock_might_resched();
+ 	rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
+ 	rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
+ 	rcu_read_lock();
+@@ -246,12 +257,6 @@ void __sched rt_write_unlock(rwlock_t *rwlock)
+ }
+ EXPORT_SYMBOL(rt_write_unlock);
+ 
+-int __sched rt_rwlock_is_contended(rwlock_t *rwlock)
+-{
+-	return rw_base_is_contended(&rwlock->rwbase);
+-}
+-EXPORT_SYMBOL(rt_rwlock_is_contended);
+-
+ #ifdef CONFIG_DEBUG_LOCK_ALLOC
+ void __rt_rwlock_init(rwlock_t *rwlock, const char *name,
+ 		      struct lock_class_key *key)
+diff --git a/kernel/panic.c b/kernel/panic.c
+index 47933d4c769b..ea5269f486cc 100644
+--- a/kernel/panic.c
++++ b/kernel/panic.c
+@@ -245,12 +245,27 @@ void check_panic_on_warn(const char *origin)
+ void panic(const char *fmt, ...)
+ {
+ 	static char buf[1024];
++	va_list args2;
+ 	va_list args;
+ 	long i, i_next = 0, len;
+ 	int state = 0;
+ 	int old_cpu, this_cpu;
+ 	bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers;
+ 
++	console_verbose();
++	pr_emerg("Kernel panic - not syncing:\n");
++	va_start(args2, fmt);
++	va_copy(args, args2);
++	vprintk(fmt, args2);
++	va_end(args2);
++#ifdef CONFIG_DEBUG_BUGVERBOSE
++	/*
++	 * Avoid nested stack-dumping if a panic occurs during oops processing
++	 */
++	if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
++		dump_stack();
++#endif
++	pr_flush(1000, true);
+ 	if (panic_on_warn) {
+ 		/*
+ 		 * This thread may hit another WARN() in the panic path.
+@@ -291,24 +306,13 @@ void panic(const char *fmt, ...)
+ 	if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu)
+ 		panic_smp_self_stop();
+ 
+-	console_verbose();
+ 	bust_spinlocks(1);
+-	va_start(args, fmt);
+ 	len = vscnprintf(buf, sizeof(buf), fmt, args);
+ 	va_end(args);
+ 
+ 	if (len && buf[len - 1] == '\n')
+ 		buf[len - 1] = '\0';
+ 
+-	pr_emerg("Kernel panic - not syncing: %s\n", buf);
+-#ifdef CONFIG_DEBUG_BUGVERBOSE
+-	/*
+-	 * Avoid nested stack-dumping if a panic occurs during oops processing
+-	 */
+-	if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
+-		dump_stack();
+-#endif
+-
+ 	/*
+ 	 * If kgdb is enabled, give it a chance to run before we stop all
+ 	 * the other CPUs or else we won't be able to debug processes left
+@@ -617,9 +621,11 @@ static u64 oops_id;
+ 
+ static int init_oops_id(void)
+ {
++#ifndef CONFIG_PREEMPT_RT
+ 	if (!oops_id)
+ 		get_random_bytes(&oops_id, sizeof(oops_id));
+ 	else
++#endif
+ 		oops_id++;
+ 
+ 	return 0;
+@@ -630,6 +636,7 @@ static void print_oops_end_marker(void)
+ {
+ 	init_oops_id();
+ 	pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id);
++	pr_flush(1000, true);
+ }
+ 
+ /*
+diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
+index 8d856b7c2e5a..7f27cfee283e 100644
+--- a/kernel/printk/printk.c
++++ b/kernel/printk/printk.c
+@@ -44,6 +44,9 @@
+ #include <linux/irq_work.h>
+ #include <linux/ctype.h>
+ #include <linux/uio.h>
++#include <linux/kgdb.h>
++#include <linux/kthread.h>
++#include <linux/clocksource.h>
+ #include <linux/sched/clock.h>
+ #include <linux/sched/debug.h>
+ #include <linux/sched/task_stack.h>
+@@ -268,11 +271,6 @@ static void __up_console_sem(unsigned long ip)
+  */
+ static int console_locked, console_suspended;
+ 
+-/*
+- * If exclusive_console is non-NULL then only this console is to be printed to.
+- */
+-static struct console *exclusive_console;
+-
+ /*
+  *	Array of consoles built from command line options (console=)
+  */
+@@ -352,10 +350,13 @@ static int console_msg_format = MSG_FORMAT_DEFAULT;
+  * non-prinatable characters are escaped in the "\xff" notation.
+  */
+ 
++#ifdef CONFIG_PRINTK
+ /* syslog_lock protects syslog_* variables and write access to clear_seq. */
+ static DEFINE_MUTEX(syslog_lock);
+ 
+-#ifdef CONFIG_PRINTK
++/* Set to enable sync mode. Once set, it is never cleared. */
++static bool sync_mode;
++
+ DECLARE_WAIT_QUEUE_HEAD(log_wait);
+ /* All 3 protected by @syslog_lock. */
+ /* the next printk record to read by syslog(READ) or /proc/kmsg */
+@@ -363,17 +364,6 @@ static u64 syslog_seq;
+ static size_t syslog_partial;
+ static bool syslog_time;
+ 
+-/* All 3 protected by @console_sem. */
+-/* the next printk record to write to the console */
+-static u64 console_seq;
+-static u64 exclusive_console_stop_seq;
+-static unsigned long console_dropped;
+-
+-struct latched_seq {
+-	seqcount_latch_t	latch;
+-	u64			val[2];
+-};
+-
+ /*
+  * The next printk record to read after the last 'clear' command. There are
+  * two copies (updated with seqcount_latch) so that reads can locklessly
+@@ -391,9 +381,6 @@ static struct latched_seq clear_seq = {
+ #define PREFIX_MAX		32
+ #endif
+ 
+-/* the maximum size of a formatted record (i.e. with prefix added per line) */
+-#define CONSOLE_LOG_MAX		1024
+-
+ /* the maximum size allowed to be reserved for a record */
+ #define LOG_LINE_MAX		(CONSOLE_LOG_MAX - PREFIX_MAX)
+ 
+@@ -437,7 +424,7 @@ bool printk_percpu_data_ready(void)
+ 	return __printk_percpu_data_ready;
+ }
+ 
+-/* Must be called under syslog_lock. */
++/* Must be called under associated write-protection lock. */
+ static void latched_seq_write(struct latched_seq *ls, u64 val)
+ {
+ 	raw_write_seqcount_latch(&ls->latch);
+@@ -1771,188 +1758,152 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
+ 	return do_syslog(type, buf, len, SYSLOG_FROM_READER);
+ }
+ 
+-/*
+- * Special console_lock variants that help to reduce the risk of soft-lockups.
+- * They allow to pass console_lock to another printk() call using a busy wait.
+- */
++int printk_delay_msec __read_mostly;
+ 
+-#ifdef CONFIG_LOCKDEP
+-static struct lockdep_map console_owner_dep_map = {
+-	.name = "console_owner"
+-};
+-#endif
++static inline void printk_delay(int level)
++{
++	boot_delay_msec(level);
+ 
+-static DEFINE_RAW_SPINLOCK(console_owner_lock);
+-static struct task_struct *console_owner;
+-static bool console_waiter;
++	if (unlikely(printk_delay_msec)) {
++		int m = printk_delay_msec;
+ 
+-/**
+- * console_lock_spinning_enable - mark beginning of code where another
+- *	thread might safely busy wait
+- *
+- * This basically converts console_lock into a spinlock. This marks
+- * the section where the console_lock owner can not sleep, because
+- * there may be a waiter spinning (like a spinlock). Also it must be
+- * ready to hand over the lock at the end of the section.
+- */
+-static void console_lock_spinning_enable(void)
++		while (m--) {
++			mdelay(1);
++			touch_nmi_watchdog();
++		}
++	}
++}
++
++static bool kernel_sync_mode(void)
+ {
+-	raw_spin_lock(&console_owner_lock);
+-	console_owner = current;
+-	raw_spin_unlock(&console_owner_lock);
++	return (oops_in_progress || sync_mode);
++}
+ 
+-	/* The waiter may spin on us after setting console_owner */
+-	spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
++static bool console_may_sync(struct console *con)
++{
++	if (!(con->flags & CON_ENABLED))
++		return false;
++	if (con->write_atomic && kernel_sync_mode())
++		return true;
++	if (con->write_atomic && (con->flags & CON_HANDOVER) && !con->thread)
++		return true;
++	if (con->write && (con->flags & CON_BOOT) && !con->thread)
++		return true;
++	return false;
+ }
+ 
+-/**
+- * console_lock_spinning_disable_and_check - mark end of code where another
+- *	thread was able to busy wait and check if there is a waiter
+- *
+- * This is called at the end of the section where spinning is allowed.
+- * It has two functions. First, it is a signal that it is no longer
+- * safe to start busy waiting for the lock. Second, it checks if
+- * there is a busy waiter and passes the lock rights to her.
+- *
+- * Important: Callers lose the lock if there was a busy waiter.
+- *	They must not touch items synchronized by console_lock
+- *	in this case.
+- *
+- * Return: 1 if the lock rights were passed, 0 otherwise.
+- */
+-static int console_lock_spinning_disable_and_check(void)
++static bool call_sync_console_driver(struct console *con, const char *text, size_t text_len)
+ {
+-	int waiter;
++	if (!(con->flags & CON_ENABLED))
++		return false;
+ 
+-	raw_spin_lock(&console_owner_lock);
+-	waiter = READ_ONCE(console_waiter);
+-	console_owner = NULL;
+-	raw_spin_unlock(&console_owner_lock);
++	if (con->write_atomic && kernel_sync_mode()) {
++		con->write_atomic(con, text, text_len);
++		return true;
++	}
+ 
+-	if (!waiter) {
+-		spin_release(&console_owner_dep_map, _THIS_IP_);
+-		return 0;
++	if (con->write_atomic && (con->flags & CON_HANDOVER) && !con->thread) {
++		if (console_trylock()) {
++			con->write_atomic(con, text, text_len);
++			console_unlock();
++			return true;
++		}
++
++	} else if (con->write && (con->flags & CON_BOOT) && !con->thread) {
++		if (console_trylock()) {
++			con->write(con, text, text_len);
++			console_unlock();
++			return true;
++		}
+ 	}
+ 
+-	/* The waiter is now free to continue */
+-	WRITE_ONCE(console_waiter, false);
++	return false;
++}
+ 
+-	spin_release(&console_owner_dep_map, _THIS_IP_);
++static bool have_atomic_console(void)
++{
++	struct console *con;
+ 
+-	/*
+-	 * Hand off console_lock to waiter. The waiter will perform
+-	 * the up(). After this, the waiter is the console_lock owner.
+-	 */
+-	mutex_release(&console_lock_dep_map, _THIS_IP_);
+-	return 1;
++	for_each_console(con) {
++		if (!(con->flags & CON_ENABLED))
++			continue;
++		if (con->write_atomic)
++			return true;
++	}
++	return false;
+ }
+ 
+-/**
+- * console_trylock_spinning - try to get console_lock by busy waiting
+- *
+- * This allows to busy wait for the console_lock when the current
+- * owner is running in specially marked sections. It means that
+- * the current owner is running and cannot reschedule until it
+- * is ready to lose the lock.
+- *
+- * Return: 1 if we got the lock, 0 othrewise
+- */
+-static int console_trylock_spinning(void)
++static bool print_sync(struct console *con, u64 *seq)
+ {
+-	struct task_struct *owner = NULL;
+-	bool waiter;
+-	bool spin = false;
+-	unsigned long flags;
++	struct printk_info info;
++	struct printk_record r;
++	size_t text_len;
+ 
+-	if (console_trylock())
+-		return 1;
++	prb_rec_init_rd(&r, &info, &con->sync_buf[0], sizeof(con->sync_buf));
+ 
+-	printk_safe_enter_irqsave(flags);
++	if (!prb_read_valid(prb, *seq, &r))
++		return false;
+ 
+-	raw_spin_lock(&console_owner_lock);
+-	owner = READ_ONCE(console_owner);
+-	waiter = READ_ONCE(console_waiter);
+-	if (!waiter && owner && owner != current) {
+-		WRITE_ONCE(console_waiter, true);
+-		spin = true;
+-	}
+-	raw_spin_unlock(&console_owner_lock);
++	text_len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time);
+ 
+-	/*
+-	 * If there is an active printk() writing to the
+-	 * consoles, instead of having it write our data too,
+-	 * see if we can offload that load from the active
+-	 * printer, and do some printing ourselves.
+-	 * Go into a spin only if there isn't already a waiter
+-	 * spinning, and there is an active printer, and
+-	 * that active printer isn't us (recursive printk?).
+-	 */
+-	if (!spin) {
+-		printk_safe_exit_irqrestore(flags);
+-		return 0;
+-	}
++	if (!call_sync_console_driver(con, &con->sync_buf[0], text_len))
++		return false;
+ 
+-	/* We spin waiting for the owner to release us */
+-	spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
+-	/* Owner will clear console_waiter on hand off */
+-	while (READ_ONCE(console_waiter))
+-		cpu_relax();
+-	spin_release(&console_owner_dep_map, _THIS_IP_);
++	*seq = r.info->seq;
+ 
+-	printk_safe_exit_irqrestore(flags);
+-	/*
+-	 * The owner passed the console lock to us.
+-	 * Since we did not spin on console lock, annotate
+-	 * this as a trylock. Otherwise lockdep will
+-	 * complain.
+-	 */
+-	mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_);
++	touch_softlockup_watchdog_sync();
++	clocksource_touch_watchdog();
++	rcu_cpu_stall_reset();
++	touch_nmi_watchdog();
+ 
+-	return 1;
++	if (text_len)
++		printk_delay(r.info->level);
++
++	return true;
+ }
+ 
+-/*
+- * Call the console drivers, asking them to write out
+- * log_buf[start] to log_buf[end - 1].
+- * The console_lock must be held.
+- */
+-static void call_console_drivers(const char *ext_text, size_t ext_len,
+-				 const char *text, size_t len)
++static u64 read_console_seq(struct console *con)
+ {
+-	static char dropped_text[64];
+-	size_t dropped_len = 0;
+-	struct console *con;
++	u64 seq2;
++	u64 seq;
+ 
+-	trace_console_rcuidle(text, len);
++	seq = latched_seq_read_nolock(&con->printk_seq);
++	seq2 = latched_seq_read_nolock(&con->printk_sync_seq);
++	if (seq2 > seq)
++		seq = seq2;
++#ifdef CONFIG_HAVE_NMI
++	seq2 = latched_seq_read_nolock(&con->printk_sync_nmi_seq);
++	if (seq2 > seq)
++		seq = seq2;
++#endif
++	return seq;
++}
+ 
+-	if (!console_drivers)
+-		return;
++static void print_sync_until(struct console *con, u64 seq, bool is_locked)
++{
++	u64 printk_seq;
+ 
+-	if (console_dropped) {
+-		dropped_len = snprintf(dropped_text, sizeof(dropped_text),
+-				       "** %lu printk messages dropped **\n",
+-				       console_dropped);
+-		console_dropped = 0;
+-	}
++	while (!__printk_cpu_trylock())
++		cpu_relax();
+ 
+-	for_each_console(con) {
+-		if (exclusive_console && con != exclusive_console)
+-			continue;
+-		if (!(con->flags & CON_ENABLED))
+-			continue;
+-		if (!con->write)
+-			continue;
+-		if (!cpu_online(smp_processor_id()) &&
+-		    !(con->flags & CON_ANYTIME))
+-			continue;
+-		if (con->flags & CON_EXTENDED)
+-			con->write(con, ext_text, ext_len);
+-		else {
+-			if (dropped_len)
+-				con->write(con, dropped_text, dropped_len);
+-			con->write(con, text, len);
+-		}
++	for (;;) {
++		printk_seq = read_console_seq(con);
++		if (printk_seq >= seq)
++			break;
++		if (!print_sync(con, &printk_seq))
++			break;
++
++		if (is_locked)
++			latched_seq_write(&con->printk_seq, printk_seq + 1);
++#ifdef CONFIG_HAVE_NMI
++		else if (in_nmi())
++			latched_seq_write(&con->printk_sync_nmi_seq, printk_seq + 1);
++#endif
++		else
++			latched_seq_write(&con->printk_sync_seq, printk_seq + 1);
+ 	}
++
++	__printk_cpu_unlock();
+ }
+ 
+ /*
+@@ -2025,20 +1976,6 @@ static u8 *__printk_recursion_counter(void)
+ 		local_irq_restore(flags);		\
+ 	} while (0)
+ 
+-int printk_delay_msec __read_mostly;
+-
+-static inline void printk_delay(void)
+-{
+-	if (unlikely(printk_delay_msec)) {
+-		int m = printk_delay_msec;
+-
+-		while (m--) {
+-			mdelay(1);
+-			touch_nmi_watchdog();
+-		}
+-	}
+-}
+-
+ static inline u32 printk_caller_id(void)
+ {
+ 	return in_task() ? task_pid_nr(current) :
+@@ -2126,6 +2063,7 @@ int vprintk_store(int facility, int level,
+ 	const u32 caller_id = printk_caller_id();
+ 	struct prb_reserved_entry e;
+ 	enum printk_info_flags flags = 0;
++	bool final_commit = false;
+ 	struct printk_record r;
+ 	unsigned long irqflags;
+ 	u16 trunc_msg_len = 0;
+@@ -2136,6 +2074,7 @@ int vprintk_store(int facility, int level,
+ 	u16 text_len;
+ 	int ret = 0;
+ 	u64 ts_nsec;
++	u64 seq;
+ 
+ 	/*
+ 	 * Since the duration of printk() can vary depending on the message
+@@ -2174,6 +2113,7 @@ int vprintk_store(int facility, int level,
+ 	if (flags & LOG_CONT) {
+ 		prb_rec_init_wr(&r, reserve_size);
+ 		if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) {
++			seq = r.info->seq;
+ 			text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size,
+ 						 facility, &flags, fmt, args);
+ 			r.info->text_len += text_len;
+@@ -2181,6 +2121,7 @@ int vprintk_store(int facility, int level,
+ 			if (flags & LOG_NEWLINE) {
+ 				r.info->flags |= LOG_NEWLINE;
+ 				prb_final_commit(&e);
++				final_commit = true;
+ 			} else {
+ 				prb_commit(&e);
+ 			}
+@@ -2204,6 +2145,7 @@ int vprintk_store(int facility, int level,
+ 		if (!prb_reserve(&e, prb, &r))
+ 			goto out;
+ 	}
++	seq = r.info->seq;
+ 
+ 	/* fill message */
+ 	text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &flags, fmt, args);
+@@ -2219,13 +2161,25 @@ int vprintk_store(int facility, int level,
+ 		memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info));
+ 
+ 	/* A message without a trailing newline can be continued. */
+-	if (!(flags & LOG_NEWLINE))
++	if (!(flags & LOG_NEWLINE)) {
+ 		prb_commit(&e);
+-	else
++	} else {
+ 		prb_final_commit(&e);
++		final_commit = true;
++	}
+ 
+ 	ret = text_len + trunc_msg_len;
+ out:
++	/* only the kernel may perform synchronous printing */
++	if (facility == 0 && final_commit) {
++		struct console *con;
++
++		for_each_console(con) {
++			if (console_may_sync(con))
++				print_sync_until(con, seq + 1, false);
++		}
++	}
++
+ 	printk_exit_irqrestore(recursion_ptr, irqflags);
+ 	return ret;
+ }
+@@ -2235,40 +2189,16 @@ asmlinkage int vprintk_emit(int facility, int level,
+ 			    const char *fmt, va_list args)
+ {
+ 	int printed_len;
+-	bool in_sched = false;
+ 
+ 	/* Suppress unimportant messages after panic happens */
+ 	if (unlikely(suppress_printk))
+ 		return 0;
+ 
+-	if (level == LOGLEVEL_SCHED) {
++	if (level == LOGLEVEL_SCHED)
+ 		level = LOGLEVEL_DEFAULT;
+-		in_sched = true;
+-	}
+-
+-	boot_delay_msec(level);
+-	printk_delay();
+ 
+ 	printed_len = vprintk_store(facility, level, dev_info, fmt, args);
+ 
+-	/* If called from the scheduler, we can not call up(). */
+-	if (!in_sched) {
+-		/*
+-		 * Disable preemption to avoid being preempted while holding
+-		 * console_sem which would prevent anyone from printing to
+-		 * console
+-		 */
+-		preempt_disable();
+-		/*
+-		 * Try to acquire and then immediately release the console
+-		 * semaphore.  The release will print out buffers and wake up
+-		 * /dev/kmsg and syslog() users.
+-		 */
+-		if (console_trylock_spinning())
+-			console_unlock();
+-		preempt_enable();
+-	}
+-
+ 	wake_up_klogd();
+ 	return printed_len;
+ }
+@@ -2293,37 +2223,162 @@ asmlinkage __visible int _printk(const char *fmt, ...)
+ }
+ EXPORT_SYMBOL(_printk);
+ 
+-#else /* CONFIG_PRINTK */
++static int printk_kthread_func(void *data)
++{
++	struct console *con = data;
++	unsigned long dropped = 0;
++	char *dropped_text = NULL;
++	struct printk_info info;
++	struct printk_record r;
++	char *ext_text = NULL;
++	size_t dropped_len;
++	int ret = -ENOMEM;
++	char *text = NULL;
++	char *write_text;
++	size_t len;
++	int error;
++	u64 seq;
++
++	if (con->flags & CON_EXTENDED) {
++		ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL);
++		if (!ext_text)
++			goto out;
++	}
++	text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
++	dropped_text = kmalloc(64, GFP_KERNEL);
++	if (!text || !dropped_text)
++		goto out;
++	if (con->flags & CON_EXTENDED)
++		write_text = ext_text;
++	else
++		write_text = text;
++
++	seq = read_console_seq(con);
+ 
+-#define CONSOLE_LOG_MAX		0
+-#define printk_time		false
++	prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX);
+ 
+-#define prb_read_valid(rb, seq, r)	false
+-#define prb_first_valid_seq(rb)		0
++	for (;;) {
++		error = wait_event_interruptible(log_wait,
++						 prb_read_valid(prb, seq, &r) || kthread_should_stop());
+ 
+-static u64 syslog_seq;
+-static u64 console_seq;
+-static u64 exclusive_console_stop_seq;
+-static unsigned long console_dropped;
++		if (kthread_should_stop())
++			break;
++
++		if (error)
++			continue;
++
++		if (seq != r.info->seq) {
++			dropped += r.info->seq - seq;
++			seq = r.info->seq;
++		}
++
++		seq++;
++
++		if (!(con->flags & CON_ENABLED))
++			continue;
++
++		if (suppress_message_printing(r.info->level))
++			continue;
++
++		if (con->flags & CON_EXTENDED) {
++			len = info_print_ext_header(ext_text,
++						    CONSOLE_EXT_LOG_MAX,
++						    r.info);
++			len += msg_print_ext_body(ext_text + len,
++						  CONSOLE_EXT_LOG_MAX - len,
++						  &r.text_buf[0], r.info->text_len,
++						  &r.info->dev_info);
++		} else {
++			len = record_print_text(&r,
++						console_msg_format & MSG_FORMAT_SYSLOG,
++						printk_time);
++		}
++
++		console_lock();
++
++		/*
++		 * Even though the printk kthread is always preemptible, it is
++		 * still not allowed to call cond_resched() from within
++		 * console drivers. The task may become non-preemptible in the
++		 * console driver call chain. For example, vt_console_print()
++		 * takes a spinlock and then can call into fbcon_redraw(),
++		 * which can conditionally invoke cond_resched().
++		 */
++		console_may_schedule = 0;
++
++		if (kernel_sync_mode() && con->write_atomic) {
++			console_unlock();
++			break;
++		}
++
++		if (!(con->flags & CON_EXTENDED) && dropped) {
++			dropped_len = snprintf(dropped_text, 64,
++					       "** %lu printk messages dropped **\n",
++					       dropped);
++			dropped = 0;
++
++			con->write(con, dropped_text, dropped_len);
++			printk_delay(r.info->level);
++		}
++
++		con->write(con, write_text, len);
++		if (len)
++			printk_delay(r.info->level);
+ 
+-static size_t record_print_text(const struct printk_record *r,
+-				bool syslog, bool time)
++		latched_seq_write(&con->printk_seq, seq);
++
++		console_unlock();
++	}
++	ret = 0;
++out:
++	kfree(dropped_text);
++	kfree(text);
++	kfree(ext_text);
++	pr_info("%sconsole [%s%d]: printing thread stopped\n",
++		(con->flags & CON_BOOT) ? "boot" : "",
++		con->name, con->index);
++	return ret;
++}
++
++/* Must be called within console_lock(). Leaves con->thread NULL on failure. */
++static void start_printk_kthread(struct console *con)
+ {
+-	return 0;
++	con->thread = kthread_run(printk_kthread_func, con,
++				  "pr/%s%d", con->name, con->index);
++	if (IS_ERR(con->thread)) {
++		pr_err("%sconsole [%s%d]: unable to start printing thread\n",
++		       (con->flags & CON_BOOT) ? "boot" : "", con->name, con->index);
++		con->thread = NULL;	/* callers test !con->thread, not IS_ERR() */
++		return;
++	}
++	pr_info("%sconsole [%s%d]: printing thread started\n",
++		(con->flags & CON_BOOT) ? "boot" : "",
++		con->name, con->index);
+ }
+-static ssize_t info_print_ext_header(char *buf, size_t size,
+-				     struct printk_info *info)
++
++/* protected by console_lock */
++static bool kthreads_started;
++
++/* Must be called within console_lock(). */
++static void console_try_thread(struct console *con)
+ {
+-	return 0;
++	if (kthreads_started) {
++		start_printk_kthread(con);
++		return;
++	}
++
++	/*
++	 * The printing threads have not been started yet. If this console
++	 * can print synchronously, print all unprinted messages.
++	 */
++	if (console_may_sync(con)) {
++		unsigned long flags;
++
++		local_irq_save(flags);
++		print_sync_until(con, prb_next_seq(prb), true);
++		local_irq_restore(flags);
++	}
+ }
+-static ssize_t msg_print_ext_body(char *buf, size_t size,
+-				  char *text, size_t text_len,
+-				  struct dev_printk_info *dev_info) { return 0; }
+-static void console_lock_spinning_enable(void) { }
+-static int console_lock_spinning_disable_and_check(void) { return 0; }
+-static void call_console_drivers(const char *ext_text, size_t ext_len,
+-				 const char *text, size_t len) {}
+-static bool suppress_message_printing(int level) { return false; }
+ 
+ #endif /* CONFIG_PRINTK */
+ 
+@@ -2580,34 +2635,6 @@ int is_console_locked(void)
+ }
+ EXPORT_SYMBOL(is_console_locked);
+ 
+-/*
+- * Check if we have any console that is capable of printing while cpu is
+- * booting or shutting down. Requires console_sem.
+- */
+-static int have_callable_console(void)
+-{
+-	struct console *con;
+-
+-	for_each_console(con)
+-		if ((con->flags & CON_ENABLED) &&
+-				(con->flags & CON_ANYTIME))
+-			return 1;
+-
+-	return 0;
+-}
+-
+-/*
+- * Can we actually use the console at this time on this cpu?
+- *
+- * Console drivers may assume that per-cpu resources have been allocated. So
+- * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
+- * call them until this CPU is officially up.
+- */
+-static inline int can_use_console(void)
+-{
+-	return cpu_online(raw_smp_processor_id()) || have_callable_console();
+-}
+-
+ /**
+  * console_unlock - unlock the console system
+  *
+@@ -2624,140 +2651,13 @@ static inline int can_use_console(void)
+  */
+ void console_unlock(void)
+ {
+-	static char ext_text[CONSOLE_EXT_LOG_MAX];
+-	static char text[CONSOLE_LOG_MAX];
+-	unsigned long flags;
+-	bool do_cond_resched, retry;
+-	struct printk_info info;
+-	struct printk_record r;
+-	u64 __maybe_unused next_seq;
+-
+ 	if (console_suspended) {
+ 		up_console_sem();
+ 		return;
+ 	}
+ 
+-	prb_rec_init_rd(&r, &info, text, sizeof(text));
+-
+-	/*
+-	 * Console drivers are called with interrupts disabled, so
+-	 * @console_may_schedule should be cleared before; however, we may
+-	 * end up dumping a lot of lines, for example, if called from
+-	 * console registration path, and should invoke cond_resched()
+-	 * between lines if allowable.  Not doing so can cause a very long
+-	 * scheduling stall on a slow console leading to RCU stall and
+-	 * softlockup warnings which exacerbate the issue with more
+-	 * messages practically incapacitating the system.
+-	 *
+-	 * console_trylock() is not able to detect the preemptive
+-	 * context reliably. Therefore the value must be stored before
+-	 * and cleared after the "again" goto label.
+-	 */
+-	do_cond_resched = console_may_schedule;
+-again:
+-	console_may_schedule = 0;
+-
+-	/*
+-	 * We released the console_sem lock, so we need to recheck if
+-	 * cpu is online and (if not) is there at least one CON_ANYTIME
+-	 * console.
+-	 */
+-	if (!can_use_console()) {
+-		console_locked = 0;
+-		up_console_sem();
+-		return;
+-	}
+-
+-	for (;;) {
+-		size_t ext_len = 0;
+-		int handover;
+-		size_t len;
+-
+-skip:
+-		if (!prb_read_valid(prb, console_seq, &r))
+-			break;
+-
+-		if (console_seq != r.info->seq) {
+-			console_dropped += r.info->seq - console_seq;
+-			console_seq = r.info->seq;
+-		}
+-
+-		if (suppress_message_printing(r.info->level)) {
+-			/*
+-			 * Skip record we have buffered and already printed
+-			 * directly to the console when we received it, and
+-			 * record that has level above the console loglevel.
+-			 */
+-			console_seq++;
+-			goto skip;
+-		}
+-
+-		/* Output to all consoles once old messages replayed. */
+-		if (unlikely(exclusive_console &&
+-			     console_seq >= exclusive_console_stop_seq)) {
+-			exclusive_console = NULL;
+-		}
+-
+-		/*
+-		 * Handle extended console text first because later
+-		 * record_print_text() will modify the record buffer in-place.
+-		 */
+-		if (nr_ext_console_drivers) {
+-			ext_len = info_print_ext_header(ext_text,
+-						sizeof(ext_text),
+-						r.info);
+-			ext_len += msg_print_ext_body(ext_text + ext_len,
+-						sizeof(ext_text) - ext_len,
+-						&r.text_buf[0],
+-						r.info->text_len,
+-						&r.info->dev_info);
+-		}
+-		len = record_print_text(&r,
+-				console_msg_format & MSG_FORMAT_SYSLOG,
+-				printk_time);
+-		console_seq++;
+-
+-		/*
+-		 * While actively printing out messages, if another printk()
+-		 * were to occur on another CPU, it may wait for this one to
+-		 * finish. This task can not be preempted if there is a
+-		 * waiter waiting to take over.
+-		 *
+-		 * Interrupts are disabled because the hand over to a waiter
+-		 * must not be interrupted until the hand over is completed
+-		 * (@console_waiter is cleared).
+-		 */
+-		printk_safe_enter_irqsave(flags);
+-		console_lock_spinning_enable();
+-
+-		stop_critical_timings();	/* don't trace print latency */
+-		call_console_drivers(ext_text, ext_len, text, len);
+-		start_critical_timings();
+-
+-		handover = console_lock_spinning_disable_and_check();
+-		printk_safe_exit_irqrestore(flags);
+-		if (handover)
+-			return;
+-
+-		if (do_cond_resched)
+-			cond_resched();
+-	}
+-
+-	/* Get consistent value of the next-to-be-used sequence number. */
+-	next_seq = console_seq;
+-
+ 	console_locked = 0;
+ 	up_console_sem();
+-
+-	/*
+-	 * Someone could have filled up the buffer again, so re-check if there's
+-	 * something to flush. In case we cannot trylock the console_sem again,
+-	 * there's a new owner and the console_unlock() from them will do the
+-	 * flush, no worries.
+-	 */
+-	retry = prb_read_valid(prb, next_seq, NULL);
+-	if (retry && console_trylock())
+-		goto again;
+ }
+ EXPORT_SYMBOL(console_unlock);
+ 
+@@ -2807,18 +2707,20 @@ void console_unblank(void)
+  */
+ void console_flush_on_panic(enum con_flush_mode mode)
+ {
+-	/*
+-	 * If someone else is holding the console lock, trylock will fail
+-	 * and may_schedule may be set.  Ignore and proceed to unlock so
+-	 * that messages are flushed out.  As this can be called from any
+-	 * context and we don't want to get preempted while flushing,
+-	 * ensure may_schedule is cleared.
+-	 */
+-	console_trylock();
+-	console_may_schedule = 0;
++	if (!console_trylock())
++		return;
++
++#ifdef CONFIG_PRINTK
++	if (mode == CONSOLE_REPLAY_ALL) {
++		struct console *c;
++		u64 seq;
++
++		seq = prb_first_valid_seq(prb);
++		for_each_console(c)
++			latched_seq_write(&c->printk_seq, seq);
++	}
++#endif
+ 
+-	if (mode == CONSOLE_REPLAY_ALL)
+-		console_seq = prb_first_valid_seq(prb);
+ 	console_unlock();
+ }
+ 
+@@ -2954,6 +2856,7 @@ static int try_enable_new_console(struct console *newcon, bool user_specified)
+ void register_console(struct console *newcon)
+ {
+ 	struct console *bcon = NULL;
++	u64 __maybe_unused seq = 0;
+ 	int err;
+ 
+ 	for_each_console(bcon) {
+@@ -2976,6 +2879,8 @@ void register_console(struct console *newcon)
+ 		}
+ 	}
+ 
++	newcon->thread = NULL;
++
+ 	if (console_drivers && console_drivers->flags & CON_BOOT)
+ 		bcon = console_drivers;
+ 
+@@ -3017,8 +2922,10 @@ void register_console(struct console *newcon)
+ 	 * the real console are the same physical device, it's annoying to
+ 	 * see the beginning boot messages twice
+ 	 */
+-	if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV))
++	if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) {
+ 		newcon->flags &= ~CON_PRINTBUFFER;
++		newcon->flags |= CON_HANDOVER;
++	}
+ 
+ 	/*
+ 	 *	Put this console in the list - keep the
+@@ -3040,27 +2947,21 @@ void register_console(struct console *newcon)
+ 	if (newcon->flags & CON_EXTENDED)
+ 		nr_ext_console_drivers++;
+ 
+-	if (newcon->flags & CON_PRINTBUFFER) {
+-		/*
+-		 * console_unlock(); will print out the buffered messages
+-		 * for us.
+-		 *
+-		 * We're about to replay the log buffer.  Only do this to the
+-		 * just-registered console to avoid excessive message spam to
+-		 * the already-registered consoles.
+-		 *
+-		 * Set exclusive_console with disabled interrupts to reduce
+-		 * race window with eventual console_flush_on_panic() that
+-		 * ignores console_lock.
+-		 */
+-		exclusive_console = newcon;
+-		exclusive_console_stop_seq = console_seq;
++#ifdef CONFIG_PRINTK
++	if (!(newcon->flags & CON_PRINTBUFFER))
++		seq = prb_next_seq(prb);
+ 
+-		/* Get a consistent copy of @syslog_seq. */
+-		mutex_lock(&syslog_lock);
+-		console_seq = syslog_seq;
+-		mutex_unlock(&syslog_lock);
+-	}
++	seqcount_latch_init(&newcon->printk_seq.latch);
++	latched_seq_write(&newcon->printk_seq, seq);
++	seqcount_latch_init(&newcon->printk_sync_seq.latch);
++	latched_seq_write(&newcon->printk_sync_seq, seq);
++#ifdef CONFIG_HAVE_NMI
++	seqcount_latch_init(&newcon->printk_sync_nmi_seq.latch);
++	latched_seq_write(&newcon->printk_sync_nmi_seq, seq);
++#endif
++
++	console_try_thread(newcon);
++#endif /* CONFIG_PRINTK */
+ 	console_unlock();
+ 	console_sysfs_notify();
+ 
+@@ -3134,6 +3035,9 @@ int unregister_console(struct console *console)
+ 	console_unlock();
+ 	console_sysfs_notify();
+ 
++	if (console->thread && !IS_ERR(console->thread))
++		kthread_stop(console->thread);
++
+ 	if (console->exit)
+ 		res = console->exit(console);
+ 
+@@ -3216,6 +3120,15 @@ static int __init printk_late_init(void)
+ 			unregister_console(con);
+ 		}
+ 	}
++
++#ifdef CONFIG_PRINTK
++	console_lock();
++	for_each_console(con)
++		start_printk_kthread(con);
++	kthreads_started = true;
++	console_unlock();
++#endif
++
+ 	ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL,
+ 					console_cpu_notify);
+ 	WARN_ON(ret < 0);
+@@ -3239,14 +3152,8 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work)
+ {
+ 	int pending = this_cpu_xchg(printk_pending, 0);
+ 
+-	if (pending & PRINTK_PENDING_OUTPUT) {
+-		/* If trylock fails, someone else is doing the printing */
+-		if (console_trylock())
+-			console_unlock();
+-	}
+-
+ 	if (pending & PRINTK_PENDING_WAKEUP)
+-		wake_up_interruptible(&log_wait);
++		wake_up_interruptible_all(&log_wait);
+ }
+ 
+ static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) =
+@@ -3293,7 +3200,7 @@ void defer_console_output(void)
+ 
+ void printk_trigger_flush(void)
+ {
+-	defer_console_output();
++	wake_up_klogd();
+ }
+ 
+ int vprintk_deferred(const char *fmt, va_list args)
+@@ -3444,6 +3351,24 @@ void kmsg_dump(enum kmsg_dump_reason reason)
+ {
+ 	struct kmsg_dumper *dumper;
+ 
++	if (!oops_in_progress) {
++		/*
++		 * If atomic consoles are available, activate kernel sync mode
++		 * to make sure any final messages are visible. The trailing
++		 * printk message is important to flush any pending messages.
++		 */
++		if (have_atomic_console()) {
++			sync_mode = true;
++			pr_info("enabled sync mode\n");
++		}
++
++		/*
++		 * Give the printing threads time to flush, allowing up to
++		 * 1s of no printing forward progress before giving up.
++		 */
++		pr_flush(1000, true);
++	}
++
+ 	rcu_read_lock();
+ 	list_for_each_entry_rcu(dumper, &dump_list, list) {
+ 		enum kmsg_dump_reason max_reason = dumper->max_reason;
+@@ -3626,6 +3551,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
+ #ifdef CONFIG_SMP
+ static atomic_t printk_cpulock_owner = ATOMIC_INIT(-1);
+ static atomic_t printk_cpulock_nested = ATOMIC_INIT(0);
++static unsigned int kgdb_cpu = -1;
+ 
+ /**
+  * __printk_wait_on_cpu_lock() - Busy wait until the printk cpu-reentrant
+@@ -3705,6 +3631,9 @@ EXPORT_SYMBOL(__printk_cpu_trylock);
+  */
+ void __printk_cpu_unlock(void)
+ {
++	bool trigger_kgdb = false;
++	unsigned int cpu;
++
+ 	if (atomic_read(&printk_cpulock_nested)) {
+ 		atomic_dec(&printk_cpulock_nested);
+ 		return;
+@@ -3715,6 +3644,12 @@ void __printk_cpu_unlock(void)
+ 	 * LMM(__printk_cpu_unlock:A)
+ 	 */
+ 
++	cpu = smp_processor_id();
++	if (kgdb_cpu == cpu) {
++		trigger_kgdb = true;
++		kgdb_cpu = -1;
++	}
++
+ 	/*
+ 	 * Guarantee loads and stores from this CPU when it was the
+ 	 * lock owner are visible to the next lock owner. This pairs
+@@ -3735,6 +3670,98 @@ void __printk_cpu_unlock(void)
+ 	 */
+ 	atomic_set_release(&printk_cpulock_owner,
+ 			   -1); /* LMM(__printk_cpu_unlock:B) */
++
++	if (trigger_kgdb) {
++		pr_warn("re-triggering kgdb roundup for CPU#%d\n", cpu);
++		kgdb_roundup_cpu(cpu);
++	}
+ }
+ EXPORT_SYMBOL(__printk_cpu_unlock);
++
++bool kgdb_roundup_delay(unsigned int cpu)
++{
++	if (cpu != atomic_read(&printk_cpulock_owner))
++		return false;
++
++	kgdb_cpu = cpu;
++	return true;
++}
++EXPORT_SYMBOL(kgdb_roundup_delay);
+ #endif /* CONFIG_SMP */
++
++#ifdef CONFIG_PRINTK
++static void pr_msleep(bool may_sleep, int ms)
++{
++	if (may_sleep) {
++		msleep(ms);
++	} else {
++		while (ms--)
++			udelay(1000);
++	}
++}
++
++/**
++ * pr_flush() - Wait for printing threads to catch up.
++ *
++ * @timeout_ms:        The maximum time (in ms) to wait.
++ * @reset_on_progress: Reset the timeout if forward progress is seen.
++ *
++ * A value of 0 for @timeout_ms means no waiting will occur. A value of -1
++ * represents infinite waiting.
++ *
++ * If @reset_on_progress is true, the timeout will be reset whenever any
++ * printer has been seen to make some forward progress.
++ *
++ * Context: Any context.
++ * Return: true if all enabled printers are caught up.
++ */
++bool pr_flush(int timeout_ms, bool reset_on_progress)
++{
++	int remaining = timeout_ms;
++	struct console *con;
++	u64 last_diff = 0;
++	bool may_sleep;
++	u64 printk_seq;
++	u64 diff;
++	u64 seq;
++
++	may_sleep = (preemptible() &&
++		     !in_softirq() &&
++		     system_state >= SYSTEM_RUNNING);
++
++	seq = prb_next_seq(prb);
++
++	for (;;) {
++		diff = 0;
++
++		for_each_console(con) {
++			if (!(con->flags & CON_ENABLED))
++				continue;
++			printk_seq = read_console_seq(con);
++			if (printk_seq < seq)
++				diff += seq - printk_seq;
++		}
++
++		if (diff != last_diff && reset_on_progress)
++			remaining = timeout_ms;
++
++		if (diff == 0 || remaining == 0)
++			break;
++
++		if (remaining < 0) {
++			pr_msleep(may_sleep, 100);
++		} else if (remaining < 100) {
++			pr_msleep(may_sleep, remaining);
++			remaining = 0;
++		} else {
++			pr_msleep(may_sleep, 100);
++			remaining -= 100;
++		}
++
++		last_diff = diff;
++	}
++
++	return (diff == 0);
++}
++EXPORT_SYMBOL(pr_flush);
++#endif /* CONFIG_PRINTK */
+diff --git a/kernel/ptrace.c b/kernel/ptrace.c
+index 0cf547531ddf..0df2de214daa 100644
+--- a/kernel/ptrace.c
++++ b/kernel/ptrace.c
+@@ -197,7 +197,18 @@ static bool ptrace_freeze_traced(struct task_struct *task)
+ 	spin_lock_irq(&task->sighand->siglock);
+ 	if (task_is_traced(task) && !looks_like_a_spurious_pid(task) &&
+ 	    !__fatal_signal_pending(task)) {
++#ifdef CONFIG_PREEMPT_RT
++		unsigned long flags;
++
++		raw_spin_lock_irqsave(&task->pi_lock, flags);
++		if (READ_ONCE(task->__state) & __TASK_TRACED)
++			WRITE_ONCE(task->__state, __TASK_TRACED);
++		else
++			task->saved_state = __TASK_TRACED;
++		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
++#else
+ 		WRITE_ONCE(task->__state, __TASK_TRACED);
++#endif
+ 		ret = true;
+ 	}
+ 	spin_unlock_irq(&task->sighand->siglock);
+@@ -207,7 +218,11 @@ static bool ptrace_freeze_traced(struct task_struct *task)
+ 
+ static void ptrace_unfreeze_traced(struct task_struct *task)
+ {
+-	if (READ_ONCE(task->__state) != __TASK_TRACED)
++	unsigned long flags;
++	bool frozen = true;
++
++	if (!IS_ENABLED(CONFIG_PREEMPT_RT) &&
++	    READ_ONCE(task->__state) != __TASK_TRACED)
+ 		return;
+ 
+ 	WARN_ON(!task->ptrace || task->parent != current);
+@@ -217,12 +232,21 @@ static void ptrace_unfreeze_traced(struct task_struct *task)
+ 	 * Recheck state under the lock to close this race.
+ 	 */
+ 	spin_lock_irq(&task->sighand->siglock);
+-	if (READ_ONCE(task->__state) == __TASK_TRACED) {
+-		if (__fatal_signal_pending(task))
+-			wake_up_state(task, __TASK_TRACED);
+-		else
+-			WRITE_ONCE(task->__state, TASK_TRACED);
+-	}
++	raw_spin_lock_irqsave(&task->pi_lock, flags);
++	if (READ_ONCE(task->__state) == __TASK_TRACED)
++		WRITE_ONCE(task->__state, TASK_TRACED);
++
++#ifdef CONFIG_PREEMPT_RT
++	else if (task->saved_state == __TASK_TRACED)
++		task->saved_state = TASK_TRACED;
++#endif
++	else
++		frozen = false;
++	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
++
++	if (frozen && __fatal_signal_pending(task))
++		wake_up_state(task, __TASK_TRACED);
++
+ 	spin_unlock_irq(&task->sighand->siglock);
+ }
+ 
+diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
+index 94b8ee84bc78..509ea934305c 100644
+--- a/kernel/rcu/tasks.h
++++ b/kernel/rcu/tasks.h
+@@ -1362,7 +1362,7 @@ static void test_rcu_tasks_callback(struct rcu_head *rhp)
+ 	rttd->notrun = true;
+ }
+ 
+-static void rcu_tasks_initiate_self_tests(void)
++void rcu_tasks_initiate_self_tests(void)
+ {
+ 	pr_info("Running RCU-tasks wait API self tests\n");
+ #ifdef CONFIG_TASKS_RCU
+@@ -1399,9 +1399,7 @@ static int rcu_tasks_verify_self_tests(void)
+ 	return ret;
+ }
+ late_initcall(rcu_tasks_verify_self_tests);
+-#else /* #ifdef CONFIG_PROVE_RCU */
+-static void rcu_tasks_initiate_self_tests(void) { }
+-#endif /* #else #ifdef CONFIG_PROVE_RCU */
++#endif /* #ifdef CONFIG_PROVE_RCU */
+ 
+ void __init rcu_init_tasks_generic(void)
+ {
+@@ -1416,9 +1414,6 @@ void __init rcu_init_tasks_generic(void)
+ #ifdef CONFIG_TASKS_TRACE_RCU
+ 	rcu_spawn_tasks_trace_kthread();
+ #endif
+-
+-	// Run the self-tests.
+-	rcu_tasks_initiate_self_tests();
+ }
+ 
+ #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
+diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
+index df016f6d0662..8ea272f7eb18 100644
+--- a/kernel/rcu/tree.c
++++ b/kernel/rcu/tree.c
+@@ -2280,13 +2280,13 @@ rcu_report_qs_rdp(struct rcu_data *rdp)
+ {
+ 	unsigned long flags;
+ 	unsigned long mask;
+-	bool needwake = false;
+-	const bool offloaded = rcu_rdp_is_offloaded(rdp);
++	bool offloaded, needwake = false;
+ 	struct rcu_node *rnp;
+ 
+ 	WARN_ON_ONCE(rdp->cpu != smp_processor_id());
+ 	rnp = rdp->mynode;
+ 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
++	offloaded = rcu_rdp_is_offloaded(rdp);
+ 	if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq ||
+ 	    rdp->gpwrap) {
+ 
+@@ -2448,7 +2448,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
+ 	int div;
+ 	bool __maybe_unused empty;
+ 	unsigned long flags;
+-	const bool offloaded = rcu_rdp_is_offloaded(rdp);
++	bool offloaded;
+ 	struct rcu_head *rhp;
+ 	struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
+ 	long bl, count = 0;
+@@ -2474,6 +2474,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
+ 	rcu_nocb_lock(rdp);
+ 	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
+ 	pending = rcu_segcblist_n_cbs(&rdp->cblist);
++	offloaded = rcu_rdp_is_offloaded(rdp);
+ 	div = READ_ONCE(rcu_divisor);
+ 	div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div;
+ 	bl = max(rdp->blimit, pending >> div);
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index d34a56f16d13..cd0983900823 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -75,7 +75,11 @@ __read_mostly int sysctl_resched_latency_warn_once = 1;
+  * Number of tasks to iterate in a single balance run.
+  * Limited because this is done with IRQs disabled.
+  */
++#ifdef CONFIG_PREEMPT_RT
++const_debug unsigned int sysctl_sched_nr_migrate = 8;
++#else
+ const_debug unsigned int sysctl_sched_nr_migrate = 32;
++#endif
+ 
+ /*
+  * period over which we measure -rt task CPU usage in us.
+@@ -983,6 +987,46 @@ void resched_curr(struct rq *rq)
+ 		trace_sched_wake_idle_without_ipi(cpu);
+ }
+ 
++#ifdef CONFIG_PREEMPT_LAZY
++
++static int tsk_is_polling(struct task_struct *p)
++{
++#ifdef TIF_POLLING_NRFLAG
++	return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
++#else
++	return 0;
++#endif
++}
++
++void resched_curr_lazy(struct rq *rq)
++{
++	struct task_struct *curr = rq->curr;
++	int cpu;
++
++	if (!sched_feat(PREEMPT_LAZY)) {
++		resched_curr(rq);
++		return;
++	}
++
++	if (test_tsk_need_resched(curr))
++		return;
++
++	if (test_tsk_need_resched_lazy(curr))
++		return;
++
++	set_tsk_need_resched_lazy(curr);
++
++	cpu = cpu_of(rq);
++	if (cpu == smp_processor_id())
++		return;
++
++	/* NEED_RESCHED_LAZY must be visible before we test polling */
++	smp_mb();
++	if (!tsk_is_polling(curr))
++		smp_send_reschedule(cpu);
++}
++#endif
++
+ void resched_cpu(int cpu)
+ {
+ 	struct rq *rq = cpu_rq(cpu);
+@@ -2141,6 +2185,7 @@ void migrate_disable(void)
+ 	preempt_disable();
+ 	this_rq()->nr_pinned++;
+ 	p->migration_disabled = 1;
++	preempt_lazy_disable();
+ 	preempt_enable();
+ }
+ EXPORT_SYMBOL_GPL(migrate_disable);
+@@ -2152,6 +2197,8 @@ void migrate_enable(void)
+ 	if (p->migration_disabled > 1) {
+ 		p->migration_disabled--;
+ 		return;
++	} else if (WARN_ON_ONCE(p->migration_disabled == 0)) {
++		return;
+ 	}
+ 
+ 	/*
+@@ -2169,6 +2216,7 @@ void migrate_enable(void)
+ 	barrier();
+ 	p->migration_disabled = 0;
+ 	this_rq()->nr_pinned--;
++	preempt_lazy_enable();
+ 	preempt_enable();
+ }
+ EXPORT_SYMBOL_GPL(migrate_enable);
+@@ -3235,7 +3283,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
+ 		 * is actually now running somewhere else!
+ 		 */
+ 		while (task_running(rq, p)) {
+-			if (match_state && unlikely(READ_ONCE(p->__state) != match_state))
++			if (match_state && !task_match_state_lock(p, match_state))
+ 				return 0;
+ 			cpu_relax();
+ 		}
+@@ -3250,7 +3298,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
+ 		running = task_running(rq, p);
+ 		queued = task_on_rq_queued(p);
+ 		ncsw = 0;
+-		if (!match_state || READ_ONCE(p->__state) == match_state)
++		if (!match_state || task_match_state_or_saved(p, match_state))
+ 			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
+ 		task_rq_unlock(rq, p, &rf);
+ 
+@@ -3284,7 +3332,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
+ 			ktime_t to = NSEC_PER_SEC / HZ;
+ 
+ 			set_current_state(TASK_UNINTERRUPTIBLE);
+-			schedule_hrtimeout(&to, HRTIMER_MODE_REL);
++			schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD);
+ 			continue;
+ 		}
+ 
+@@ -4427,6 +4475,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
+ 	p->on_cpu = 0;
+ #endif
+ 	init_task_preempt_count(p);
++#ifdef CONFIG_HAVE_PREEMPT_LAZY
++	task_thread_info(p)->preempt_lazy_count = 0;
++#endif
+ #ifdef CONFIG_SMP
+ 	plist_node_init(&p->pushable_tasks, MAX_PRIO);
+ 	RB_CLEAR_NODE(&p->pushable_dl_tasks);
+@@ -4922,20 +4973,18 @@ static struct rq *finish_task_switch(struct task_struct *prev)
+ 	 */
+ 	if (mm) {
+ 		membarrier_mm_sync_core_before_usermode(mm);
+-		mmdrop(mm);
++		mmdrop_sched(mm);
+ 	}
+ 	if (unlikely(prev_state == TASK_DEAD)) {
+ 		if (prev->sched_class->task_dead)
+ 			prev->sched_class->task_dead(prev);
+ 
+ 		/*
+-		 * Remove function-return probe instances associated with this
+-		 * task and put them back on the free list.
++		 * Release VMAP'ed task stack immediately for reuse. On RT
++		 * enabled kernels this is delayed for latency reasons.
+ 		 */
+-		kprobe_flush_task(prev);
+-
+-		/* Task is done with its stack. */
+-		put_task_stack(prev);
++		if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++			put_task_stack(prev);
+ 
+ 		put_task_struct_rcu_user(prev);
+ 	}
+@@ -6335,6 +6384,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
+ 
+ 	next = pick_next_task(rq, prev, &rf);
+ 	clear_tsk_need_resched(prev);
++	clear_tsk_need_resched_lazy(prev);
+ 	clear_preempt_need_resched();
+ #ifdef CONFIG_SCHED_DEBUG
+ 	rq->last_seen_need_resched_ns = 0;
+@@ -6556,6 +6606,30 @@ static void __sched notrace preempt_schedule_common(void)
+ 	} while (need_resched());
+ }
+ 
++#ifdef CONFIG_PREEMPT_LAZY
++/*
++ * If TIF_NEED_RESCHED is set then we allow to be scheduled away since this
++ * is set by an RT task. Otherwise we try to avoid being scheduled out as long
++ * as preempt_lazy_count counter >0.
++ */
++static __always_inline int preemptible_lazy(void)
++{
++	if (test_thread_flag(TIF_NEED_RESCHED))
++		return 1;
++	if (current_thread_info()->preempt_lazy_count)
++		return 0;
++	return 1;
++}
++
++#else
++
++static inline int preemptible_lazy(void)
++{
++	return 1;
++}
++
++#endif
++
+ #ifdef CONFIG_PREEMPTION
+ /*
+  * This is the entry point to schedule() from in-kernel preemption
+@@ -6569,7 +6643,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
+ 	 */
+ 	if (likely(!preemptible()))
+ 		return;
+-
++	if (!preemptible_lazy())
++		return;
+ 	preempt_schedule_common();
+ }
+ NOKPROBE_SYMBOL(preempt_schedule);
+@@ -6602,6 +6677,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
+ 	if (likely(!preemptible()))
+ 		return;
+ 
++	if (!preemptible_lazy())
++		return;
++
+ 	do {
+ 		/*
+ 		 * Because the function tracer can trace preempt_count_sub()
+@@ -8754,7 +8832,9 @@ void __init init_idle(struct task_struct *idle, int cpu)
+ 
+ 	/* Set the preempt count _outside_ the spinlocks! */
+ 	init_idle_preempt_count(idle, cpu);
+-
++#ifdef CONFIG_HAVE_PREEMPT_LAZY
++	task_thread_info(idle)->preempt_lazy_count = 0;
++#endif
+ 	/*
+ 	 * The idle tasks have their own, simple scheduling class:
+ 	 */
+@@ -9555,14 +9635,8 @@ void __init sched_init(void)
+ }
+ 
+ #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+-static inline int preempt_count_equals(int preempt_offset)
+-{
+-	int nested = preempt_count() + rcu_preempt_depth();
+-
+-	return (nested == preempt_offset);
+-}
+ 
+-void __might_sleep(const char *file, int line, int preempt_offset)
++void __might_sleep(const char *file, int line)
+ {
+ 	unsigned int state = get_current_state();
+ 	/*
+@@ -9576,11 +9650,32 @@ void __might_sleep(const char *file, int line, int preempt_offset)
+ 			(void *)current->task_state_change,
+ 			(void *)current->task_state_change);
+ 
+-	___might_sleep(file, line, preempt_offset);
++	__might_resched(file, line, 0);
+ }
+ EXPORT_SYMBOL(__might_sleep);
+ 
+-void ___might_sleep(const char *file, int line, int preempt_offset)
++static void print_preempt_disable_ip(int preempt_offset, unsigned long ip)
++{
++	if (!IS_ENABLED(CONFIG_DEBUG_PREEMPT))
++		return;
++
++	if (preempt_count() == preempt_offset)
++		return;
++
++	pr_err("Preemption disabled at:");
++	print_ip_sym(KERN_ERR, ip);
++}
++
++static inline bool resched_offsets_ok(unsigned int offsets)
++{
++	unsigned int nested = preempt_count();
++
++	nested += rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT;
++
++	return nested == offsets;
++}
++
++void __might_resched(const char *file, int line, unsigned int offsets)
+ {
+ 	/* Ratelimiting timestamp: */
+ 	static unsigned long prev_jiffy;
+@@ -9590,7 +9685,7 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
+ 	/* WARN_ON_ONCE() by default, no rate limit required: */
+ 	rcu_sleep_check();
+ 
+-	if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
++	if ((resched_offsets_ok(offsets) && !irqs_disabled() &&
+ 	     !is_idle_task(current) && !current->non_block_count) ||
+ 	    system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING ||
+ 	    oops_in_progress)
+@@ -9603,29 +9698,33 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
+ 	/* Save this before calling printk(), since that will clobber it: */
+ 	preempt_disable_ip = get_preempt_disable_ip(current);
+ 
+-	printk(KERN_ERR
+-		"BUG: sleeping function called from invalid context at %s:%d\n",
+-			file, line);
+-	printk(KERN_ERR
+-		"in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n",
+-			in_atomic(), irqs_disabled(), current->non_block_count,
+-			current->pid, current->comm);
++	pr_err("BUG: sleeping function called from invalid context at %s:%d\n",
++	       file, line);
++	pr_err("in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n",
++	       in_atomic(), irqs_disabled(), current->non_block_count,
++	       current->pid, current->comm);
++	pr_err("preempt_count: %x, expected: %x\n", preempt_count(),
++	       offsets & MIGHT_RESCHED_PREEMPT_MASK);
++
++	if (IS_ENABLED(CONFIG_PREEMPT_RCU)) {
++		pr_err("RCU nest depth: %d, expected: %u\n",
++		       rcu_preempt_depth(), offsets >> MIGHT_RESCHED_RCU_SHIFT);
++	}
+ 
+ 	if (task_stack_end_corrupted(current))
+-		printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
++		pr_emerg("Thread overran stack, or stack corrupted\n");
+ 
+ 	debug_show_held_locks(current);
+ 	if (irqs_disabled())
+ 		print_irqtrace_events(current);
+-	if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
+-	    && !preempt_count_equals(preempt_offset)) {
+-		pr_err("Preemption disabled at:");
+-		print_ip_sym(KERN_ERR, preempt_disable_ip);
+-	}
++
++	print_preempt_disable_ip(offsets & MIGHT_RESCHED_PREEMPT_MASK,
++				 preempt_disable_ip);
++
+ 	dump_stack();
+ 	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
+ }
+-EXPORT_SYMBOL(___might_sleep);
++EXPORT_SYMBOL(__might_resched);
+ 
+ void __cant_sleep(const char *file, int line, int preempt_offset)
+ {
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 646a6ae4b250..c02ecc105f0c 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -4651,7 +4651,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+ 	ideal_runtime = sched_slice(cfs_rq, curr);
+ 	delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
+ 	if (delta_exec > ideal_runtime) {
+-		resched_curr(rq_of(cfs_rq));
++		resched_curr_lazy(rq_of(cfs_rq));
+ 		/*
+ 		 * The current task ran long enough, ensure it doesn't get
+ 		 * re-elected due to buddy favours.
+@@ -4675,7 +4675,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+ 		return;
+ 
+ 	if (delta > ideal_runtime)
+-		resched_curr(rq_of(cfs_rq));
++		resched_curr_lazy(rq_of(cfs_rq));
+ }
+ 
+ static void
+@@ -4821,7 +4821,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
+ 	 * validating it and just reschedule.
+ 	 */
+ 	if (queued) {
+-		resched_curr(rq_of(cfs_rq));
++		resched_curr_lazy(rq_of(cfs_rq));
+ 		return;
+ 	}
+ 	/*
+@@ -4961,7 +4961,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
+ 	 * hierarchy can be throttled
+ 	 */
+ 	if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
+-		resched_curr(rq_of(cfs_rq));
++		resched_curr_lazy(rq_of(cfs_rq));
+ }
+ 
+ static __always_inline
+@@ -5724,7 +5724,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
+ 
+ 		if (delta < 0) {
+ 			if (task_current(rq, p))
+-				resched_curr(rq);
++				resched_curr_lazy(rq);
+ 			return;
+ 		}
+ 		hrtick_start(rq, delta);
+@@ -7449,7 +7449,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
+ 	return;
+ 
+ preempt:
+-	resched_curr(rq);
++	resched_curr_lazy(rq);
+ 	/*
+ 	 * Only set the backward buddy when the current task is still
+ 	 * on the rq. This can happen when a wakeup gets interleaved
+@@ -11508,7 +11508,7 @@ static void task_fork_fair(struct task_struct *p)
+ 		 * 'current' within the tree based on its new key value.
+ 		 */
+ 		swap(curr->vruntime, se->vruntime);
+-		resched_curr(rq);
++		resched_curr_lazy(rq);
+ 	}
+ 
+ 	se->vruntime -= cfs_rq->min_vruntime;
+@@ -11535,7 +11535,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
+ 	 */
+ 	if (task_current(rq, p)) {
+ 		if (p->prio > oldprio)
+-			resched_curr(rq);
++			resched_curr_lazy(rq);
+ 	} else
+ 		check_preempt_curr(rq, p, 0);
+ }
+diff --git a/kernel/sched/features.h b/kernel/sched/features.h
+index c4947c1b5edb..e13090e33f3c 100644
+--- a/kernel/sched/features.h
++++ b/kernel/sched/features.h
+@@ -46,11 +46,19 @@ SCHED_FEAT(DOUBLE_TICK, false)
+  */
+ SCHED_FEAT(NONTASK_CAPACITY, true)
+ 
++#ifdef CONFIG_PREEMPT_RT
++SCHED_FEAT(TTWU_QUEUE, false)
++# ifdef CONFIG_PREEMPT_LAZY
++SCHED_FEAT(PREEMPT_LAZY, true)
++# endif
++#else
++
+ /*
+  * Queue remote wakeups on the target CPU and process them
+  * using the scheduler IPI. Reduces rq->lock contention/bounces.
+  */
+ SCHED_FEAT(TTWU_QUEUE, true)
++#endif
+ 
+ /*
+  * When doing wakeups, attempt to limit superfluous scans of the LLC domain.
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 6312f1904825..36483b794a00 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -2327,6 +2327,15 @@ extern void reweight_task(struct task_struct *p, int prio);
+ extern void resched_curr(struct rq *rq);
+ extern void resched_cpu(int cpu);
+ 
++#ifdef CONFIG_PREEMPT_LAZY
++extern void resched_curr_lazy(struct rq *rq);
++#else
++static inline void resched_curr_lazy(struct rq *rq)
++{
++	resched_curr(rq);
++}
++#endif
++
+ extern struct rt_bandwidth def_rt_bandwidth;
+ extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
+ 
+diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
+index e1c655f928c7..f230b1ac7f91 100644
+--- a/kernel/sched/swait.c
++++ b/kernel/sched/swait.c
+@@ -64,6 +64,7 @@ void swake_up_all(struct swait_queue_head *q)
+ 	struct swait_queue *curr;
+ 	LIST_HEAD(tmp);
+ 
++	WARN_ON(irqs_disabled());
+ 	raw_spin_lock_irq(&q->lock);
+ 	list_splice_init(&q->task_list, &tmp);
+ 	while (!list_empty(&tmp)) {
+diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
+index 4e8698e62f07..3d0157bd4e14 100644
+--- a/kernel/sched/topology.c
++++ b/kernel/sched/topology.c
+@@ -526,7 +526,7 @@ static int init_rootdomain(struct root_domain *rd)
+ #ifdef HAVE_RT_PUSH_IPI
+ 	rd->rto_cpu = -1;
+ 	raw_spin_lock_init(&rd->rto_lock);
+-	init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
++	rd->rto_push_work = IRQ_WORK_INIT_HARD(rto_push_irq_work_func);
+ #endif
+ 
+ 	rd->visit_gen = 0;
+diff --git a/kernel/signal.c b/kernel/signal.c
+index c7dbb19219b9..0bbd89fbf240 100644
+--- a/kernel/signal.c
++++ b/kernel/signal.c
+@@ -1324,6 +1324,34 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t,
+ 	struct k_sigaction *action;
+ 	int sig = info->si_signo;
+ 
++	/*
++	 * On some archs, PREEMPT_RT has to delay sending a signal from a trap
++	 * since it can not enable preemption, and the signal code's spin_locks
++	 * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will
++	 * send the signal on exit of the trap.
++	 */
++#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
++	if (in_atomic()) {
++		struct task_struct *t = current;
++
++		if (WARN_ON_ONCE(t->forced_info.si_signo))
++			return 0;
++
++		if (is_si_special(info)) {
++			WARN_ON_ONCE(info != SEND_SIG_PRIV);
++			t->forced_info.si_signo = info->si_signo;
++			t->forced_info.si_errno = 0;
++			t->forced_info.si_code = SI_KERNEL;
++			t->forced_info.si_pid = 0;
++			t->forced_info.si_uid = 0;
++		} else {
++			t->forced_info = *info;
++		}
++
++		set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
++		return 0;
++	}
++#endif
+ 	spin_lock_irqsave(&t->sighand->siglock, flags);
+ 	action = &t->sighand->action[sig-1];
+ 	ignored = action->sa.sa_handler == SIG_IGN;
+@@ -2308,16 +2336,8 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
+ 		if (gstop_done && ptrace_reparented(current))
+ 			do_notify_parent_cldstop(current, false, why);
+ 
+-		/*
+-		 * Don't want to allow preemption here, because
+-		 * sys_ptrace() needs this task to be inactive.
+-		 *
+-		 * XXX: implement read_unlock_no_resched().
+-		 */
+-		preempt_disable();
+ 		read_unlock(&tasklist_lock);
+ 		cgroup_enter_frozen();
+-		preempt_enable_no_resched();
+ 		freezable_schedule();
+ 		cgroup_leave_frozen(true);
+ 	} else {
+diff --git a/kernel/smp.c b/kernel/smp.c
+index 82825345432c..9d3c8c56d904 100644
+--- a/kernel/smp.c
++++ b/kernel/smp.c
+@@ -690,10 +690,20 @@ void flush_smp_call_function_from_idle(void)
+ 
+ 	cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->idle, CFD_SEQ_NOCPU,
+ 		      smp_processor_id(), CFD_SEQ_IDLE);
++
+ 	local_irq_save(flags);
+ 	flush_smp_call_function_queue(true);
+-	if (local_softirq_pending())
+-		do_softirq();
++
++	if (local_softirq_pending()) {
++		if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
++			do_softirq();
++		} else {
++			struct task_struct *ksoftirqd = this_cpu_ksoftirqd();
++
++			if (ksoftirqd && !task_is_running(ksoftirqd))
++				wake_up_process(ksoftirqd);
++		}
++	}
+ 
+ 	local_irq_restore(flags);
+ }
+diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
+index 0202f23ae960..7fc118c87b9d 100644
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -2646,7 +2646,13 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
+ 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
+ 	if (test_preempt_need_resched())
+ 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
+-	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
++#ifdef CONFIG_PREEMPT_LAZY
++	if (need_resched_lazy())
++		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
++#endif
++
++	return (trace_flags << 24) | (min_t(unsigned int, pc & 0xff, 0xf)) |
++		(preempt_lazy_count() & 0xff) << 16 |
+ 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
+ }
+ 
+@@ -4227,15 +4233,17 @@ unsigned long trace_total_entries(struct trace_array *tr)
+ 
+ static void print_lat_help_header(struct seq_file *m)
+ {
+-	seq_puts(m, "#                    _------=> CPU#            \n"
+-		    "#                   / _-----=> irqs-off        \n"
+-		    "#                  | / _----=> need-resched    \n"
+-		    "#                  || / _---=> hardirq/softirq \n"
+-		    "#                  ||| / _--=> preempt-depth   \n"
+-		    "#                  |||| / _-=> migrate-disable \n"
+-		    "#                  ||||| /     delay           \n"
+-		    "#  cmd     pid     |||||| time  |   caller     \n"
+-		    "#     \\   /        ||||||  \\    |    /       \n");
++	seq_puts(m, "#                    _--------=> CPU#            \n"
++		    "#                   / _-------=> irqs-off        \n"
++		    "#                  | / _------=> need-resched    \n"
++		    "#                  || / _-----=> need-resched-lazy\n"
++		    "#                  ||| / _----=> hardirq/softirq \n"
++		    "#                  |||| / _---=> preempt-depth   \n"
++		    "#                  ||||| / _--=> preempt-lazy-depth\n"
++		    "#                  |||||| / _-=> migrate-disable \n"
++		    "#                  ||||||| /     delay           \n"
++		    "#  cmd     pid     |||||||| time  |   caller     \n"
++		    "#     \\   /        ||||||||  \\    |    /       \n");
+ }
+ 
+ static void print_event_info(struct array_buffer *buf, struct seq_file *m)
+@@ -4269,14 +4277,16 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file
+ 
+ 	print_event_info(buf, m);
+ 
+-	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
+-	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
+-	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
+-	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
+-	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
+-	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
+-	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
+-	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
++	seq_printf(m, "#                            %.*s  _-------=> irqs-off\n", prec, space);
++	seq_printf(m, "#                            %.*s / _------=> need-resched\n", prec, space);
++	seq_printf(m, "#                            %.*s| / _-----=> need-resched-lazy\n", prec, space);
++	seq_printf(m, "#                            %.*s|| / _----=> hardirq/softirq\n", prec, space);
++	seq_printf(m, "#                            %.*s||| / _---=> preempt-depth\n", prec, space);
++	seq_printf(m, "#                            %.*s|||| / _--=> preempt-lazy-depth\n", prec, space);
++	seq_printf(m, "#                            %.*s||||| / _-=> migrate-disable\n", prec, space);
++	seq_printf(m, "#                            %.*s|||||| /     delay\n", prec, space);
++	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
++	seq_printf(m, "#              | |    %.*s   |   |||||||      |         |\n", prec, "       |    ");
+ }
+ 
+ void
+diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
+index 160298d285c0..9ec3c6c38cc3 100644
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -193,6 +193,7 @@ static int trace_define_common_fields(void)
+ 	/* Holds both preempt_count and migrate_disable */
+ 	__common_field(unsigned char, preempt_count);
+ 	__common_field(int, pid);
++	__common_field(unsigned char, preempt_lazy_count);
+ 
+ 	return ret;
+ }
+diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
+index 6b4d3f3abdae..460bc8245e4a 100644
+--- a/kernel/trace/trace_output.c
++++ b/kernel/trace/trace_output.c
+@@ -451,6 +451,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
+ {
+ 	char hardsoft_irq;
+ 	char need_resched;
++	char need_resched_lazy;
+ 	char irqs_off;
+ 	int hardirq;
+ 	int softirq;
+@@ -481,6 +482,9 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
+ 		break;
+ 	}
+ 
++	need_resched_lazy =
++		(entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.';
++
+ 	hardsoft_irq =
+ 		(nmi && hardirq)     ? 'Z' :
+ 		nmi                  ? 'z' :
+@@ -489,14 +493,20 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
+ 		softirq              ? 's' :
+ 		                       '.' ;
+ 
+-	trace_seq_printf(s, "%c%c%c",
+-			 irqs_off, need_resched, hardsoft_irq);
++	trace_seq_printf(s, "%c%c%c%c",
++			 irqs_off, need_resched, need_resched_lazy,
++			 hardsoft_irq);
+ 
+ 	if (entry->preempt_count & 0xf)
+ 		trace_seq_printf(s, "%x", entry->preempt_count & 0xf);
+ 	else
+ 		trace_seq_putc(s, '.');
+ 
++	if (entry->preempt_lazy_count)
++		trace_seq_printf(s, "%x", entry->preempt_lazy_count);
++	else
++		trace_seq_putc(s, '.');
++
+ 	if (entry->preempt_count & 0xf0)
+ 		trace_seq_printf(s, "%x", entry->preempt_count >> 4);
+ 	else
+diff --git a/lib/bug.c b/lib/bug.c
+index 45a0584f6541..03a87df69ed2 100644
+--- a/lib/bug.c
++++ b/lib/bug.c
+@@ -206,6 +206,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
+ 	else
+ 		pr_crit("Kernel BUG at %pB [verbose debug info unavailable]\n",
+ 			(void *)bugaddr);
++	pr_flush(1000, true);
+ 
+ 	return BUG_TRAP_TYPE_BUG;
+ }
+diff --git a/lib/dump_stack.c b/lib/dump_stack.c
+index 6b7f1bf6715d..6e8ae42c7e27 100644
+--- a/lib/dump_stack.c
++++ b/lib/dump_stack.c
+@@ -102,9 +102,9 @@ asmlinkage __visible void dump_stack_lvl(const char *log_lvl)
+ 	 * Permit this cpu to perform nested stack dumps while serialising
+ 	 * against other CPUs
+ 	 */
+-	printk_cpu_lock_irqsave(flags);
++	raw_printk_cpu_lock_irqsave(flags);
+ 	__dump_stack(log_lvl);
+-	printk_cpu_unlock_irqrestore(flags);
++	raw_printk_cpu_unlock_irqrestore(flags);
+ }
+ EXPORT_SYMBOL(dump_stack_lvl);
+ 
+diff --git a/lib/irq_poll.c b/lib/irq_poll.c
+index 2f17b488d58e..2b9f797642f6 100644
+--- a/lib/irq_poll.c
++++ b/lib/irq_poll.c
+@@ -191,11 +191,13 @@ static int irq_poll_cpu_dead(unsigned int cpu)
+ 	 * If a CPU goes away, splice its entries to the current CPU
+ 	 * and trigger a run of the softirq
+ 	 */
++	local_bh_disable();
+ 	local_irq_disable();
+ 	list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
+ 			 this_cpu_ptr(&blk_cpu_iopoll));
+ 	__raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
+ 	local_irq_enable();
++	local_bh_enable();
+ 
+ 	return 0;
+ }
+diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
+index 161108e5d2fe..1266ea3726d7 100644
+--- a/lib/locking-selftest.c
++++ b/lib/locking-selftest.c
+@@ -26,6 +26,12 @@
+ #include <linux/rtmutex.h>
+ #include <linux/local_lock.h>
+ 
++#ifdef CONFIG_PREEMPT_RT
++# define NON_RT(...)
++#else
++# define NON_RT(...)	__VA_ARGS__
++#endif
++
+ /*
+  * Change this to 1 if you want to see the failure printouts:
+  */
+@@ -139,7 +145,7 @@ static DEFINE_RT_MUTEX(rtmutex_Z2);
+ 
+ #endif
+ 
+-static local_lock_t local_A = INIT_LOCAL_LOCK(local_A);
++static DEFINE_PER_CPU(local_lock_t, local_A);
+ 
+ /*
+  * non-inlined runtime initializers, to let separate locks share
+@@ -712,12 +718,18 @@ GENERATE_TESTCASE(ABCDBCDA_rtmutex);
+ 
+ #undef E
+ 
++#ifdef CONFIG_PREEMPT_RT
++# define RT_PREPARE_DBL_UNLOCK()	{ migrate_disable(); rcu_read_lock(); }
++#else
++# define RT_PREPARE_DBL_UNLOCK()
++#endif
+ /*
+  * Double unlock:
+  */
+ #define E()					\
+ 						\
+ 	LOCK(A);				\
++	RT_PREPARE_DBL_UNLOCK();		\
+ 	UNLOCK(A);				\
+ 	UNLOCK(A); /* fail */
+ 
+@@ -802,6 +814,7 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock)
+ #include "locking-selftest-wlock-hardirq.h"
+ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_wlock)
+ 
++#ifndef CONFIG_PREEMPT_RT
+ #include "locking-selftest-spin-softirq.h"
+ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_spin)
+ 
+@@ -810,10 +823,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock)
+ 
+ #include "locking-selftest-wlock-softirq.h"
+ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock)
++#endif
+ 
+ #undef E1
+ #undef E2
+ 
++#ifndef CONFIG_PREEMPT_RT
+ /*
+  * Enabling hardirqs with a softirq-safe lock held:
+  */
+@@ -846,6 +861,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock)
+ #undef E1
+ #undef E2
+ 
++#endif
++
+ /*
+  * Enabling irqs with an irq-safe lock held:
+  */
+@@ -875,6 +892,7 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock)
+ #include "locking-selftest-wlock-hardirq.h"
+ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_wlock)
+ 
++#ifndef CONFIG_PREEMPT_RT
+ #include "locking-selftest-spin-softirq.h"
+ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_spin)
+ 
+@@ -883,6 +901,7 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock)
+ 
+ #include "locking-selftest-wlock-softirq.h"
+ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
++#endif
+ 
+ #undef E1
+ #undef E2
+@@ -921,6 +940,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock)
+ #include "locking-selftest-wlock-hardirq.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_wlock)
+ 
++#ifndef CONFIG_PREEMPT_RT
+ #include "locking-selftest-spin-softirq.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_spin)
+ 
+@@ -929,6 +949,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock)
+ 
+ #include "locking-selftest-wlock-softirq.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
++#endif
+ 
+ #undef E1
+ #undef E2
+@@ -969,6 +990,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock)
+ #include "locking-selftest-wlock-hardirq.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_wlock)
+ 
++#ifndef CONFIG_PREEMPT_RT
+ #include "locking-selftest-spin-softirq.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_spin)
+ 
+@@ -977,6 +999,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock)
+ 
+ #include "locking-selftest-wlock-softirq.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock)
++#endif
+ 
+ #undef E1
+ #undef E2
+@@ -1031,6 +1054,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_rlock)
+ #include "locking-selftest-wlock-hardirq.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_wlock)
+ 
++#ifndef CONFIG_PREEMPT_RT
+ #include "locking-selftest-spin-softirq.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_spin)
+ 
+@@ -1039,6 +1063,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_rlock)
+ 
+ #include "locking-selftest-wlock-softirq.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock)
++#endif
+ 
+ #undef E1
+ #undef E2
+@@ -1206,12 +1231,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard_rlock)
+ #include "locking-selftest-wlock.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard_wlock)
+ 
++#ifndef CONFIG_PREEMPT_RT
+ #include "locking-selftest-softirq.h"
+ #include "locking-selftest-rlock.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft_rlock)
+ 
+ #include "locking-selftest-wlock.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft_wlock)
++#endif
+ 
+ #undef E1
+ #undef E2
+@@ -1252,12 +1279,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard_rlock)
+ #include "locking-selftest-wlock.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard_wlock)
+ 
++#ifndef CONFIG_PREEMPT_RT
+ #include "locking-selftest-softirq.h"
+ #include "locking-selftest-rlock.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft_rlock)
+ 
+ #include "locking-selftest-wlock.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft_wlock)
++#endif
+ 
+ #undef E1
+ #undef E2
+@@ -1306,12 +1335,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_hard_rlock)
+ #include "locking-selftest-wlock.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_hard_wlock)
+ 
++#ifndef CONFIG_PREEMPT_RT
+ #include "locking-selftest-softirq.h"
+ #include "locking-selftest-rlock.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_soft_rlock)
+ 
+ #include "locking-selftest-wlock.h"
+ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_soft_wlock)
++#endif
+ 
+ #ifdef CONFIG_DEBUG_LOCK_ALLOC
+ # define I_SPINLOCK(x)	lockdep_reset_lock(&lock_##x.dep_map)
+@@ -1320,7 +1351,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_soft_wlock)
+ # define I_MUTEX(x)	lockdep_reset_lock(&mutex_##x.dep_map)
+ # define I_RWSEM(x)	lockdep_reset_lock(&rwsem_##x.dep_map)
+ # define I_WW(x)	lockdep_reset_lock(&x.dep_map)
+-# define I_LOCAL_LOCK(x) lockdep_reset_lock(&local_##x.dep_map)
++# define I_LOCAL_LOCK(x) lockdep_reset_lock(this_cpu_ptr(&local_##x.dep_map))
+ #ifdef CONFIG_RT_MUTEXES
+ # define I_RTMUTEX(x)	lockdep_reset_lock(&rtmutex_##x.dep_map)
+ #endif
+@@ -1380,7 +1411,7 @@ static void reset_locks(void)
+ 	init_shared_classes();
+ 	raw_spin_lock_init(&raw_lock_A);
+ 	raw_spin_lock_init(&raw_lock_B);
+-	local_lock_init(&local_A);
++	local_lock_init(this_cpu_ptr(&local_A));
+ 
+ 	ww_mutex_init(&o, &ww_lockdep); ww_mutex_init(&o2, &ww_lockdep); ww_mutex_init(&o3, &ww_lockdep);
+ 	memset(&t, 0, sizeof(t)); memset(&t2, 0, sizeof(t2));
+@@ -1398,7 +1429,13 @@ static int unexpected_testcase_failures;
+ 
+ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
+ {
+-	unsigned long saved_preempt_count = preempt_count();
++	int saved_preempt_count = preempt_count();
++#ifdef CONFIG_PREEMPT_RT
++#ifdef CONFIG_SMP
++	int saved_mgd_count = current->migration_disabled;
++#endif
++	int saved_rcu_count = current->rcu_read_lock_nesting;
++#endif
+ 
+ 	WARN_ON(irqs_disabled());
+ 
+@@ -1432,6 +1469,18 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
+ 	 * count, so restore it:
+ 	 */
+ 	preempt_count_set(saved_preempt_count);
++
++#ifdef CONFIG_PREEMPT_RT
++#ifdef CONFIG_SMP
++	while (current->migration_disabled > saved_mgd_count)
++		migrate_enable();
++#endif
++
++	while (current->rcu_read_lock_nesting > saved_rcu_count)
++		rcu_read_unlock();
++	WARN_ON_ONCE(current->rcu_read_lock_nesting < saved_rcu_count);
++#endif
++
+ #ifdef CONFIG_TRACE_IRQFLAGS
+ 	if (softirq_count())
+ 		current->softirqs_enabled = 0;
+@@ -1499,7 +1548,7 @@ static inline void print_testname(const char *testname)
+ 
+ #define DO_TESTCASE_2x2RW(desc, name, nr)			\
+ 	DO_TESTCASE_2RW("hard-"desc, name##_hard, nr)		\
+-	DO_TESTCASE_2RW("soft-"desc, name##_soft, nr)		\
++	NON_RT(DO_TESTCASE_2RW("soft-"desc, name##_soft, nr))	\
+ 
+ #define DO_TESTCASE_6x2x2RW(desc, name)				\
+ 	DO_TESTCASE_2x2RW(desc, name, 123);			\
+@@ -1547,19 +1596,19 @@ static inline void print_testname(const char *testname)
+ 
+ #define DO_TESTCASE_2I(desc, name, nr)				\
+ 	DO_TESTCASE_1("hard-"desc, name##_hard, nr);		\
+-	DO_TESTCASE_1("soft-"desc, name##_soft, nr);
++	NON_RT(DO_TESTCASE_1("soft-"desc, name##_soft, nr));
+ 
+ #define DO_TESTCASE_2IB(desc, name, nr)				\
+ 	DO_TESTCASE_1B("hard-"desc, name##_hard, nr);		\
+-	DO_TESTCASE_1B("soft-"desc, name##_soft, nr);
++	NON_RT(DO_TESTCASE_1B("soft-"desc, name##_soft, nr));
+ 
+ #define DO_TESTCASE_6I(desc, name, nr)				\
+ 	DO_TESTCASE_3("hard-"desc, name##_hard, nr);		\
+-	DO_TESTCASE_3("soft-"desc, name##_soft, nr);
++	NON_RT(DO_TESTCASE_3("soft-"desc, name##_soft, nr));
+ 
+ #define DO_TESTCASE_6IRW(desc, name, nr)			\
+ 	DO_TESTCASE_3RW("hard-"desc, name##_hard, nr);		\
+-	DO_TESTCASE_3RW("soft-"desc, name##_soft, nr);
++	NON_RT(DO_TESTCASE_3RW("soft-"desc, name##_soft, nr));
+ 
+ #define DO_TESTCASE_2x3(desc, name)				\
+ 	DO_TESTCASE_3(desc, name, 12);				\
+@@ -1651,6 +1700,20 @@ static void ww_test_fail_acquire(void)
+ #endif
+ }
+ 
++#ifdef CONFIG_PREEMPT_RT
++#define ww_mutex_base_lock(b)			rt_mutex_lock(b)
++#define ww_mutex_base_lock_nest_lock(b, b2)	rt_mutex_lock_nest_lock(b, b2)
++#define ww_mutex_base_lock_interruptible(b)	rt_mutex_lock_interruptible(b)
++#define ww_mutex_base_lock_killable(b)		rt_mutex_lock_killable(b)
++#define ww_mutex_base_unlock(b)			rt_mutex_unlock(b)
++#else
++#define ww_mutex_base_lock(b)			mutex_lock(b)
++#define ww_mutex_base_lock_nest_lock(b, b2)	mutex_lock_nest_lock(b, b2)
++#define ww_mutex_base_lock_interruptible(b)	mutex_lock_interruptible(b)
++#define ww_mutex_base_lock_killable(b)		mutex_lock_killable(b)
++#define ww_mutex_base_unlock(b)			mutex_unlock(b)
++#endif
++
+ static void ww_test_normal(void)
+ {
+ 	int ret;
+@@ -1665,50 +1728,50 @@ static void ww_test_normal(void)
+ 
+ 	/* mutex_lock (and indirectly, mutex_lock_nested) */
+ 	o.ctx = (void *)~0UL;
+-	mutex_lock(&o.base);
+-	mutex_unlock(&o.base);
++	ww_mutex_base_lock(&o.base);
++	ww_mutex_base_unlock(&o.base);
+ 	WARN_ON(o.ctx != (void *)~0UL);
+ 
+ 	/* mutex_lock_interruptible (and *_nested) */
+ 	o.ctx = (void *)~0UL;
+-	ret = mutex_lock_interruptible(&o.base);
++	ret = ww_mutex_base_lock_interruptible(&o.base);
+ 	if (!ret)
+-		mutex_unlock(&o.base);
++		ww_mutex_base_unlock(&o.base);
+ 	else
+ 		WARN_ON(1);
+ 	WARN_ON(o.ctx != (void *)~0UL);
+ 
+ 	/* mutex_lock_killable (and *_nested) */
+ 	o.ctx = (void *)~0UL;
+-	ret = mutex_lock_killable(&o.base);
++	ret = ww_mutex_base_lock_killable(&o.base);
+ 	if (!ret)
+-		mutex_unlock(&o.base);
++		ww_mutex_base_unlock(&o.base);
+ 	else
+ 		WARN_ON(1);
+ 	WARN_ON(o.ctx != (void *)~0UL);
+ 
+ 	/* trylock, succeeding */
+ 	o.ctx = (void *)~0UL;
+-	ret = mutex_trylock(&o.base);
++	ret = ww_mutex_base_trylock(&o.base);
+ 	WARN_ON(!ret);
+ 	if (ret)
+-		mutex_unlock(&o.base);
++		ww_mutex_base_unlock(&o.base);
+ 	else
+ 		WARN_ON(1);
+ 	WARN_ON(o.ctx != (void *)~0UL);
+ 
+ 	/* trylock, failing */
+ 	o.ctx = (void *)~0UL;
+-	mutex_lock(&o.base);
+-	ret = mutex_trylock(&o.base);
++	ww_mutex_base_lock(&o.base);
++	ret = ww_mutex_base_trylock(&o.base);
+ 	WARN_ON(ret);
+-	mutex_unlock(&o.base);
++	ww_mutex_base_unlock(&o.base);
+ 	WARN_ON(o.ctx != (void *)~0UL);
+ 
+ 	/* nest_lock */
+ 	o.ctx = (void *)~0UL;
+-	mutex_lock_nest_lock(&o.base, &t);
+-	mutex_unlock(&o.base);
++	ww_mutex_base_lock_nest_lock(&o.base, &t);
++	ww_mutex_base_unlock(&o.base);
+ 	WARN_ON(o.ctx != (void *)~0UL);
+ }
+ 
+@@ -1721,7 +1784,7 @@ static void ww_test_two_contexts(void)
+ static void ww_test_diff_class(void)
+ {
+ 	WWAI(&t);
+-#ifdef CONFIG_DEBUG_MUTEXES
++#ifdef DEBUG_WW_MUTEXES
+ 	t.ww_class = NULL;
+ #endif
+ 	WWL(&o, &t);
+@@ -1785,7 +1848,7 @@ static void ww_test_edeadlk_normal(void)
+ {
+ 	int ret;
+ 
+-	mutex_lock(&o2.base);
++	ww_mutex_base_lock(&o2.base);
+ 	o2.ctx = &t2;
+ 	mutex_release(&o2.base.dep_map, _THIS_IP_);
+ 
+@@ -1801,7 +1864,7 @@ static void ww_test_edeadlk_normal(void)
+ 
+ 	o2.ctx = NULL;
+ 	mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
+-	mutex_unlock(&o2.base);
++	ww_mutex_base_unlock(&o2.base);
+ 	WWU(&o);
+ 
+ 	WWL(&o2, &t);
+@@ -1811,7 +1874,7 @@ static void ww_test_edeadlk_normal_slow(void)
+ {
+ 	int ret;
+ 
+-	mutex_lock(&o2.base);
++	ww_mutex_base_lock(&o2.base);
+ 	mutex_release(&o2.base.dep_map, _THIS_IP_);
+ 	o2.ctx = &t2;
+ 
+@@ -1827,7 +1890,7 @@ static void ww_test_edeadlk_normal_slow(void)
+ 
+ 	o2.ctx = NULL;
+ 	mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
+-	mutex_unlock(&o2.base);
++	ww_mutex_base_unlock(&o2.base);
+ 	WWU(&o);
+ 
+ 	ww_mutex_lock_slow(&o2, &t);
+@@ -1837,7 +1900,7 @@ static void ww_test_edeadlk_no_unlock(void)
+ {
+ 	int ret;
+ 
+-	mutex_lock(&o2.base);
++	ww_mutex_base_lock(&o2.base);
+ 	o2.ctx = &t2;
+ 	mutex_release(&o2.base.dep_map, _THIS_IP_);
+ 
+@@ -1853,7 +1916,7 @@ static void ww_test_edeadlk_no_unlock(void)
+ 
+ 	o2.ctx = NULL;
+ 	mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
+-	mutex_unlock(&o2.base);
++	ww_mutex_base_unlock(&o2.base);
+ 
+ 	WWL(&o2, &t);
+ }
+@@ -1862,7 +1925,7 @@ static void ww_test_edeadlk_no_unlock_slow(void)
+ {
+ 	int ret;
+ 
+-	mutex_lock(&o2.base);
++	ww_mutex_base_lock(&o2.base);
+ 	mutex_release(&o2.base.dep_map, _THIS_IP_);
+ 	o2.ctx = &t2;
+ 
+@@ -1878,7 +1941,7 @@ static void ww_test_edeadlk_no_unlock_slow(void)
+ 
+ 	o2.ctx = NULL;
+ 	mutex_acquire(&o2.base.dep_map, 0, 1, _THIS_IP_);
+-	mutex_unlock(&o2.base);
++	ww_mutex_base_unlock(&o2.base);
+ 
+ 	ww_mutex_lock_slow(&o2, &t);
+ }
+@@ -1887,7 +1950,7 @@ static void ww_test_edeadlk_acquire_more(void)
+ {
+ 	int ret;
+ 
+-	mutex_lock(&o2.base);
++	ww_mutex_base_lock(&o2.base);
+ 	mutex_release(&o2.base.dep_map, _THIS_IP_);
+ 	o2.ctx = &t2;
+ 
+@@ -1908,7 +1971,7 @@ static void ww_test_edeadlk_acquire_more_slow(void)
+ {
+ 	int ret;
+ 
+-	mutex_lock(&o2.base);
++	ww_mutex_base_lock(&o2.base);
+ 	mutex_release(&o2.base.dep_map, _THIS_IP_);
+ 	o2.ctx = &t2;
+ 
+@@ -1929,11 +1992,11 @@ static void ww_test_edeadlk_acquire_more_edeadlk(void)
+ {
+ 	int ret;
+ 
+-	mutex_lock(&o2.base);
++	ww_mutex_base_lock(&o2.base);
+ 	mutex_release(&o2.base.dep_map, _THIS_IP_);
+ 	o2.ctx = &t2;
+ 
+-	mutex_lock(&o3.base);
++	ww_mutex_base_lock(&o3.base);
+ 	mutex_release(&o3.base.dep_map, _THIS_IP_);
+ 	o3.ctx = &t2;
+ 
+@@ -1955,11 +2018,11 @@ static void ww_test_edeadlk_acquire_more_edeadlk_slow(void)
+ {
+ 	int ret;
+ 
+-	mutex_lock(&o2.base);
++	ww_mutex_base_lock(&o2.base);
+ 	mutex_release(&o2.base.dep_map, _THIS_IP_);
+ 	o2.ctx = &t2;
+ 
+-	mutex_lock(&o3.base);
++	ww_mutex_base_lock(&o3.base);
+ 	mutex_release(&o3.base.dep_map, _THIS_IP_);
+ 	o3.ctx = &t2;
+ 
+@@ -1980,7 +2043,7 @@ static void ww_test_edeadlk_acquire_wrong(void)
+ {
+ 	int ret;
+ 
+-	mutex_lock(&o2.base);
++	ww_mutex_base_lock(&o2.base);
+ 	mutex_release(&o2.base.dep_map, _THIS_IP_);
+ 	o2.ctx = &t2;
+ 
+@@ -2005,7 +2068,7 @@ static void ww_test_edeadlk_acquire_wrong_slow(void)
+ {
+ 	int ret;
+ 
+-	mutex_lock(&o2.base);
++	ww_mutex_base_lock(&o2.base);
+ 	mutex_release(&o2.base.dep_map, _THIS_IP_);
+ 	o2.ctx = &t2;
+ 
+@@ -2646,8 +2709,8 @@ static void wait_context_tests(void)
+ 
+ static void local_lock_2(void)
+ {
+-	local_lock_acquire(&local_A);	/* IRQ-ON */
+-	local_lock_release(&local_A);
++	local_lock(&local_A);	/* IRQ-ON */
++	local_unlock(&local_A);
+ 
+ 	HARDIRQ_ENTER();
+ 	spin_lock(&lock_A);		/* IN-IRQ */
+@@ -2656,18 +2719,18 @@ static void local_lock_2(void)
+ 
+ 	HARDIRQ_DISABLE();
+ 	spin_lock(&lock_A);
+-	local_lock_acquire(&local_A);	/* IN-IRQ <-> IRQ-ON cycle, false */
+-	local_lock_release(&local_A);
++	local_lock(&local_A);	/* IN-IRQ <-> IRQ-ON cycle, false */
++	local_unlock(&local_A);
+ 	spin_unlock(&lock_A);
+ 	HARDIRQ_ENABLE();
+ }
+ 
+ static void local_lock_3A(void)
+ {
+-	local_lock_acquire(&local_A);	/* IRQ-ON */
++	local_lock(&local_A);	/* IRQ-ON */
+ 	spin_lock(&lock_B);		/* IRQ-ON */
+ 	spin_unlock(&lock_B);
+-	local_lock_release(&local_A);
++	local_unlock(&local_A);
+ 
+ 	HARDIRQ_ENTER();
+ 	spin_lock(&lock_A);		/* IN-IRQ */
+@@ -2676,18 +2739,18 @@ static void local_lock_3A(void)
+ 
+ 	HARDIRQ_DISABLE();
+ 	spin_lock(&lock_A);
+-	local_lock_acquire(&local_A);	/* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */
+-	local_lock_release(&local_A);
++	local_lock(&local_A);	/* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */
++	local_unlock(&local_A);
+ 	spin_unlock(&lock_A);
+ 	HARDIRQ_ENABLE();
+ }
+ 
+ static void local_lock_3B(void)
+ {
+-	local_lock_acquire(&local_A);	/* IRQ-ON */
++	local_lock(&local_A);	/* IRQ-ON */
+ 	spin_lock(&lock_B);		/* IRQ-ON */
+ 	spin_unlock(&lock_B);
+-	local_lock_release(&local_A);
++	local_unlock(&local_A);
+ 
+ 	HARDIRQ_ENTER();
+ 	spin_lock(&lock_A);		/* IN-IRQ */
+@@ -2696,8 +2759,8 @@ static void local_lock_3B(void)
+ 
+ 	HARDIRQ_DISABLE();
+ 	spin_lock(&lock_A);
+-	local_lock_acquire(&local_A);	/* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */
+-	local_lock_release(&local_A);
++	local_lock(&local_A);	/* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */
++	local_unlock(&local_A);
+ 	spin_unlock(&lock_A);
+ 	HARDIRQ_ENABLE();
+ 
+@@ -2812,7 +2875,7 @@ void locking_selftest(void)
+ 	printk("------------------------\n");
+ 	printk("| Locking API testsuite:\n");
+ 	printk("----------------------------------------------------------------------------\n");
+-	printk("                                 | spin |wlock |rlock |mutex | wsem | rsem |\n");
++	printk("                                 | spin |wlock |rlock |mutex | wsem | rsem |rtmutex\n");
+ 	printk("  --------------------------------------------------------------------------\n");
+ 
+ 	init_shared_classes();
+@@ -2885,12 +2948,11 @@ void locking_selftest(void)
+ 	DO_TESTCASE_6x1RR("rlock W1R2/R2R3/W3W1", W1R2_R2R3_W3W1);
+ 
+ 	printk("  --------------------------------------------------------------------------\n");
+-
+ 	/*
+ 	 * irq-context testcases:
+ 	 */
+ 	DO_TESTCASE_2x6("irqs-on + irq-safe-A", irqsafe1);
+-	DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A);
++	NON_RT(DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A));
+ 	DO_TESTCASE_2x6("safe-A + irqs-on", irqsafe2B);
+ 	DO_TESTCASE_6x6("safe-A + unsafe-B #1", irqsafe3);
+ 	DO_TESTCASE_6x6("safe-A + unsafe-B #2", irqsafe4);
+diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
+index 199ab201d501..06410209197a 100644
+--- a/lib/nmi_backtrace.c
++++ b/lib/nmi_backtrace.c
+@@ -99,7 +99,7 @@ bool nmi_cpu_backtrace(struct pt_regs *regs)
+ 		 * Allow nested NMI backtraces while serializing
+ 		 * against other CPUs.
+ 		 */
+-		printk_cpu_lock_irqsave(flags);
++		raw_printk_cpu_lock_irqsave(flags);
+ 		if (!READ_ONCE(backtrace_idle) && regs && cpu_in_idle(instruction_pointer(regs))) {
+ 			pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n",
+ 				cpu, (void *)instruction_pointer(regs));
+@@ -110,7 +110,7 @@ bool nmi_cpu_backtrace(struct pt_regs *regs)
+ 			else
+ 				dump_stack();
+ 		}
+-		printk_cpu_unlock_irqrestore(flags);
++		raw_printk_cpu_unlock_irqrestore(flags);
+ 		cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
+ 		return true;
+ 	}
+diff --git a/lib/scatterlist.c b/lib/scatterlist.c
+index abb3432ed744..d5e82e4a57ad 100644
+--- a/lib/scatterlist.c
++++ b/lib/scatterlist.c
+@@ -828,8 +828,7 @@ static bool sg_miter_get_next_page(struct sg_mapping_iter *miter)
+  *   stops @miter.
+  *
+  * Context:
+- *   Don't care if @miter is stopped, or not proceeded yet.
+- *   Otherwise, preemption disabled if the SG_MITER_ATOMIC is set.
++ *   Don't care.
+  *
+  * Returns:
+  *   true if @miter contains the valid mapping.  false if end of sg
+@@ -865,8 +864,7 @@ EXPORT_SYMBOL(sg_miter_skip);
+  *   @miter->addr and @miter->length point to the current mapping.
+  *
+  * Context:
+- *   Preemption disabled if SG_MITER_ATOMIC.  Preemption must stay disabled
+- *   till @miter is stopped.  May sleep if !SG_MITER_ATOMIC.
++ *   May sleep if !SG_MITER_ATOMIC.
+  *
+  * Returns:
+  *   true if @miter contains the next mapping.  false if end of sg
+@@ -906,8 +904,7 @@ EXPORT_SYMBOL(sg_miter_next);
+  *   need to be released during iteration.
+  *
+  * Context:
+- *   Preemption disabled if the SG_MITER_ATOMIC is set.  Don't care
+- *   otherwise.
++ *   Don't care.
+  */
+ void sg_miter_stop(struct sg_mapping_iter *miter)
+ {
+@@ -922,7 +919,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter)
+ 			flush_dcache_page(miter->page);
+ 
+ 		if (miter->__flags & SG_MITER_ATOMIC) {
+-			WARN_ON_ONCE(preemptible());
++			WARN_ON_ONCE(!pagefault_disabled());
+ 			kunmap_atomic(miter->addr);
+ 		} else
+ 			kunmap(miter->page);
+diff --git a/localversion-rt b/localversion-rt
+new file mode 100644
+index 000000000000..e2eb19782d4c
+--- /dev/null
++++ b/localversion-rt
+@@ -0,0 +1 @@
++-rt65
+diff --git a/mm/Kconfig b/mm/Kconfig
+index c048dea7e342..88778414465b 100644
+--- a/mm/Kconfig
++++ b/mm/Kconfig
+@@ -371,7 +371,7 @@ config NOMMU_INITIAL_TRIM_EXCESS
+ 
+ config TRANSPARENT_HUGEPAGE
+ 	bool "Transparent Hugepage Support"
+-	depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
++	depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT
+ 	select COMPACTION
+ 	select XARRAY_MULTI
+ 	help
+diff --git a/mm/memcontrol.c b/mm/memcontrol.c
+index b68b2fe639fd..71b7b7371595 100644
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -654,6 +654,35 @@ static u64 flush_next_time;
+ 
+ #define FLUSH_TIME (2UL*HZ)
+ 
++/*
++ * Accessors to ensure that preemption is disabled on PREEMPT_RT because it can
++ * not rely on this as part of an acquired spinlock_t lock. These functions are
++ * never used in hardirq context on PREEMPT_RT and therefore disabling preemption
++ * is sufficient.
++ */
++static void memcg_stats_lock(void)
++{
++#ifdef CONFIG_PREEMPT_RT
++      preempt_disable();
++#else
++      VM_BUG_ON(!irqs_disabled());
++#endif
++}
++
++static void __memcg_stats_lock(void)
++{
++#ifdef CONFIG_PREEMPT_RT
++      preempt_disable();
++#endif
++}
++
++static void memcg_stats_unlock(void)
++{
++#ifdef CONFIG_PREEMPT_RT
++      preempt_enable();
++#endif
++}
++
+ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
+ {
+ 	unsigned int x;
+@@ -737,6 +766,27 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
+ 	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+ 	memcg = pn->memcg;
+ 
++	/*
++	 * The callers from rmap rely on disabled preemption because they never
++	 * update their counters from in-interrupt context. For these
++	 * counters we check that the update is never performed from an
++	 * interrupt context while other callers need to have interrupts disabled.
++	 */
++	__memcg_stats_lock();
++	if (IS_ENABLED(CONFIG_DEBUG_VM) && !IS_ENABLED(CONFIG_PREEMPT_RT)) {
++		switch (idx) {
++		case NR_ANON_MAPPED:
++		case NR_FILE_MAPPED:
++		case NR_ANON_THPS:
++		case NR_SHMEM_PMDMAPPED:
++		case NR_FILE_PMDMAPPED:
++			WARN_ON_ONCE(!in_task());
++			break;
++		default:
++			WARN_ON_ONCE(!irqs_disabled());
++		}
++	}
++
+ 	/* Update memcg */
+ 	__this_cpu_add(memcg->vmstats_percpu->state[idx], val);
+ 
+@@ -744,6 +794,7 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
+ 	__this_cpu_add(pn->lruvec_stats_percpu->state[idx], val);
+ 
+ 	memcg_rstat_updated(memcg, val);
++	memcg_stats_unlock();
+ }
+ 
+ /**
+@@ -844,8 +895,10 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
+ 	if (mem_cgroup_disabled())
+ 		return;
+ 
++	memcg_stats_lock();
+ 	__this_cpu_add(memcg->vmstats_percpu->events[idx], count);
+ 	memcg_rstat_updated(memcg, count);
++	memcg_stats_unlock();
+ }
+ 
+ static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
+@@ -909,6 +962,9 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
+  */
+ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
+ {
++	if (IS_ENABLED(CONFIG_PREEMPT_RT))
++		return;
++
+ 	/* threshold event is triggered in finer grain than soft limit */
+ 	if (unlikely(mem_cgroup_event_ratelimit(memcg,
+ 						MEM_CGROUP_TARGET_THRESH))) {
+@@ -2102,39 +2158,37 @@ void unlock_page_memcg(struct page *page)
+ }
+ EXPORT_SYMBOL(unlock_page_memcg);
+ 
+-struct obj_stock {
++struct memcg_stock_pcp {
++	local_lock_t stock_lock;
++	struct mem_cgroup *cached; /* this never be root cgroup */
++	unsigned int nr_pages;
++
+ #ifdef CONFIG_MEMCG_KMEM
+ 	struct obj_cgroup *cached_objcg;
+ 	struct pglist_data *cached_pgdat;
+ 	unsigned int nr_bytes;
+ 	int nr_slab_reclaimable_b;
+ 	int nr_slab_unreclaimable_b;
+-#else
+-	int dummy[0];
+ #endif
+-};
+-
+-struct memcg_stock_pcp {
+-	struct mem_cgroup *cached; /* this never be root cgroup */
+-	unsigned int nr_pages;
+-	struct obj_stock task_obj;
+-	struct obj_stock irq_obj;
+ 
+ 	struct work_struct work;
+ 	unsigned long flags;
+ #define FLUSHING_CACHED_CHARGE	0
+ };
+-static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock);
++static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock) = {
++	.stock_lock = INIT_LOCAL_LOCK(stock_lock),
++};
+ static DEFINE_MUTEX(percpu_charge_mutex);
+ 
+ #ifdef CONFIG_MEMCG_KMEM
+-static void drain_obj_stock(struct obj_stock *stock);
++static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock);
+ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
+ 				     struct mem_cgroup *root_memcg);
+ 
+ #else
+-static inline void drain_obj_stock(struct obj_stock *stock)
++static inline struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
+ {
++	return NULL;
+ }
+ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
+ 				     struct mem_cgroup *root_memcg)
+@@ -2144,41 +2198,6 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
+ #endif
+ 
+ /*
+- * Most kmem_cache_alloc() calls are from user context. The irq disable/enable
+- * sequence used in this case to access content from object stock is slow.
+- * To optimize for user context access, there are now two object stocks for
+- * task context and interrupt context access respectively.
+- *
+- * The task context object stock can be accessed by disabling preemption only
+- * which is cheap in non-preempt kernel. The interrupt context object stock
+- * can only be accessed after disabling interrupt. User context code can
+- * access interrupt object stock, but not vice versa.
+- */
+-static inline struct obj_stock *get_obj_stock(unsigned long *pflags)
+-{
+-	struct memcg_stock_pcp *stock;
+-
+-	if (likely(in_task())) {
+-		*pflags = 0UL;
+-		preempt_disable();
+-		stock = this_cpu_ptr(&memcg_stock);
+-		return &stock->task_obj;
+-	}
+-
+-	local_irq_save(*pflags);
+-	stock = this_cpu_ptr(&memcg_stock);
+-	return &stock->irq_obj;
+-}
+-
+-static inline void put_obj_stock(unsigned long flags)
+-{
+-	if (likely(in_task()))
+-		preempt_enable();
+-	else
+-		local_irq_restore(flags);
+-}
+-
+-/**
+  * consume_stock: Try to consume stocked charge on this cpu.
+  * @memcg: memcg to consume from.
+  * @nr_pages: how many pages to charge.
+@@ -2198,7 +2217,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+ 	if (nr_pages > MEMCG_CHARGE_BATCH)
+ 		return ret;
+ 
+-	local_irq_save(flags);
++	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+ 
+ 	stock = this_cpu_ptr(&memcg_stock);
+ 	if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
+@@ -2206,7 +2225,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+ 		ret = true;
+ 	}
+ 
+-	local_irq_restore(flags);
++	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+ 
+ 	return ret;
+ }
+@@ -2235,6 +2254,7 @@ static void drain_stock(struct memcg_stock_pcp *stock)
+ static void drain_local_stock(struct work_struct *dummy)
+ {
+ 	struct memcg_stock_pcp *stock;
++	struct obj_cgroup *old = NULL;
+ 	unsigned long flags;
+ 
+ 	/*
+@@ -2242,28 +2262,25 @@ static void drain_local_stock(struct work_struct *dummy)
+ 	 * drain_stock races is that we always operate on local CPU stock
+ 	 * here with IRQ disabled
+ 	 */
+-	local_irq_save(flags);
++	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+ 
+ 	stock = this_cpu_ptr(&memcg_stock);
+-	drain_obj_stock(&stock->irq_obj);
+-	if (in_task())
+-		drain_obj_stock(&stock->task_obj);
++	old = drain_obj_stock(stock);
+ 	drain_stock(stock);
+ 	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
+ 
+-	local_irq_restore(flags);
++	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
++	if (old)
++		obj_cgroup_put(old);
+ }
+ 
+ /*
+  * Cache charges(val) to local per_cpu area.
+  * This will be consumed by consume_stock() function, later.
+  */
+-static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
++static void __refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+ {
+ 	struct memcg_stock_pcp *stock;
+-	unsigned long flags;
+-
+-	local_irq_save(flags);
+ 
+ 	stock = this_cpu_ptr(&memcg_stock);
+ 	if (stock->cached != memcg) { /* reset if necessary */
+@@ -2275,8 +2292,15 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+ 
+ 	if (stock->nr_pages > MEMCG_CHARGE_BATCH)
+ 		drain_stock(stock);
++}
+ 
+-	local_irq_restore(flags);
++static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
++{
++	unsigned long flags;
++
++	local_lock_irqsave(&memcg_stock.stock_lock, flags);
++	__refill_stock(memcg, nr_pages);
++	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+ }
+ 
+ /*
+@@ -2296,7 +2320,8 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
+ 	 * as well as workers from this path always operate on the local
+ 	 * per-cpu data. CPU up doesn't touch memcg_stock at all.
+ 	 */
+-	curcpu = get_cpu();
++	migrate_disable();
++	curcpu = smp_processor_id();
+ 	for_each_online_cpu(cpu) {
+ 		struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
+ 		struct mem_cgroup *memcg;
+@@ -2319,7 +2344,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
+ 				schedule_work_on(cpu, &stock->work);
+ 		}
+ 	}
+-	put_cpu();
++	migrate_enable();
+ 	mutex_unlock(&percpu_charge_mutex);
+ }
+ 
+@@ -3084,17 +3109,21 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
+ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
+ 		     enum node_stat_item idx, int nr)
+ {
++	struct memcg_stock_pcp *stock;
++	struct obj_cgroup *old = NULL;
+ 	unsigned long flags;
+-	struct obj_stock *stock = get_obj_stock(&flags);
+ 	int *bytes;
+ 
++	local_lock_irqsave(&memcg_stock.stock_lock, flags);
++	stock = this_cpu_ptr(&memcg_stock);
++
+ 	/*
+ 	 * Save vmstat data in stock and skip vmstat array update unless
+ 	 * accumulating over a page of vmstat data or when pgdat or idx
+ 	 * changes.
+ 	 */
+ 	if (stock->cached_objcg != objcg) {
+-		drain_obj_stock(stock);
++		old = drain_obj_stock(stock);
+ 		obj_cgroup_get(objcg);
+ 		stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
+ 				? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0;
+@@ -3138,38 +3167,53 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
+ 	if (nr)
+ 		mod_objcg_mlstate(objcg, pgdat, idx, nr);
+ 
+-	put_obj_stock(flags);
++	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
++	if (old)
++		obj_cgroup_put(old);
+ }
+ 
+ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
+ {
++	struct memcg_stock_pcp *stock;
+ 	unsigned long flags;
+-	struct obj_stock *stock = get_obj_stock(&flags);
+ 	bool ret = false;
+ 
++	local_lock_irqsave(&memcg_stock.stock_lock, flags);
++
++	stock = this_cpu_ptr(&memcg_stock);
+ 	if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) {
+ 		stock->nr_bytes -= nr_bytes;
+ 		ret = true;
+ 	}
+ 
+-	put_obj_stock(flags);
++	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+ 
+ 	return ret;
+ }
+ 
+-static void drain_obj_stock(struct obj_stock *stock)
++static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock)
+ {
+ 	struct obj_cgroup *old = stock->cached_objcg;
+ 
+ 	if (!old)
+-		return;
++		return NULL;
+ 
+ 	if (stock->nr_bytes) {
+ 		unsigned int nr_pages = stock->nr_bytes >> PAGE_SHIFT;
+ 		unsigned int nr_bytes = stock->nr_bytes & (PAGE_SIZE - 1);
+ 
+-		if (nr_pages)
+-			obj_cgroup_uncharge_pages(old, nr_pages);
++		if (nr_pages) {
++			struct mem_cgroup *memcg;
++
++			memcg = get_mem_cgroup_from_objcg(old);
++
++			if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
++				page_counter_uncharge(&memcg->kmem, nr_pages);
++
++			__refill_stock(memcg, nr_pages);
++
++			css_put(&memcg->css);
++		}
+ 
+ 		/*
+ 		 * The leftover is flushed to the centralized per-memcg value.
+@@ -3204,8 +3248,12 @@ static void drain_obj_stock(struct obj_stock *stock)
+ 		stock->cached_pgdat = NULL;
+ 	}
+ 
+-	obj_cgroup_put(old);
+ 	stock->cached_objcg = NULL;
++	/*
++	 * The `old' object needs to be released by the caller via
++	 * obj_cgroup_put() outside of memcg_stock_pcp::stock_lock.
++	 */
++	return old;
+ }
+ 
+ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
+@@ -3213,13 +3261,8 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
+ {
+ 	struct mem_cgroup *memcg;
+ 
+-	if (in_task() && stock->task_obj.cached_objcg) {
+-		memcg = obj_cgroup_memcg(stock->task_obj.cached_objcg);
+-		if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
+-			return true;
+-	}
+-	if (stock->irq_obj.cached_objcg) {
+-		memcg = obj_cgroup_memcg(stock->irq_obj.cached_objcg);
++	if (stock->cached_objcg) {
++		memcg = obj_cgroup_memcg(stock->cached_objcg);
+ 		if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
+ 			return true;
+ 	}
+@@ -3230,12 +3273,16 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
+ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
+ 			     bool allow_uncharge)
+ {
++	struct memcg_stock_pcp *stock;
++	struct obj_cgroup *old = NULL;
+ 	unsigned long flags;
+-	struct obj_stock *stock = get_obj_stock(&flags);
+ 	unsigned int nr_pages = 0;
+ 
++	local_lock_irqsave(&memcg_stock.stock_lock, flags);
++
++	stock = this_cpu_ptr(&memcg_stock);
+ 	if (stock->cached_objcg != objcg) { /* reset if necessary */
+-		drain_obj_stock(stock);
++		old = drain_obj_stock(stock);
+ 		obj_cgroup_get(objcg);
+ 		stock->cached_objcg = objcg;
+ 		stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
+@@ -3249,7 +3296,9 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
+ 		stock->nr_bytes &= (PAGE_SIZE - 1);
+ 	}
+ 
+-	put_obj_stock(flags);
++	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
++	if (old)
++		obj_cgroup_put(old);
+ 
+ 	if (nr_pages)
+ 		obj_cgroup_uncharge_pages(objcg, nr_pages);
+@@ -3816,8 +3865,12 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
+ 		}
+ 		break;
+ 	case RES_SOFT_LIMIT:
+-		memcg->soft_limit = nr_pages;
+-		ret = 0;
++		if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
++			ret = -EOPNOTSUPP;
++		} else {
++			memcg->soft_limit = nr_pages;
++			ret = 0;
++		}
+ 		break;
+ 	}
+ 	return ret ?: nbytes;
+@@ -4798,6 +4851,9 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
+ 	char *endp;
+ 	int ret;
+ 
++	if (IS_ENABLED(CONFIG_PREEMPT_RT))
++		return -EOPNOTSUPP;
++
+ 	buf = strstrip(buf);
+ 
+ 	efd = simple_strtoul(buf, &endp, 10);
+@@ -6889,7 +6945,6 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
+ 	unsigned long nr_pages;
+ 	struct mem_cgroup *memcg;
+ 	struct obj_cgroup *objcg;
+-	bool use_objcg = PageMemcgKmem(page);
+ 
+ 	VM_BUG_ON_PAGE(PageLRU(page), page);
+ 
+@@ -6898,7 +6953,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
+ 	 * page memcg or objcg at this point, we have fully
+ 	 * exclusive access to the page.
+ 	 */
+-	if (use_objcg) {
++	if (PageMemcgKmem(page)) {
+ 		objcg = __page_objcg(page);
+ 		/*
+ 		 * This get matches the put at the end of the function and
+@@ -6926,7 +6981,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
+ 
+ 	nr_pages = compound_nr(page);
+ 
+-	if (use_objcg) {
++	if (PageMemcgKmem(page)) {
+ 		ug->nr_memory += nr_pages;
+ 		ug->nr_kmem += nr_pages;
+ 
+@@ -7256,8 +7311,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+ 	 * important here to have the interrupts disabled because it is the
+ 	 * only synchronisation we have for updating the per-CPU variables.
+ 	 */
+-	VM_BUG_ON(!irqs_disabled());
++	memcg_stats_lock();
+ 	mem_cgroup_charge_statistics(memcg, page, -nr_entries);
++	memcg_stats_unlock();
+ 	memcg_check_events(memcg, page);
+ 
+ 	css_put(&memcg->css);
+diff --git a/mm/memory.c b/mm/memory.c
+index 8d71a82462dd..e2a9f89bbcf2 100644
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -5305,7 +5305,7 @@ void __might_fault(const char *file, int line)
+ 		return;
+ 	if (pagefault_disabled())
+ 		return;
+-	__might_sleep(file, line, 0);
++	__might_sleep(file, line);
+ #if defined(CONFIG_DEBUG_ATOMIC_SLEEP)
+ 	if (current->mm)
+ 		might_lock_read(&current->mm->mmap_lock);
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index f320ee2bd34a..33355028122a 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3149,9 +3149,9 @@ static void drain_local_pages_wq(struct work_struct *work)
+ 	 * cpu which is alright but we also have to make sure to not move to
+ 	 * a different one.
+ 	 */
+-	preempt_disable();
++	migrate_disable();
+ 	drain_local_pages(drain->zone);
+-	preempt_enable();
++	migrate_enable();
+ }
+ 
+ /*
+diff --git a/mm/vmalloc.c b/mm/vmalloc.c
+index 3e482209a1c4..1a59b7b4ff67 100644
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -1918,11 +1918,12 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
+ 		return ERR_PTR(err);
+ 	}
+ 
+-	vbq = &get_cpu_var(vmap_block_queue);
++	get_cpu_light();
++	vbq = this_cpu_ptr(&vmap_block_queue);
+ 	spin_lock(&vbq->lock);
+ 	list_add_tail_rcu(&vb->free_list, &vbq->free);
+ 	spin_unlock(&vbq->lock);
+-	put_cpu_var(vmap_block_queue);
++	put_cpu_light();
+ 
+ 	return vaddr;
+ }
+@@ -2001,7 +2002,8 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
+ 	order = get_order(size);
+ 
+ 	rcu_read_lock();
+-	vbq = &get_cpu_var(vmap_block_queue);
++	get_cpu_light();
++	vbq = this_cpu_ptr(&vmap_block_queue);
+ 	list_for_each_entry_rcu(vb, &vbq->free, free_list) {
+ 		unsigned long pages_off;
+ 
+@@ -2024,7 +2026,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
+ 		break;
+ 	}
+ 
+-	put_cpu_var(vmap_block_queue);
++	put_cpu_light();
+ 	rcu_read_unlock();
+ 
+ 	/* Allocate new block if nothing was found */
+diff --git a/mm/workingset.c b/mm/workingset.c
+index 880d882f3325..2a9ed5aeb6fa 100644
+--- a/mm/workingset.c
++++ b/mm/workingset.c
+@@ -433,6 +433,8 @@ static struct list_lru shadow_nodes;
+ 
+ void workingset_update_node(struct xa_node *node)
+ {
++	struct address_space *mapping;
++
+ 	/*
+ 	 * Track non-empty nodes that contain only shadow entries;
+ 	 * unlink those that contain pages or are being freed.
+@@ -441,7 +443,8 @@ void workingset_update_node(struct xa_node *node)
+ 	 * already where they should be. The list_empty() test is safe
+ 	 * as node->private_list is protected by the i_pages lock.
+ 	 */
+-	VM_WARN_ON_ONCE(!irqs_disabled());  /* For __inc_lruvec_page_state */
++	mapping = container_of(node->array, struct address_space, i_pages);
++	lockdep_assert_held(&mapping->i_pages.xa_lock);
+ 
+ 	if (node->count && node->count == node->nr_values) {
+ 		if (list_empty(&node->private_list)) {
+diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
+index 439deb8decbc..a66431853394 100644
+--- a/mm/zsmalloc.c
++++ b/mm/zsmalloc.c
+@@ -57,6 +57,7 @@
+ #include <linux/wait.h>
+ #include <linux/pagemap.h>
+ #include <linux/fs.h>
++#include <linux/local_lock.h>
+ 
+ #define ZSPAGE_MAGIC	0x58
+ 
+@@ -77,6 +78,20 @@
+ 
+ #define ZS_HANDLE_SIZE (sizeof(unsigned long))
+ 
++#ifdef CONFIG_PREEMPT_RT
++
++struct zsmalloc_handle {
++	unsigned long addr;
++	spinlock_t lock;
++};
++
++#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle))
++
++#else
++
++#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long))
++#endif
++
+ /*
+  * Object location (<PFN>, <obj_idx>) is encoded as
+  * a single (unsigned long) handle value.
+@@ -293,6 +308,7 @@ struct zspage {
+ };
+ 
+ struct mapping_area {
++	local_lock_t lock;
+ 	char *vm_buf; /* copy buffer for objects that span pages */
+ 	char *vm_addr; /* address of kmap_atomic()'ed pages */
+ 	enum zs_mapmode vm_mm; /* mapping mode */
+@@ -322,7 +338,7 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}
+ 
+ static int create_cache(struct zs_pool *pool)
+ {
+-	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
++	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE,
+ 					0, 0, NULL);
+ 	if (!pool->handle_cachep)
+ 		return 1;
+@@ -346,10 +362,27 @@ static void destroy_cache(struct zs_pool *pool)
+ 
+ static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
+ {
+-	return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
+-			gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
++	void *p;
++
++	p = kmem_cache_alloc(pool->handle_cachep,
++			     gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
++#ifdef CONFIG_PREEMPT_RT
++	if (p) {
++		struct zsmalloc_handle *zh = p;
++
++		spin_lock_init(&zh->lock);
++	}
++#endif
++	return (unsigned long)p;
+ }
+ 
++#ifdef CONFIG_PREEMPT_RT
++static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle)
++{
++	return (void *)(handle & ~((1 << OBJ_TAG_BITS) - 1));
++}
++#endif
++
+ static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
+ {
+ 	kmem_cache_free(pool->handle_cachep, (void *)handle);
+@@ -368,12 +401,18 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
+ 
+ static void record_obj(unsigned long handle, unsigned long obj)
+ {
++#ifdef CONFIG_PREEMPT_RT
++	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
++
++	WRITE_ONCE(zh->addr, obj);
++#else
+ 	/*
+ 	 * lsb of @obj represents handle lock while other bits
+ 	 * represent object value the handle is pointing so
+ 	 * updating shouldn't do store tearing.
+ 	 */
+ 	WRITE_ONCE(*(unsigned long *)handle, obj);
++#endif
+ }
+ 
+ /* zpool driver */
+@@ -455,7 +494,9 @@ MODULE_ALIAS("zpool-zsmalloc");
+ #endif /* CONFIG_ZPOOL */
+ 
+ /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
+-static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
++static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = {
++	.lock	= INIT_LOCAL_LOCK(lock),
++};
+ 
+ static bool is_zspage_isolated(struct zspage *zspage)
+ {
+@@ -862,7 +903,13 @@ static unsigned long location_to_obj(struct page *page, unsigned int obj_idx)
+ 
+ static unsigned long handle_to_obj(unsigned long handle)
+ {
++#ifdef CONFIG_PREEMPT_RT
++	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
++
++	return zh->addr;
++#else
+ 	return *(unsigned long *)handle;
++#endif
+ }
+ 
+ static unsigned long obj_to_head(struct page *page, void *obj)
+@@ -876,22 +923,46 @@ static unsigned long obj_to_head(struct page *page, void *obj)
+ 
+ static inline int testpin_tag(unsigned long handle)
+ {
++#ifdef CONFIG_PREEMPT_RT
++	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
++
++	return spin_is_locked(&zh->lock);
++#else
+ 	return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
++#endif
+ }
+ 
+ static inline int trypin_tag(unsigned long handle)
+ {
++#ifdef CONFIG_PREEMPT_RT
++	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
++
++	return spin_trylock(&zh->lock);
++#else
+ 	return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
++#endif
+ }
+ 
+ static void pin_tag(unsigned long handle) __acquires(bitlock)
+ {
++#ifdef CONFIG_PREEMPT_RT
++	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
++
++	return spin_lock(&zh->lock);
++#else
+ 	bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
++#endif
+ }
+ 
+ static void unpin_tag(unsigned long handle) __releases(bitlock)
+ {
++#ifdef CONFIG_PREEMPT_RT
++	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
++
++	return spin_unlock(&zh->lock);
++#else
+ 	bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
++#endif
+ }
+ 
+ static void reset_page(struct page *page)
+@@ -1274,7 +1345,8 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
+ 	class = pool->size_class[class_idx];
+ 	off = (class->size * obj_idx) & ~PAGE_MASK;
+ 
+-	area = &get_cpu_var(zs_map_area);
++	local_lock(&zs_map_area.lock);
++	area = this_cpu_ptr(&zs_map_area);
+ 	area->vm_mm = mm;
+ 	if (off + class->size <= PAGE_SIZE) {
+ 		/* this object is contained entirely within a page */
+@@ -1328,7 +1400,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
+ 
+ 		__zs_unmap_object(area, pages, off, class->size);
+ 	}
+-	put_cpu_var(zs_map_area);
++	local_unlock(&zs_map_area.lock);
+ 
+ 	migrate_read_unlock(zspage);
+ 	unpin_tag(handle);
+diff --git a/net/Kconfig b/net/Kconfig
+index 76a3385943e5..bd7386eede23 100644
+--- a/net/Kconfig
++++ b/net/Kconfig
+@@ -292,7 +292,7 @@ config CGROUP_NET_CLASSID
+ 
+ config NET_RX_BUSY_POLL
+ 	bool
+-	default y
++	default y if !PREEMPT_RT
+ 
+ config BQL
+ 	bool
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 4d698ccf4172..4bed27338ed9 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -225,14 +225,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
+ static inline void rps_lock(struct softnet_data *sd)
+ {
+ #ifdef CONFIG_RPS
+-	spin_lock(&sd->input_pkt_queue.lock);
++	raw_spin_lock(&sd->input_pkt_queue.raw_lock);
+ #endif
+ }
+ 
+ static inline void rps_unlock(struct softnet_data *sd)
+ {
+ #ifdef CONFIG_RPS
+-	spin_unlock(&sd->input_pkt_queue.lock);
++	raw_spin_unlock(&sd->input_pkt_queue.raw_lock);
+ #endif
+ }
+ 
+@@ -3046,6 +3046,7 @@ static void __netif_reschedule(struct Qdisc *q)
+ 	sd->output_queue_tailp = &q->next_sched;
+ 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
+ 	local_irq_restore(flags);
++	preempt_check_resched_rt();
+ }
+ 
+ void __netif_schedule(struct Qdisc *q)
+@@ -3108,6 +3109,7 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
+ 	__this_cpu_write(softnet_data.completion_queue, skb);
+ 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
+ 	local_irq_restore(flags);
++	preempt_check_resched_rt();
+ }
+ EXPORT_SYMBOL(__dev_kfree_skb_irq);
+ 
+@@ -3841,7 +3843,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
+ 	 * This permits qdisc->running owner to get the lock more
+ 	 * often and dequeue packets faster.
+ 	 */
++#ifdef CONFIG_PREEMPT_RT
++	contended = true;
++#else
+ 	contended = qdisc_is_running(q);
++#endif
+ 	if (unlikely(contended))
+ 		spin_lock(&q->busylock);
+ 
+@@ -4669,6 +4675,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
+ 	rps_unlock(sd);
+ 
+ 	local_irq_restore(flags);
++	preempt_check_resched_rt();
+ 
+ 	atomic_long_inc(&skb->dev->rx_dropped);
+ 	kfree_skb(skb);
+@@ -4909,7 +4916,7 @@ static int netif_rx_internal(struct sk_buff *skb)
+ 		struct rps_dev_flow voidflow, *rflow = &voidflow;
+ 		int cpu;
+ 
+-		preempt_disable();
++		migrate_disable();
+ 		rcu_read_lock();
+ 
+ 		cpu = get_rps_cpu(skb->dev, skb, &rflow);
+@@ -4919,14 +4926,14 @@ static int netif_rx_internal(struct sk_buff *skb)
+ 		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+ 
+ 		rcu_read_unlock();
+-		preempt_enable();
++		migrate_enable();
+ 	} else
+ #endif
+ 	{
+ 		unsigned int qtail;
+ 
+-		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
+-		put_cpu();
++		ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail);
++		put_cpu_light();
+ 	}
+ 	return ret;
+ }
+@@ -4965,11 +4972,9 @@ int netif_rx_ni(struct sk_buff *skb)
+ 
+ 	trace_netif_rx_ni_entry(skb);
+ 
+-	preempt_disable();
++	local_bh_disable();
+ 	err = netif_rx_internal(skb);
+-	if (local_softirq_pending())
+-		do_softirq();
+-	preempt_enable();
++	local_bh_enable();
+ 	trace_netif_rx_ni_exit(err);
+ 
+ 	return err;
+@@ -6413,12 +6418,14 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
+ 		sd->rps_ipi_list = NULL;
+ 
+ 		local_irq_enable();
++		preempt_check_resched_rt();
+ 
+ 		/* Send pending IPI's to kick RPS processing on remote cpus. */
+ 		net_rps_send_ipi(remsd);
+ 	} else
+ #endif
+ 		local_irq_enable();
++	preempt_check_resched_rt();
+ }
+ 
+ static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
+@@ -6496,6 +6503,7 @@ void __napi_schedule(struct napi_struct *n)
+ 	local_irq_save(flags);
+ 	____napi_schedule(this_cpu_ptr(&softnet_data), n);
+ 	local_irq_restore(flags);
++	preempt_check_resched_rt();
+ }
+ EXPORT_SYMBOL(__napi_schedule);
+ 
+@@ -11316,6 +11324,7 @@ static int dev_cpu_dead(unsigned int oldcpu)
+ 
+ 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
+ 	local_irq_enable();
++	preempt_check_resched_rt();
+ 
+ #ifdef CONFIG_RPS
+ 	remsd = oldsd->rps_ipi_list;
+@@ -11329,7 +11338,7 @@ static int dev_cpu_dead(unsigned int oldcpu)
+ 		netif_rx_ni(skb);
+ 		input_queue_head_incr(oldsd);
+ 	}
+-	while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
++	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
+ 		netif_rx_ni(skb);
+ 		input_queue_head_incr(oldsd);
+ 	}
+@@ -11644,7 +11653,7 @@ static int __init net_dev_init(void)
+ 
+ 		INIT_WORK(flush, flush_backlog);
+ 
+-		skb_queue_head_init(&sd->input_pkt_queue);
++		skb_queue_head_init_raw(&sd->input_pkt_queue);
+ 		skb_queue_head_init(&sd->process_queue);
+ #ifdef CONFIG_XFRM_OFFLOAD
+ 		skb_queue_head_init(&sd->xfrm_backlog);
+diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
+index 8e582e29a41e..4fcbdd71c59f 100644
+--- a/net/core/gen_estimator.c
++++ b/net/core/gen_estimator.c
+@@ -40,10 +40,10 @@
+  */
+ 
+ struct net_rate_estimator {
+-	struct gnet_stats_basic_packed	*bstats;
++	struct gnet_stats_basic_sync	*bstats;
+ 	spinlock_t		*stats_lock;
+-	seqcount_t		*running;
+-	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
++	bool			running;
++	struct gnet_stats_basic_sync __percpu *cpu_bstats;
+ 	u8			ewma_log;
+ 	u8			intvl_log; /* period : (250ms << intvl_log) */
+ 
+@@ -60,13 +60,13 @@ struct net_rate_estimator {
+ };
+ 
+ static void est_fetch_counters(struct net_rate_estimator *e,
+-			       struct gnet_stats_basic_packed *b)
++			       struct gnet_stats_basic_sync *b)
+ {
+-	memset(b, 0, sizeof(*b));
++	gnet_stats_basic_sync_init(b);
+ 	if (e->stats_lock)
+ 		spin_lock(e->stats_lock);
+ 
+-	__gnet_stats_copy_basic(e->running, b, e->cpu_bstats, e->bstats);
++	gnet_stats_add_basic(b, e->cpu_bstats, e->bstats, e->running);
+ 
+ 	if (e->stats_lock)
+ 		spin_unlock(e->stats_lock);
+@@ -76,14 +76,18 @@ static void est_fetch_counters(struct net_rate_estimator *e,
+ static void est_timer(struct timer_list *t)
+ {
+ 	struct net_rate_estimator *est = from_timer(est, t, timer);
+-	struct gnet_stats_basic_packed b;
++	struct gnet_stats_basic_sync b;
++	u64 b_bytes, b_packets;
+ 	u64 rate, brate;
+ 
+ 	est_fetch_counters(est, &b);
+-	brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log);
++	b_bytes = u64_stats_read(&b.bytes);
++	b_packets = u64_stats_read(&b.packets);
++
++	brate = (b_bytes - est->last_bytes) << (10 - est->intvl_log);
+ 	brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log);
+ 
+-	rate = (b.packets - est->last_packets) << (10 - est->intvl_log);
++	rate = (b_packets - est->last_packets) << (10 - est->intvl_log);
+ 	rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log);
+ 
+ 	write_seqcount_begin(&est->seq);
+@@ -91,8 +95,8 @@ static void est_timer(struct timer_list *t)
+ 	est->avpps += rate;
+ 	write_seqcount_end(&est->seq);
+ 
+-	est->last_bytes = b.bytes;
+-	est->last_packets = b.packets;
++	est->last_bytes = b_bytes;
++	est->last_packets = b_packets;
+ 
+ 	est->next_jiffies += ((HZ/4) << est->intvl_log);
+ 
+@@ -109,7 +113,9 @@ static void est_timer(struct timer_list *t)
+  * @cpu_bstats: bstats per cpu
+  * @rate_est: rate estimator statistics
+  * @lock: lock for statistics and control path
+- * @running: qdisc running seqcount
++ * @running: true if @bstats represents a running qdisc, thus @bstats'
++ *           internal values might change during basic reads. Only used
++ *           if @cpu_bstats is NULL
+  * @opt: rate estimator configuration TLV
+  *
+  * Creates a new rate estimator with &bstats as source and &rate_est
+@@ -121,16 +127,16 @@ static void est_timer(struct timer_list *t)
+  * Returns 0 on success or a negative error code.
+  *
+  */
+-int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
+-		      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
++int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
++		      struct gnet_stats_basic_sync __percpu *cpu_bstats,
+ 		      struct net_rate_estimator __rcu **rate_est,
+ 		      spinlock_t *lock,
+-		      seqcount_t *running,
++		      bool running,
+ 		      struct nlattr *opt)
+ {
+ 	struct gnet_estimator *parm = nla_data(opt);
+ 	struct net_rate_estimator *old, *est;
+-	struct gnet_stats_basic_packed b;
++	struct gnet_stats_basic_sync b;
+ 	int intvl_log;
+ 
+ 	if (nla_len(opt) < sizeof(*parm))
+@@ -164,8 +170,8 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
+ 	est_fetch_counters(est, &b);
+ 	if (lock)
+ 		local_bh_enable();
+-	est->last_bytes = b.bytes;
+-	est->last_packets = b.packets;
++	est->last_bytes = u64_stats_read(&b.bytes);
++	est->last_packets = u64_stats_read(&b.packets);
+ 
+ 	if (lock)
+ 		spin_lock_bh(lock);
+@@ -214,7 +220,9 @@ EXPORT_SYMBOL(gen_kill_estimator);
+  * @cpu_bstats: bstats per cpu
+  * @rate_est: rate estimator statistics
+  * @lock: lock for statistics and control path
+- * @running: qdisc running seqcount (might be NULL)
++ * @running: true if @bstats represents a running qdisc, thus @bstats'
++ *           internal values might change during basic reads. Only used
++ *           if @cpu_bstats is NULL
+  * @opt: rate estimator configuration TLV
+  *
+  * Replaces the configuration of a rate estimator by calling
+@@ -222,11 +230,11 @@ EXPORT_SYMBOL(gen_kill_estimator);
+  *
+  * Returns 0 on success or a negative error code.
+  */
+-int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
+-			  struct gnet_stats_basic_cpu __percpu *cpu_bstats,
++int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
++			  struct gnet_stats_basic_sync __percpu *cpu_bstats,
+ 			  struct net_rate_estimator __rcu **rate_est,
+ 			  spinlock_t *lock,
+-			  seqcount_t *running, struct nlattr *opt)
++			  bool running, struct nlattr *opt)
+ {
+ 	return gen_new_estimator(bstats, cpu_bstats, rate_est,
+ 				 lock, running, opt);
+diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
+index e491b083b348..a10335b4ba2d 100644
+--- a/net/core/gen_stats.c
++++ b/net/core/gen_stats.c
+@@ -18,7 +18,7 @@
+ #include <linux/gen_stats.h>
+ #include <net/netlink.h>
+ #include <net/gen_stats.h>
+-
++#include <net/sch_generic.h>
+ 
+ static inline int
+ gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size, int padattr)
+@@ -114,63 +114,112 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
+ }
+ EXPORT_SYMBOL(gnet_stats_start_copy);
+ 
+-static void
+-__gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
+-			    struct gnet_stats_basic_cpu __percpu *cpu)
++/* Must not be inlined, due to u64_stats seqcount_t lockdep key */
++void gnet_stats_basic_sync_init(struct gnet_stats_basic_sync *b)
+ {
++	u64_stats_set(&b->bytes, 0);
++	u64_stats_set(&b->packets, 0);
++	u64_stats_init(&b->syncp);
++}
++EXPORT_SYMBOL(gnet_stats_basic_sync_init);
++
++static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats,
++				     struct gnet_stats_basic_sync __percpu *cpu)
++{
++	u64 t_bytes = 0, t_packets = 0;
+ 	int i;
+ 
+ 	for_each_possible_cpu(i) {
+-		struct gnet_stats_basic_cpu *bcpu = per_cpu_ptr(cpu, i);
++		struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i);
+ 		unsigned int start;
+ 		u64 bytes, packets;
+ 
+ 		do {
+ 			start = u64_stats_fetch_begin_irq(&bcpu->syncp);
+-			bytes = bcpu->bstats.bytes;
+-			packets = bcpu->bstats.packets;
++			bytes = u64_stats_read(&bcpu->bytes);
++			packets = u64_stats_read(&bcpu->packets);
+ 		} while (u64_stats_fetch_retry_irq(&bcpu->syncp, start));
+ 
+-		bstats->bytes += bytes;
+-		bstats->packets += packets;
++		t_bytes += bytes;
++		t_packets += packets;
++	}
++	_bstats_update(bstats, t_bytes, t_packets);
++}
++
++void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
++			  struct gnet_stats_basic_sync __percpu *cpu,
++			  struct gnet_stats_basic_sync *b, bool running)
++{
++	unsigned int start;
++	u64 bytes = 0;
++	u64 packets = 0;
++
++	WARN_ON_ONCE((cpu || running) && in_hardirq());
++
++	if (cpu) {
++		gnet_stats_add_basic_cpu(bstats, cpu);
++		return;
+ 	}
++	do {
++		if (running)
++			start = u64_stats_fetch_begin_irq(&b->syncp);
++		bytes = u64_stats_read(&b->bytes);
++		packets = u64_stats_read(&b->packets);
++	} while (running && u64_stats_fetch_retry_irq(&b->syncp, start));
++
++	_bstats_update(bstats, bytes, packets);
+ }
++EXPORT_SYMBOL(gnet_stats_add_basic);
+ 
+-void
+-__gnet_stats_copy_basic(const seqcount_t *running,
+-			struct gnet_stats_basic_packed *bstats,
+-			struct gnet_stats_basic_cpu __percpu *cpu,
+-			struct gnet_stats_basic_packed *b)
++static void gnet_stats_read_basic(u64 *ret_bytes, u64 *ret_packets,
++				  struct gnet_stats_basic_sync __percpu *cpu,
++				  struct gnet_stats_basic_sync *b, bool running)
+ {
+-	unsigned int seq;
++	unsigned int start;
+ 
+ 	if (cpu) {
+-		__gnet_stats_copy_basic_cpu(bstats, cpu);
++		u64 t_bytes = 0, t_packets = 0;
++		int i;
++
++		for_each_possible_cpu(i) {
++			struct gnet_stats_basic_sync *bcpu = per_cpu_ptr(cpu, i);
++			unsigned int start;
++			u64 bytes, packets;
++
++			do {
++				start = u64_stats_fetch_begin_irq(&bcpu->syncp);
++				bytes = u64_stats_read(&bcpu->bytes);
++				packets = u64_stats_read(&bcpu->packets);
++			} while (u64_stats_fetch_retry_irq(&bcpu->syncp, start));
++
++			t_bytes += bytes;
++			t_packets += packets;
++		}
++		*ret_bytes = t_bytes;
++		*ret_packets = t_packets;
+ 		return;
+ 	}
+ 	do {
+ 		if (running)
+-			seq = read_seqcount_begin(running);
+-		bstats->bytes = b->bytes;
+-		bstats->packets = b->packets;
+-	} while (running && read_seqcount_retry(running, seq));
++			start = u64_stats_fetch_begin_irq(&b->syncp);
++		*ret_bytes = u64_stats_read(&b->bytes);
++		*ret_packets = u64_stats_read(&b->packets);
++	} while (running && u64_stats_fetch_retry_irq(&b->syncp, start));
+ }
+-EXPORT_SYMBOL(__gnet_stats_copy_basic);
+ 
+ static int
+-___gnet_stats_copy_basic(const seqcount_t *running,
+-			 struct gnet_dump *d,
+-			 struct gnet_stats_basic_cpu __percpu *cpu,
+-			 struct gnet_stats_basic_packed *b,
+-			 int type)
++___gnet_stats_copy_basic(struct gnet_dump *d,
++			 struct gnet_stats_basic_sync __percpu *cpu,
++			 struct gnet_stats_basic_sync *b,
++			 int type, bool running)
+ {
+-	struct gnet_stats_basic_packed bstats = {0};
++	u64 bstats_bytes, bstats_packets;
+ 
+-	__gnet_stats_copy_basic(running, &bstats, cpu, b);
++	gnet_stats_read_basic(&bstats_bytes, &bstats_packets, cpu, b, running);
+ 
+ 	if (d->compat_tc_stats && type == TCA_STATS_BASIC) {
+-		d->tc_stats.bytes = bstats.bytes;
+-		d->tc_stats.packets = bstats.packets;
++		d->tc_stats.bytes = bstats_bytes;
++		d->tc_stats.packets = bstats_packets;
+ 	}
+ 
+ 	if (d->tail) {
+@@ -178,24 +227,28 @@ ___gnet_stats_copy_basic(const seqcount_t *running,
+ 		int res;
+ 
+ 		memset(&sb, 0, sizeof(sb));
+-		sb.bytes = bstats.bytes;
+-		sb.packets = bstats.packets;
++		sb.bytes = bstats_bytes;
++		sb.packets = bstats_packets;
+ 		res = gnet_stats_copy(d, type, &sb, sizeof(sb), TCA_STATS_PAD);
+-		if (res < 0 || sb.packets == bstats.packets)
++		if (res < 0 || sb.packets == bstats_packets)
+ 			return res;
+ 		/* emit 64bit stats only if needed */
+-		return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats.packets,
+-				       sizeof(bstats.packets), TCA_STATS_PAD);
++		return gnet_stats_copy(d, TCA_STATS_PKT64, &bstats_packets,
++				       sizeof(bstats_packets), TCA_STATS_PAD);
+ 	}
+ 	return 0;
+ }
+ 
+ /**
+  * gnet_stats_copy_basic - copy basic statistics into statistic TLV
+- * @running: seqcount_t pointer
+  * @d: dumping handle
+  * @cpu: copy statistic per cpu
+  * @b: basic statistics
++ * @running: true if @b represents a running qdisc, thus @b's
++ *           internal values might change during basic reads.
++ *           Only used if @cpu is NULL
++ *
++ * Context: task; must not be run from IRQ or BH contexts
+  *
+  * Appends the basic statistics to the top level TLV created by
+  * gnet_stats_start_copy().
+@@ -204,22 +257,25 @@ ___gnet_stats_copy_basic(const seqcount_t *running,
+  * if the room in the socket buffer was not sufficient.
+  */
+ int
+-gnet_stats_copy_basic(const seqcount_t *running,
+-		      struct gnet_dump *d,
+-		      struct gnet_stats_basic_cpu __percpu *cpu,
+-		      struct gnet_stats_basic_packed *b)
++gnet_stats_copy_basic(struct gnet_dump *d,
++		      struct gnet_stats_basic_sync __percpu *cpu,
++		      struct gnet_stats_basic_sync *b,
++		      bool running)
+ {
+-	return ___gnet_stats_copy_basic(running, d, cpu, b,
+-					TCA_STATS_BASIC);
++	return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC, running);
+ }
+ EXPORT_SYMBOL(gnet_stats_copy_basic);
+ 
+ /**
+  * gnet_stats_copy_basic_hw - copy basic hw statistics into statistic TLV
+- * @running: seqcount_t pointer
+  * @d: dumping handle
+  * @cpu: copy statistic per cpu
+  * @b: basic statistics
++ * @running: true if @b represents a running qdisc, thus @b's
++ *           internal values might change during basic reads.
++ *           Only used if @cpu is NULL
++ *
++ * Context: task; must not be run from IRQ or BH contexts
+  *
+  * Appends the basic statistics to the top level TLV created by
+  * gnet_stats_start_copy().
+@@ -228,13 +284,12 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
+  * if the room in the socket buffer was not sufficient.
+  */
+ int
+-gnet_stats_copy_basic_hw(const seqcount_t *running,
+-			 struct gnet_dump *d,
+-			 struct gnet_stats_basic_cpu __percpu *cpu,
+-			 struct gnet_stats_basic_packed *b)
++gnet_stats_copy_basic_hw(struct gnet_dump *d,
++			 struct gnet_stats_basic_sync __percpu *cpu,
++			 struct gnet_stats_basic_sync *b,
++			 bool running)
+ {
+-	return ___gnet_stats_copy_basic(running, d, cpu, b,
+-					TCA_STATS_BASIC_HW);
++	return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC_HW, running);
+ }
+ EXPORT_SYMBOL(gnet_stats_copy_basic_hw);
+ 
+@@ -282,16 +337,15 @@ gnet_stats_copy_rate_est(struct gnet_dump *d,
+ }
+ EXPORT_SYMBOL(gnet_stats_copy_rate_est);
+ 
+-static void
+-__gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats,
+-			    const struct gnet_stats_queue __percpu *q)
++static void gnet_stats_add_queue_cpu(struct gnet_stats_queue *qstats,
++				     const struct gnet_stats_queue __percpu *q)
+ {
+ 	int i;
+ 
+ 	for_each_possible_cpu(i) {
+ 		const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i);
+ 
+-		qstats->qlen = 0;
++		qstats->qlen += qcpu->backlog;
+ 		qstats->backlog += qcpu->backlog;
+ 		qstats->drops += qcpu->drops;
+ 		qstats->requeues += qcpu->requeues;
+@@ -299,24 +353,21 @@ __gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats,
+ 	}
+ }
+ 
+-void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
+-			     const struct gnet_stats_queue __percpu *cpu,
+-			     const struct gnet_stats_queue *q,
+-			     __u32 qlen)
++void gnet_stats_add_queue(struct gnet_stats_queue *qstats,
++			  const struct gnet_stats_queue __percpu *cpu,
++			  const struct gnet_stats_queue *q)
+ {
+ 	if (cpu) {
+-		__gnet_stats_copy_queue_cpu(qstats, cpu);
++		gnet_stats_add_queue_cpu(qstats, cpu);
+ 	} else {
+-		qstats->qlen = q->qlen;
+-		qstats->backlog = q->backlog;
+-		qstats->drops = q->drops;
+-		qstats->requeues = q->requeues;
+-		qstats->overlimits = q->overlimits;
++		qstats->qlen += q->qlen;
++		qstats->backlog += q->backlog;
++		qstats->drops += q->drops;
++		qstats->requeues += q->requeues;
++		qstats->overlimits += q->overlimits;
+ 	}
+-
+-	qstats->qlen = qlen;
+ }
+-EXPORT_SYMBOL(__gnet_stats_copy_queue);
++EXPORT_SYMBOL(gnet_stats_add_queue);
+ 
+ /**
+  * gnet_stats_copy_queue - copy queue statistics into statistics TLV
+@@ -339,7 +390,8 @@ gnet_stats_copy_queue(struct gnet_dump *d,
+ {
+ 	struct gnet_stats_queue qstats = {0};
+ 
+-	__gnet_stats_copy_queue(&qstats, cpu_q, q, qlen);
++	gnet_stats_add_queue(&qstats, cpu_q, q);
++	qstats.qlen = qlen;
+ 
+ 	if (d->compat_tc_stats) {
+ 		d->tc_stats.drops = qstats.drops;
+diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
+index 0d5c422f8745..8aec1b529364 100644
+--- a/net/netfilter/xt_RATEEST.c
++++ b/net/netfilter/xt_RATEEST.c
+@@ -94,11 +94,11 @@ static unsigned int
+ xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par)
+ {
+ 	const struct xt_rateest_target_info *info = par->targinfo;
+-	struct gnet_stats_basic_packed *stats = &info->est->bstats;
++	struct gnet_stats_basic_sync *stats = &info->est->bstats;
+ 
+ 	spin_lock_bh(&info->est->lock);
+-	stats->bytes += skb->len;
+-	stats->packets++;
++	u64_stats_add(&stats->bytes, skb->len);
++	u64_stats_inc(&stats->packets);
+ 	spin_unlock_bh(&info->est->lock);
+ 
+ 	return XT_CONTINUE;
+@@ -143,6 +143,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
+ 	if (!est)
+ 		goto err1;
+ 
++	gnet_stats_basic_sync_init(&est->bstats);
+ 	strlcpy(est->name, info->name, sizeof(est->name));
+ 	spin_lock_init(&est->lock);
+ 	est->refcnt		= 1;
+diff --git a/net/sched/act_api.c b/net/sched/act_api.c
+index d775676956bf..94c05713ecf8 100644
+--- a/net/sched/act_api.c
++++ b/net/sched/act_api.c
+@@ -486,16 +486,18 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
+ 		atomic_set(&p->tcfa_bindcnt, 1);
+ 
+ 	if (cpustats) {
+-		p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
++		p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
+ 		if (!p->cpu_bstats)
+ 			goto err1;
+-		p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
++		p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
+ 		if (!p->cpu_bstats_hw)
+ 			goto err2;
+ 		p->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
+ 		if (!p->cpu_qstats)
+ 			goto err3;
+ 	}
++	gnet_stats_basic_sync_init(&p->tcfa_bstats);
++	gnet_stats_basic_sync_init(&p->tcfa_bstats_hw);
+ 	spin_lock_init(&p->tcfa_lock);
+ 	p->tcfa_index = index;
+ 	p->tcfa_tm.install = jiffies;
+@@ -505,7 +507,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
+ 	if (est) {
+ 		err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
+ 					&p->tcfa_rate_est,
+-					&p->tcfa_lock, NULL, est);
++					&p->tcfa_lock, false, est);
+ 		if (err)
+ 			goto err4;
+ 	}
+@@ -1141,13 +1143,13 @@ void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets,
+ 			     u64 drops, bool hw)
+ {
+ 	if (a->cpu_bstats) {
+-		_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
++		_bstats_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+ 
+ 		this_cpu_ptr(a->cpu_qstats)->drops += drops;
+ 
+ 		if (hw)
+-			_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
+-					   bytes, packets);
++			_bstats_update(this_cpu_ptr(a->cpu_bstats_hw),
++				       bytes, packets);
+ 		return;
+ 	}
+ 
+@@ -1186,9 +1188,10 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
+ 	if (err < 0)
+ 		goto errout;
+ 
+-	if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 ||
+-	    gnet_stats_copy_basic_hw(NULL, &d, p->cpu_bstats_hw,
+-				     &p->tcfa_bstats_hw) < 0 ||
++	if (gnet_stats_copy_basic(&d, p->cpu_bstats,
++				  &p->tcfa_bstats, false) < 0 ||
++	    gnet_stats_copy_basic_hw(&d, p->cpu_bstats_hw,
++				     &p->tcfa_bstats_hw, false) < 0 ||
+ 	    gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 ||
+ 	    gnet_stats_copy_queue(&d, p->cpu_qstats,
+ 				  &p->tcfa_qstats,
+diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
+index 2a05bad56ef3..a77d8908e737 100644
+--- a/net/sched/act_bpf.c
++++ b/net/sched/act_bpf.c
+@@ -41,7 +41,7 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act,
+ 	int action, filter_res;
+ 
+ 	tcf_lastuse_update(&prog->tcf_tm);
+-	bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
++	bstats_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
+ 
+ 	filter = rcu_dereference(prog->filter);
+ 	if (at_ingress) {
+diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
+index ec987ec75807..41ba55e60b1b 100644
+--- a/net/sched/act_ife.c
++++ b/net/sched/act_ife.c
+@@ -718,7 +718,7 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
+ 	u8 *tlv_data;
+ 	u16 metalen;
+ 
+-	bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
++	bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
+ 	tcf_lastuse_update(&ife->tcf_tm);
+ 
+ 	if (skb_at_tc_ingress(skb))
+@@ -806,7 +806,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
+ 			exceed_mtu = true;
+ 	}
+ 
+-	bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
++	bstats_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
+ 	tcf_lastuse_update(&ife->tcf_tm);
+ 
+ 	if (!metalen) {		/* no metadata to send */
+diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
+index d010c5b8e83b..d39b74331c26 100644
+--- a/net/sched/act_mpls.c
++++ b/net/sched/act_mpls.c
+@@ -59,7 +59,7 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a,
+ 	int ret, mac_len;
+ 
+ 	tcf_lastuse_update(&m->tcf_tm);
+-	bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
++	bstats_update(this_cpu_ptr(m->common.cpu_bstats), skb);
+ 
+ 	/* Ensure 'data' points at mac_header prior calling mpls manipulating
+ 	 * functions.
+diff --git a/net/sched/act_police.c b/net/sched/act_police.c
+index db1d021c16be..d4ac56e4579c 100644
+--- a/net/sched/act_police.c
++++ b/net/sched/act_police.c
+@@ -125,7 +125,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
+ 					    police->common.cpu_bstats,
+ 					    &police->tcf_rate_est,
+ 					    &police->tcf_lock,
+-					    NULL, est);
++					    false, est);
+ 		if (err)
+ 			goto failure;
+ 	} else if (tb[TCA_POLICE_AVRATE] &&
+@@ -262,7 +262,7 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
+ 	int ret;
+ 
+ 	tcf_lastuse_update(&police->tcf_tm);
+-	bstats_cpu_update(this_cpu_ptr(police->common.cpu_bstats), skb);
++	bstats_update(this_cpu_ptr(police->common.cpu_bstats), skb);
+ 
+ 	ret = READ_ONCE(police->tcf_action);
+ 	p = rcu_dereference_bh(police->params);
+diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
+index ca67d9644917..ef35df94182f 100644
+--- a/net/sched/act_sample.c
++++ b/net/sched/act_sample.c
+@@ -170,7 +170,7 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
+ 	int retval;
+ 
+ 	tcf_lastuse_update(&s->tcf_tm);
+-	bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb);
++	bstats_update(this_cpu_ptr(s->common.cpu_bstats), skb);
+ 	retval = READ_ONCE(s->tcf_action);
+ 
+ 	psample_group = rcu_dereference_bh(s->psample_group);
+diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
+index 788527154025..8c1d60bde93e 100644
+--- a/net/sched/act_simple.c
++++ b/net/sched/act_simple.c
+@@ -36,7 +36,8 @@ static int tcf_simp_act(struct sk_buff *skb, const struct tc_action *a,
+ 	 * then it would look like "hello_3" (without quotes)
+ 	 */
+ 	pr_info("simple: %s_%llu\n",
+-	       (char *)d->tcfd_defdata, d->tcf_bstats.packets);
++		(char *)d->tcfd_defdata,
++		u64_stats_read(&d->tcf_bstats.packets));
+ 	spin_unlock(&d->tcf_lock);
+ 	return d->tcf_action;
+ }
+diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
+index 6088ceaf582e..f6df717b9f17 100644
+--- a/net/sched/act_skbedit.c
++++ b/net/sched/act_skbedit.c
+@@ -31,7 +31,7 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
+ 	int action;
+ 
+ 	tcf_lastuse_update(&d->tcf_tm);
+-	bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
++	bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb);
+ 
+ 	params = rcu_dereference_bh(d->params);
+ 	action = READ_ONCE(d->tcf_action);
+diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
+index ee9cc0abf9e1..2083612d8780 100644
+--- a/net/sched/act_skbmod.c
++++ b/net/sched/act_skbmod.c
+@@ -31,7 +31,7 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a,
+ 	u64 flags;
+ 
+ 	tcf_lastuse_update(&d->tcf_tm);
+-	bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
++	bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb);
+ 
+ 	action = READ_ONCE(d->tcf_action);
+ 	if (unlikely(action == TC_ACT_SHOT))
+diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
+index 328db5e1b0ea..c910046bbe4f 100644
+--- a/net/sched/sch_api.c
++++ b/net/sched/sch_api.c
+@@ -884,7 +884,7 @@ static void qdisc_offload_graft_root(struct net_device *dev,
+ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
+ 			 u32 portid, u32 seq, u16 flags, int event)
+ {
+-	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
++	struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
+ 	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
+ 	struct tcmsg *tcm;
+ 	struct nlmsghdr  *nlh;
+@@ -942,8 +942,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
+ 		cpu_qstats = q->cpu_qstats;
+ 	}
+ 
+-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
+-				  &d, cpu_bstats, &q->bstats) < 0 ||
++	if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 ||
+ 	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
+ 	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
+ 		goto nla_put_failure;
+@@ -1275,26 +1274,17 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
+ 		rcu_assign_pointer(sch->stab, stab);
+ 	}
+ 	if (tca[TCA_RATE]) {
+-		seqcount_t *running;
+-
+ 		err = -EOPNOTSUPP;
+ 		if (sch->flags & TCQ_F_MQROOT) {
+ 			NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
+ 			goto err_out4;
+ 		}
+ 
+-		if (sch->parent != TC_H_ROOT &&
+-		    !(sch->flags & TCQ_F_INGRESS) &&
+-		    (!p || !(p->flags & TCQ_F_MQROOT)))
+-			running = qdisc_root_sleeping_running(sch);
+-		else
+-			running = &sch->running;
+-
+ 		err = gen_new_estimator(&sch->bstats,
+ 					sch->cpu_bstats,
+ 					&sch->rate_est,
+ 					NULL,
+-					running,
++					true,
+ 					tca[TCA_RATE]);
+ 		if (err) {
+ 			NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
+@@ -1370,7 +1360,7 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
+ 				      sch->cpu_bstats,
+ 				      &sch->rate_est,
+ 				      NULL,
+-				      qdisc_root_sleeping_running(sch),
++				      true,
+ 				      tca[TCA_RATE]);
+ 	}
+ out:
+diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
+index 33737169cc2d..28e1897e0da7 100644
+--- a/net/sched/sch_atm.c
++++ b/net/sched/sch_atm.c
+@@ -52,7 +52,7 @@ struct atm_flow_data {
+ 	struct atm_qdisc_data	*parent;	/* parent qdisc */
+ 	struct socket		*sock;		/* for closing */
+ 	int			ref;		/* reference count */
+-	struct gnet_stats_basic_packed	bstats;
++	struct gnet_stats_basic_sync	bstats;
+ 	struct gnet_stats_queue	qstats;
+ 	struct list_head	list;
+ 	struct atm_flow_data	*excess;	/* flow for excess traffic;
+@@ -551,6 +551,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt,
+ 	pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
+ 	INIT_LIST_HEAD(&p->flows);
+ 	INIT_LIST_HEAD(&p->link.list);
++	gnet_stats_basic_sync_init(&p->link.bstats);
+ 	list_add(&p->link.list, &p->flows);
+ 	p->link.q = qdisc_create_dflt(sch->dev_queue,
+ 				      &pfifo_qdisc_ops, sch->handle, extack);
+@@ -654,8 +655,7 @@ atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
+ {
+ 	struct atm_flow_data *flow = (struct atm_flow_data *)arg;
+ 
+-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+-				  d, NULL, &flow->bstats) < 0 ||
++	if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 ||
+ 	    gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0)
+ 		return -1;
+ 
+diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
+index 46b3dd71777d..c3a74a2266b0 100644
+--- a/net/sched/sch_cbq.c
++++ b/net/sched/sch_cbq.c
+@@ -116,7 +116,7 @@ struct cbq_class {
+ 	long			avgidle;
+ 	long			deficit;	/* Saved deficit for WRR */
+ 	psched_time_t		penalized;
+-	struct gnet_stats_basic_packed bstats;
++	struct gnet_stats_basic_sync bstats;
+ 	struct gnet_stats_queue qstats;
+ 	struct net_rate_estimator __rcu *rate_est;
+ 	struct tc_cbq_xstats	xstats;
+@@ -565,8 +565,7 @@ cbq_update(struct cbq_sched_data *q)
+ 		long avgidle = cl->avgidle;
+ 		long idle;
+ 
+-		cl->bstats.packets++;
+-		cl->bstats.bytes += len;
++		_bstats_update(&cl->bstats, len, 1);
+ 
+ 		/*
+ 		 * (now - last) is total time between packet right edges.
+@@ -1383,8 +1382,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
+ 	if (cl->undertime != PSCHED_PASTPERFECT)
+ 		cl->xstats.undertime = cl->undertime - q->now;
+ 
+-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+-				  d, NULL, &cl->bstats) < 0 ||
++	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
+ 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ 	    gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
+ 		return -1;
+@@ -1518,7 +1516,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
+ 			err = gen_replace_estimator(&cl->bstats, NULL,
+ 						    &cl->rate_est,
+ 						    NULL,
+-						    qdisc_root_sleeping_running(sch),
++						    true,
+ 						    tca[TCA_RATE]);
+ 			if (err) {
+ 				NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator");
+@@ -1610,6 +1608,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
+ 	if (cl == NULL)
+ 		goto failure;
+ 
++	gnet_stats_basic_sync_init(&cl->bstats);
+ 	err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
+ 	if (err) {
+ 		kfree(cl);
+@@ -1618,9 +1617,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
+ 
+ 	if (tca[TCA_RATE]) {
+ 		err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
+-					NULL,
+-					qdisc_root_sleeping_running(sch),
+-					tca[TCA_RATE]);
++					NULL, true, tca[TCA_RATE]);
+ 		if (err) {
+ 			NL_SET_ERR_MSG(extack, "Couldn't create new estimator");
+ 			tcf_block_put(cl->block);
+diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
+index 80a88e208d2b..4e5b1cf11b85 100644
+--- a/net/sched/sch_drr.c
++++ b/net/sched/sch_drr.c
+@@ -19,7 +19,7 @@ struct drr_class {
+ 	struct Qdisc_class_common	common;
+ 	unsigned int			filter_cnt;
+ 
+-	struct gnet_stats_basic_packed		bstats;
++	struct gnet_stats_basic_sync		bstats;
+ 	struct gnet_stats_queue		qstats;
+ 	struct net_rate_estimator __rcu *rate_est;
+ 	struct list_head		alist;
+@@ -85,8 +85,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+ 		if (tca[TCA_RATE]) {
+ 			err = gen_replace_estimator(&cl->bstats, NULL,
+ 						    &cl->rate_est,
+-						    NULL,
+-						    qdisc_root_sleeping_running(sch),
++						    NULL, true,
+ 						    tca[TCA_RATE]);
+ 			if (err) {
+ 				NL_SET_ERR_MSG(extack, "Failed to replace estimator");
+@@ -106,6 +105,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+ 	if (cl == NULL)
+ 		return -ENOBUFS;
+ 
++	gnet_stats_basic_sync_init(&cl->bstats);
+ 	cl->common.classid = classid;
+ 	cl->quantum	   = quantum;
+ 	cl->qdisc	   = qdisc_create_dflt(sch->dev_queue,
+@@ -118,9 +118,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+ 
+ 	if (tca[TCA_RATE]) {
+ 		err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est,
+-					    NULL,
+-					    qdisc_root_sleeping_running(sch),
+-					    tca[TCA_RATE]);
++					    NULL, true, tca[TCA_RATE]);
+ 		if (err) {
+ 			NL_SET_ERR_MSG(extack, "Failed to replace estimator");
+ 			qdisc_put(cl->qdisc);
+@@ -267,8 +265,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
+ 	if (qlen)
+ 		xstats.deficit = cl->deficit;
+ 
+-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+-				  d, NULL, &cl->bstats) < 0 ||
++	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
+ 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ 	    gnet_stats_copy_queue(d, cl_q->cpu_qstats, &cl_q->qstats, qlen) < 0)
+ 		return -1;
+diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
+index 175e07b3d25c..8de4365886e8 100644
+--- a/net/sched/sch_ets.c
++++ b/net/sched/sch_ets.c
+@@ -41,7 +41,7 @@ struct ets_class {
+ 	struct Qdisc *qdisc;
+ 	u32 quantum;
+ 	u32 deficit;
+-	struct gnet_stats_basic_packed bstats;
++	struct gnet_stats_basic_sync bstats;
+ 	struct gnet_stats_queue qstats;
+ };
+ 
+@@ -325,8 +325,7 @@ static int ets_class_dump_stats(struct Qdisc *sch, unsigned long arg,
+ 	struct ets_class *cl = ets_class_from_arg(sch, arg);
+ 	struct Qdisc *cl_q = cl->qdisc;
+ 
+-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+-				  d, NULL, &cl_q->bstats) < 0 ||
++	if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats, true) < 0 ||
+ 	    qdisc_qstats_copy(d, cl_q) < 0)
+ 		return -1;
+ 
+@@ -661,7 +660,6 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
+ 
+ 	q->nbands = nbands;
+ 	for (i = nstrict; i < q->nstrict; i++) {
+-		INIT_LIST_HEAD(&q->classes[i].alist);
+ 		if (q->classes[i].qdisc->q.qlen) {
+ 			list_add_tail(&q->classes[i].alist, &q->active);
+ 			q->classes[i].deficit = quanta[i];
+@@ -689,7 +687,11 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
+ 	ets_offload_change(sch);
+ 	for (i = q->nbands; i < oldbands; i++) {
+ 		qdisc_put(q->classes[i].qdisc);
+-		memset(&q->classes[i], 0, sizeof(q->classes[i]));
++		q->classes[i].qdisc = NULL;
++		q->classes[i].quantum = 0;
++		q->classes[i].deficit = 0;
++		gnet_stats_basic_sync_init(&q->classes[i].bstats);
++		memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats));
+ 	}
+ 	return 0;
+ }
+@@ -698,7 +700,7 @@ static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
+ 			  struct netlink_ext_ack *extack)
+ {
+ 	struct ets_sched *q = qdisc_priv(sch);
+-	int err;
++	int err, i;
+ 
+ 	if (!opt)
+ 		return -EINVAL;
+@@ -708,6 +710,9 @@ static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
+ 		return err;
+ 
+ 	INIT_LIST_HEAD(&q->active);
++	for (i = 0; i < TCQ_ETS_MAX_BANDS; i++)
++		INIT_LIST_HEAD(&q->classes[i].alist);
++
+ 	return ets_qdisc_change(sch, opt, extack);
+ }
+ 
+diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
+index 02299785209c..b979ae2f551c 100644
+--- a/net/sched/sch_generic.c
++++ b/net/sched/sch_generic.c
+@@ -304,8 +304,8 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
+ 
+ /*
+  * Transmit possibly several skbs, and handle the return status as
+- * required. Owning running seqcount bit guarantees that
+- * only one CPU can execute this function.
++ * required. Owning qdisc running bit guarantees that only one CPU
++ * can execute this function.
+  *
+  * Returns to the caller:
+  *				false  - hardware queue frozen backoff
+@@ -606,7 +606,6 @@ struct Qdisc noop_qdisc = {
+ 	.ops		=	&noop_qdisc_ops,
+ 	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
+ 	.dev_queue	=	&noop_netdev_queue,
+-	.running	=	SEQCNT_ZERO(noop_qdisc.running),
+ 	.busylock	=	__SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
+ 	.gso_skb = {
+ 		.next = (struct sk_buff *)&noop_qdisc.gso_skb,
+@@ -867,7 +866,6 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
+ EXPORT_SYMBOL(pfifo_fast_ops);
+ 
+ static struct lock_class_key qdisc_tx_busylock;
+-static struct lock_class_key qdisc_running_key;
+ 
+ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
+ 			  const struct Qdisc_ops *ops,
+@@ -892,11 +890,12 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
+ 	__skb_queue_head_init(&sch->gso_skb);
+ 	__skb_queue_head_init(&sch->skb_bad_txq);
+ 	qdisc_skb_head_init(&sch->q);
++	gnet_stats_basic_sync_init(&sch->bstats);
+ 	spin_lock_init(&sch->q.lock);
+ 
+ 	if (ops->static_flags & TCQ_F_CPUSTATS) {
+ 		sch->cpu_bstats =
+-			netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
++			netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
+ 		if (!sch->cpu_bstats)
+ 			goto errout1;
+ 
+@@ -916,10 +915,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
+ 	lockdep_set_class(&sch->seqlock,
+ 			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
+ 
+-	seqcount_init(&sch->running);
+-	lockdep_set_class(&sch->running,
+-			  dev->qdisc_running_key ?: &qdisc_running_key);
+-
+ 	sch->ops = ops;
+ 	sch->flags = ops->static_flags;
+ 	sch->enqueue = ops->enqueue;
+diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
+index 621dc6afde8f..1073c76d05c4 100644
+--- a/net/sched/sch_gred.c
++++ b/net/sched/sch_gred.c
+@@ -56,6 +56,7 @@ struct gred_sched {
+ 	u32 		DPs;
+ 	u32 		def;
+ 	struct red_vars wred_set;
++	struct tc_gred_qopt_offload *opt;
+ };
+ 
+ static inline int gred_wred_mode(struct gred_sched *table)
+@@ -311,48 +312,50 @@ static void gred_offload(struct Qdisc *sch, enum tc_gred_command command)
+ {
+ 	struct gred_sched *table = qdisc_priv(sch);
+ 	struct net_device *dev = qdisc_dev(sch);
+-	struct tc_gred_qopt_offload opt = {
+-		.command	= command,
+-		.handle		= sch->handle,
+-		.parent		= sch->parent,
+-	};
++	struct tc_gred_qopt_offload *opt = table->opt;
+ 
+ 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+ 		return;
+ 
++	memset(opt, 0, sizeof(*opt));
++	opt->command = command;
++	opt->handle = sch->handle;
++	opt->parent = sch->parent;
++
+ 	if (command == TC_GRED_REPLACE) {
+ 		unsigned int i;
+ 
+-		opt.set.grio_on = gred_rio_mode(table);
+-		opt.set.wred_on = gred_wred_mode(table);
+-		opt.set.dp_cnt = table->DPs;
+-		opt.set.dp_def = table->def;
++		opt->set.grio_on = gred_rio_mode(table);
++		opt->set.wred_on = gred_wred_mode(table);
++		opt->set.dp_cnt = table->DPs;
++		opt->set.dp_def = table->def;
+ 
+ 		for (i = 0; i < table->DPs; i++) {
+ 			struct gred_sched_data *q = table->tab[i];
+ 
+ 			if (!q)
+ 				continue;
+-			opt.set.tab[i].present = true;
+-			opt.set.tab[i].limit = q->limit;
+-			opt.set.tab[i].prio = q->prio;
+-			opt.set.tab[i].min = q->parms.qth_min >> q->parms.Wlog;
+-			opt.set.tab[i].max = q->parms.qth_max >> q->parms.Wlog;
+-			opt.set.tab[i].is_ecn = gred_use_ecn(q);
+-			opt.set.tab[i].is_harddrop = gred_use_harddrop(q);
+-			opt.set.tab[i].probability = q->parms.max_P;
+-			opt.set.tab[i].backlog = &q->backlog;
++			opt->set.tab[i].present = true;
++			opt->set.tab[i].limit = q->limit;
++			opt->set.tab[i].prio = q->prio;
++			opt->set.tab[i].min = q->parms.qth_min >> q->parms.Wlog;
++			opt->set.tab[i].max = q->parms.qth_max >> q->parms.Wlog;
++			opt->set.tab[i].is_ecn = gred_use_ecn(q);
++			opt->set.tab[i].is_harddrop = gred_use_harddrop(q);
++			opt->set.tab[i].probability = q->parms.max_P;
++			opt->set.tab[i].backlog = &q->backlog;
+ 		}
+-		opt.set.qstats = &sch->qstats;
++		opt->set.qstats = &sch->qstats;
+ 	}
+ 
+-	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, &opt);
++	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, opt);
+ }
+ 
+ static int gred_offload_dump_stats(struct Qdisc *sch)
+ {
+ 	struct gred_sched *table = qdisc_priv(sch);
+ 	struct tc_gred_qopt_offload *hw_stats;
++	u64 bytes = 0, packets = 0;
+ 	unsigned int i;
+ 	int ret;
+ 
+@@ -364,9 +367,11 @@ static int gred_offload_dump_stats(struct Qdisc *sch)
+ 	hw_stats->handle = sch->handle;
+ 	hw_stats->parent = sch->parent;
+ 
+-	for (i = 0; i < MAX_DPs; i++)
++	for (i = 0; i < MAX_DPs; i++) {
++		gnet_stats_basic_sync_init(&hw_stats->stats.bstats[i]);
+ 		if (table->tab[i])
+ 			hw_stats->stats.xstats[i] = &table->tab[i]->stats;
++	}
+ 
+ 	ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats);
+ 	/* Even if driver returns failure adjust the stats - in case offload
+@@ -375,19 +380,19 @@ static int gred_offload_dump_stats(struct Qdisc *sch)
+ 	for (i = 0; i < MAX_DPs; i++) {
+ 		if (!table->tab[i])
+ 			continue;
+-		table->tab[i]->packetsin += hw_stats->stats.bstats[i].packets;
+-		table->tab[i]->bytesin += hw_stats->stats.bstats[i].bytes;
++		table->tab[i]->packetsin += u64_stats_read(&hw_stats->stats.bstats[i].packets);
++		table->tab[i]->bytesin += u64_stats_read(&hw_stats->stats.bstats[i].bytes);
+ 		table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog;
+ 
+-		_bstats_update(&sch->bstats,
+-			       hw_stats->stats.bstats[i].bytes,
+-			       hw_stats->stats.bstats[i].packets);
++		bytes += u64_stats_read(&hw_stats->stats.bstats[i].bytes);
++		packets += u64_stats_read(&hw_stats->stats.bstats[i].packets);
+ 		sch->qstats.qlen += hw_stats->stats.qstats[i].qlen;
+ 		sch->qstats.backlog += hw_stats->stats.qstats[i].backlog;
+ 		sch->qstats.drops += hw_stats->stats.qstats[i].drops;
+ 		sch->qstats.requeues += hw_stats->stats.qstats[i].requeues;
+ 		sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits;
+ 	}
++	_bstats_update(&sch->bstats, bytes, packets);
+ 
+ 	kfree(hw_stats);
+ 	return ret;
+@@ -728,6 +733,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt,
+ static int gred_init(struct Qdisc *sch, struct nlattr *opt,
+ 		     struct netlink_ext_ack *extack)
+ {
++	struct gred_sched *table = qdisc_priv(sch);
+ 	struct nlattr *tb[TCA_GRED_MAX + 1];
+ 	int err;
+ 
+@@ -751,6 +757,12 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt,
+ 		sch->limit = qdisc_dev(sch)->tx_queue_len
+ 		             * psched_mtu(qdisc_dev(sch));
+ 
++	if (qdisc_dev(sch)->netdev_ops->ndo_setup_tc) {
++		table->opt = kzalloc(sizeof(*table->opt), GFP_KERNEL);
++		if (!table->opt)
++			return -ENOMEM;
++	}
++
+ 	return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack);
+ }
+ 
+@@ -907,6 +919,7 @@ static void gred_destroy(struct Qdisc *sch)
+ 			gred_destroy_vq(table->tab[i]);
+ 	}
+ 	gred_offload(sch, TC_GRED_DESTROY);
++	kfree(table->opt);
+ }
+ 
+ static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
+diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
+index c802a027b4f3..03efc40e42fc 100644
+--- a/net/sched/sch_hfsc.c
++++ b/net/sched/sch_hfsc.c
+@@ -111,7 +111,7 @@ enum hfsc_class_flags {
+ struct hfsc_class {
+ 	struct Qdisc_class_common cl_common;
+ 
+-	struct gnet_stats_basic_packed bstats;
++	struct gnet_stats_basic_sync bstats;
+ 	struct gnet_stats_queue qstats;
+ 	struct net_rate_estimator __rcu *rate_est;
+ 	struct tcf_proto __rcu *filter_list; /* filter list */
+@@ -965,7 +965,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+ 			err = gen_replace_estimator(&cl->bstats, NULL,
+ 						    &cl->rate_est,
+ 						    NULL,
+-						    qdisc_root_sleeping_running(sch),
++						    true,
+ 						    tca[TCA_RATE]);
+ 			if (err)
+ 				return err;
+@@ -1033,9 +1033,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+ 
+ 	if (tca[TCA_RATE]) {
+ 		err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
+-					NULL,
+-					qdisc_root_sleeping_running(sch),
+-					tca[TCA_RATE]);
++					NULL, true, tca[TCA_RATE]);
+ 		if (err) {
+ 			tcf_block_put(cl->block);
+ 			kfree(cl);
+@@ -1328,7 +1326,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
+ 	xstats.work    = cl->cl_total;
+ 	xstats.rtwork  = cl->cl_cumul;
+ 
+-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 ||
++	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
+ 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ 	    gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
+ 		return -1;
+@@ -1406,6 +1404,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt,
+ 	if (err)
+ 		return err;
+ 
++	gnet_stats_basic_sync_init(&q->root.bstats);
+ 	q->root.cl_common.classid = sch->handle;
+ 	q->root.sched   = q;
+ 	q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
+index 8ce999e4ca32..a90e4fffdfd9 100644
+--- a/net/sched/sch_htb.c
++++ b/net/sched/sch_htb.c
+@@ -113,8 +113,8 @@ struct htb_class {
+ 	/*
+ 	 * Written often fields
+ 	 */
+-	struct gnet_stats_basic_packed bstats;
+-	struct gnet_stats_basic_packed bstats_bias;
++	struct gnet_stats_basic_sync bstats;
++	struct gnet_stats_basic_sync bstats_bias;
+ 	struct tc_htb_xstats	xstats;	/* our special stats */
+ 
+ 	/* token bucket parameters */
+@@ -1309,10 +1309,11 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
+ static void htb_offload_aggregate_stats(struct htb_sched *q,
+ 					struct htb_class *cl)
+ {
++	u64 bytes = 0, packets = 0;
+ 	struct htb_class *c;
+ 	unsigned int i;
+ 
+-	memset(&cl->bstats, 0, sizeof(cl->bstats));
++	gnet_stats_basic_sync_init(&cl->bstats);
+ 
+ 	for (i = 0; i < q->clhash.hashsize; i++) {
+ 		hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
+@@ -1324,14 +1325,15 @@ static void htb_offload_aggregate_stats(struct htb_sched *q,
+ 			if (p != cl)
+ 				continue;
+ 
+-			cl->bstats.bytes += c->bstats_bias.bytes;
+-			cl->bstats.packets += c->bstats_bias.packets;
++			bytes += u64_stats_read(&c->bstats_bias.bytes);
++			packets += u64_stats_read(&c->bstats_bias.packets);
+ 			if (c->level == 0) {
+-				cl->bstats.bytes += c->leaf.q->bstats.bytes;
+-				cl->bstats.packets += c->leaf.q->bstats.packets;
++				bytes += u64_stats_read(&c->leaf.q->bstats.bytes);
++				packets += u64_stats_read(&c->leaf.q->bstats.packets);
+ 			}
+ 		}
+ 	}
++	_bstats_update(&cl->bstats, bytes, packets);
+ }
+ 
+ static int
+@@ -1358,16 +1360,16 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
+ 			if (cl->leaf.q)
+ 				cl->bstats = cl->leaf.q->bstats;
+ 			else
+-				memset(&cl->bstats, 0, sizeof(cl->bstats));
+-			cl->bstats.bytes += cl->bstats_bias.bytes;
+-			cl->bstats.packets += cl->bstats_bias.packets;
++				gnet_stats_basic_sync_init(&cl->bstats);
++			_bstats_update(&cl->bstats,
++				       u64_stats_read(&cl->bstats_bias.bytes),
++				       u64_stats_read(&cl->bstats_bias.packets));
+ 		} else {
+ 			htb_offload_aggregate_stats(q, cl);
+ 		}
+ 	}
+ 
+-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+-				  d, NULL, &cl->bstats) < 0 ||
++	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
+ 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ 	    gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
+ 		return -1;
+@@ -1582,8 +1584,9 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
+ 	}
+ 
+ 	if (cl->parent) {
+-		cl->parent->bstats_bias.bytes += q->bstats.bytes;
+-		cl->parent->bstats_bias.packets += q->bstats.packets;
++		_bstats_update(&cl->parent->bstats_bias,
++			       u64_stats_read(&q->bstats.bytes),
++			       u64_stats_read(&q->bstats.packets));
+ 	}
+ 
+ 	offload_opt = (struct tc_htb_qopt_offload) {
+@@ -1875,6 +1878,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
+ 		if (!cl)
+ 			goto failure;
+ 
++		gnet_stats_basic_sync_init(&cl->bstats);
++		gnet_stats_basic_sync_init(&cl->bstats_bias);
++
+ 		err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
+ 		if (err) {
+ 			kfree(cl);
+@@ -1884,7 +1890,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
+ 			err = gen_new_estimator(&cl->bstats, NULL,
+ 						&cl->rate_est,
+ 						NULL,
+-						qdisc_root_sleeping_running(sch),
++						true,
+ 						tca[TCA_RATE] ? : &est.nla);
+ 			if (err)
+ 				goto err_block_put;
+@@ -1948,8 +1954,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
+ 				htb_graft_helper(dev_queue, old_q);
+ 				goto err_kill_estimator;
+ 			}
+-			parent->bstats_bias.bytes += old_q->bstats.bytes;
+-			parent->bstats_bias.packets += old_q->bstats.packets;
++			_bstats_update(&parent->bstats_bias,
++				       u64_stats_read(&old_q->bstats.bytes),
++				       u64_stats_read(&old_q->bstats.packets));
+ 			qdisc_put(old_q);
+ 		}
+ 		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
+@@ -2009,7 +2016,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
+ 			err = gen_replace_estimator(&cl->bstats, NULL,
+ 						    &cl->rate_est,
+ 						    NULL,
+-						    qdisc_root_sleeping_running(sch),
++						    true,
+ 						    tca[TCA_RATE]);
+ 			if (err)
+ 				return err;
+diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
+index db18d8a860f9..24c5d97d88dd 100644
+--- a/net/sched/sch_mq.c
++++ b/net/sched/sch_mq.c
+@@ -153,10 +153,9 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
+ 	struct net_device *dev = qdisc_dev(sch);
+ 	struct Qdisc *qdisc;
+ 	unsigned int ntx;
+-	__u32 qlen = 0;
+ 
+ 	sch->q.qlen = 0;
+-	memset(&sch->bstats, 0, sizeof(sch->bstats));
++	gnet_stats_basic_sync_init(&sch->bstats);
+ 	memset(&sch->qstats, 0, sizeof(sch->qstats));
+ 
+ 	/* MQ supports lockless qdiscs. However, statistics accounting needs
+@@ -168,25 +167,11 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
+ 		qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
+ 		spin_lock_bh(qdisc_lock(qdisc));
+ 
+-		if (qdisc_is_percpu_stats(qdisc)) {
+-			qlen = qdisc_qlen_sum(qdisc);
+-			__gnet_stats_copy_basic(NULL, &sch->bstats,
+-						qdisc->cpu_bstats,
+-						&qdisc->bstats);
+-			__gnet_stats_copy_queue(&sch->qstats,
+-						qdisc->cpu_qstats,
+-						&qdisc->qstats, qlen);
+-			sch->q.qlen		+= qlen;
+-		} else {
+-			sch->q.qlen		+= qdisc->q.qlen;
+-			sch->bstats.bytes	+= qdisc->bstats.bytes;
+-			sch->bstats.packets	+= qdisc->bstats.packets;
+-			sch->qstats.qlen	+= qdisc->qstats.qlen;
+-			sch->qstats.backlog	+= qdisc->qstats.backlog;
+-			sch->qstats.drops	+= qdisc->qstats.drops;
+-			sch->qstats.requeues	+= qdisc->qstats.requeues;
+-			sch->qstats.overlimits	+= qdisc->qstats.overlimits;
+-		}
++		gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
++				     &qdisc->bstats, false);
++		gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
++				     &qdisc->qstats);
++		sch->q.qlen += qdisc_qlen(qdisc);
+ 
+ 		spin_unlock_bh(qdisc_lock(qdisc));
+ 	}
+@@ -269,8 +254,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+ 	struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
+ 
+ 	sch = dev_queue->qdisc_sleeping;
+-	if (gnet_stats_copy_basic(&sch->running, d, sch->cpu_bstats,
+-				  &sch->bstats) < 0 ||
++	if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 ||
+ 	    qdisc_qstats_copy(d, sch) < 0)
+ 		return -1;
+ 	return 0;
+diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
+index 50e15add6068..42d4101e4f3d 100644
+--- a/net/sched/sch_mqprio.c
++++ b/net/sched/sch_mqprio.c
+@@ -412,7 +412,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
+ 	unsigned int ntx, tc;
+ 
+ 	sch->q.qlen = 0;
+-	memset(&sch->bstats, 0, sizeof(sch->bstats));
++	gnet_stats_basic_sync_init(&sch->bstats);
+ 	memset(&sch->qstats, 0, sizeof(sch->qstats));
+ 
+ 	/* MQ supports lockless qdiscs. However, statistics accounting needs
+@@ -424,25 +424,11 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
+ 		qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
+ 		spin_lock_bh(qdisc_lock(qdisc));
+ 
+-		if (qdisc_is_percpu_stats(qdisc)) {
+-			__u32 qlen = qdisc_qlen_sum(qdisc);
+-
+-			__gnet_stats_copy_basic(NULL, &sch->bstats,
+-						qdisc->cpu_bstats,
+-						&qdisc->bstats);
+-			__gnet_stats_copy_queue(&sch->qstats,
+-						qdisc->cpu_qstats,
+-						&qdisc->qstats, qlen);
+-			sch->q.qlen		+= qlen;
+-		} else {
+-			sch->q.qlen		+= qdisc->q.qlen;
+-			sch->bstats.bytes	+= qdisc->bstats.bytes;
+-			sch->bstats.packets	+= qdisc->bstats.packets;
+-			sch->qstats.backlog	+= qdisc->qstats.backlog;
+-			sch->qstats.drops	+= qdisc->qstats.drops;
+-			sch->qstats.requeues	+= qdisc->qstats.requeues;
+-			sch->qstats.overlimits	+= qdisc->qstats.overlimits;
+-		}
++		gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
++				     &qdisc->bstats, false);
++		gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
++				     &qdisc->qstats);
++		sch->q.qlen += qdisc_qlen(qdisc);
+ 
+ 		spin_unlock_bh(qdisc_lock(qdisc));
+ 	}
+@@ -534,12 +520,13 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+ {
+ 	if (cl >= TC_H_MIN_PRIORITY) {
+ 		int i;
+-		__u32 qlen = 0;
++		__u32 qlen;
+ 		struct gnet_stats_queue qstats = {0};
+-		struct gnet_stats_basic_packed bstats = {0};
++		struct gnet_stats_basic_sync bstats;
+ 		struct net_device *dev = qdisc_dev(sch);
+ 		struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
+ 
++		gnet_stats_basic_sync_init(&bstats);
+ 		/* Drop lock here it will be reclaimed before touching
+ 		 * statistics this is required because the d->lock we
+ 		 * hold here is the look on dev_queue->qdisc_sleeping
+@@ -554,40 +541,28 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+ 
+ 			spin_lock_bh(qdisc_lock(qdisc));
+ 
+-			if (qdisc_is_percpu_stats(qdisc)) {
+-				qlen = qdisc_qlen_sum(qdisc);
+-
+-				__gnet_stats_copy_basic(NULL, &bstats,
+-							qdisc->cpu_bstats,
+-							&qdisc->bstats);
+-				__gnet_stats_copy_queue(&qstats,
+-							qdisc->cpu_qstats,
+-							&qdisc->qstats,
+-							qlen);
+-			} else {
+-				qlen		+= qdisc->q.qlen;
+-				bstats.bytes	+= qdisc->bstats.bytes;
+-				bstats.packets	+= qdisc->bstats.packets;
+-				qstats.backlog	+= qdisc->qstats.backlog;
+-				qstats.drops	+= qdisc->qstats.drops;
+-				qstats.requeues	+= qdisc->qstats.requeues;
+-				qstats.overlimits += qdisc->qstats.overlimits;
+-			}
++			gnet_stats_add_basic(&bstats, qdisc->cpu_bstats,
++					     &qdisc->bstats, false);
++			gnet_stats_add_queue(&qstats, qdisc->cpu_qstats,
++					     &qdisc->qstats);
++			sch->q.qlen += qdisc_qlen(qdisc);
++
+ 			spin_unlock_bh(qdisc_lock(qdisc));
+ 		}
++		qlen = qdisc_qlen(sch) + qstats.qlen;
+ 
+ 		/* Reclaim root sleeping lock before completing stats */
+ 		if (d->lock)
+ 			spin_lock_bh(d->lock);
+-		if (gnet_stats_copy_basic(NULL, d, NULL, &bstats) < 0 ||
++		if (gnet_stats_copy_basic(d, NULL, &bstats, false) < 0 ||
+ 		    gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0)
+ 			return -1;
+ 	} else {
+ 		struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+ 
+ 		sch = dev_queue->qdisc_sleeping;
+-		if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d,
+-					  sch->cpu_bstats, &sch->bstats) < 0 ||
++		if (gnet_stats_copy_basic(d, sch->cpu_bstats,
++					  &sch->bstats, true) < 0 ||
+ 		    qdisc_qstats_copy(d, sch) < 0)
+ 			return -1;
+ 	}
+diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
+index 8b99f07aa3a7..f28050c7f12d 100644
+--- a/net/sched/sch_multiq.c
++++ b/net/sched/sch_multiq.c
+@@ -337,8 +337,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+ 	struct Qdisc *cl_q;
+ 
+ 	cl_q = q->queues[cl - 1];
+-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+-				  d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
++	if (gnet_stats_copy_basic(d, cl_q->cpu_bstats, &cl_q->bstats, true) < 0 ||
+ 	    qdisc_qstats_copy(d, cl_q) < 0)
+ 		return -1;
+ 
+diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
+index 2e0b1e7f5466..c03a11dd990f 100644
+--- a/net/sched/sch_prio.c
++++ b/net/sched/sch_prio.c
+@@ -359,8 +359,8 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+ 	struct Qdisc *cl_q;
+ 
+ 	cl_q = q->queues[cl - 1];
+-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+-				  d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
++	if (gnet_stats_copy_basic(d, cl_q->cpu_bstats,
++				  &cl_q->bstats, true) < 0 ||
+ 	    qdisc_qstats_copy(d, cl_q) < 0)
+ 		return -1;
+ 
+diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
+index 4c51aeb78f14..e591c3547b12 100644
+--- a/net/sched/sch_qfq.c
++++ b/net/sched/sch_qfq.c
+@@ -131,7 +131,7 @@ struct qfq_class {
+ 
+ 	unsigned int filter_cnt;
+ 
+-	struct gnet_stats_basic_packed bstats;
++	struct gnet_stats_basic_sync bstats;
+ 	struct gnet_stats_queue qstats;
+ 	struct net_rate_estimator __rcu *rate_est;
+ 	struct Qdisc *qdisc;
+@@ -452,7 +452,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+ 			err = gen_replace_estimator(&cl->bstats, NULL,
+ 						    &cl->rate_est,
+ 						    NULL,
+-						    qdisc_root_sleeping_running(sch),
++						    true,
+ 						    tca[TCA_RATE]);
+ 			if (err)
+ 				return err;
+@@ -466,6 +466,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+ 	if (cl == NULL)
+ 		return -ENOBUFS;
+ 
++	gnet_stats_basic_sync_init(&cl->bstats);
+ 	cl->common.classid = classid;
+ 	cl->deficit = lmax;
+ 
+@@ -478,7 +479,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+ 		err = gen_new_estimator(&cl->bstats, NULL,
+ 					&cl->rate_est,
+ 					NULL,
+-					qdisc_root_sleeping_running(sch),
++					true,
+ 					tca[TCA_RATE]);
+ 		if (err)
+ 			goto destroy_class;
+@@ -640,8 +641,7 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
+ 	xstats.weight = cl->agg->class_weight;
+ 	xstats.lmax = cl->agg->lmax;
+ 
+-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+-				  d, NULL, &cl->bstats) < 0 ||
++	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
+ 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ 	    qdisc_qstats_copy(d, cl->qdisc) < 0)
+ 		return -1;
+@@ -1235,8 +1235,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ 		return err;
+ 	}
+ 
+-	cl->bstats.bytes += len;
+-	cl->bstats.packets += gso_segs;
++	_bstats_update(&cl->bstats, len, gso_segs);
+ 	sch->qstats.backlog += len;
+ 	++sch->q.qlen;
+ 
+diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
+index e203deacc953..30497d446af5 100644
+--- a/net/sched/sch_taprio.c
++++ b/net/sched/sch_taprio.c
+@@ -1987,7 +1987,7 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+ 	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+ 
+ 	sch = dev_queue->qdisc_sleeping;
+-	if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
++	if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 ||
+ 	    qdisc_qstats_copy(d, sch) < 0)
+ 		return -1;
+ 	return 0;
+diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
+index 5ff8f902f14d..2ea5c3f18fd4 100644
+--- a/net/sunrpc/svc_xprt.c
++++ b/net/sunrpc/svc_xprt.c
+@@ -441,7 +441,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
+ 	if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
+ 		return;
+ 
+-	cpu = get_cpu();
++	cpu = get_cpu_light();
+ 	pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
+ 
+ 	atomic_long_inc(&pool->sp_stats.packets);
+@@ -465,7 +465,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
+ 	rqstp = NULL;
+ out_unlock:
+ 	rcu_read_unlock();
+-	put_cpu();
++	put_cpu_light();
+ 	trace_svc_xprt_do_enqueue(xprt, rqstp);
+ }
+ EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue);
+diff --git a/samples/kfifo/bytestream-example.c b/samples/kfifo/bytestream-example.c
+index 5a90aa527877..642d0748c169 100644
+--- a/samples/kfifo/bytestream-example.c
++++ b/samples/kfifo/bytestream-example.c
+@@ -22,10 +22,10 @@
+ #define	PROC_FIFO	"bytestream-fifo"
+ 
+ /* lock for procfs read access */
+-static DEFINE_MUTEX(read_lock);
++static DEFINE_MUTEX(read_access);
+ 
+ /* lock for procfs write access */
+-static DEFINE_MUTEX(write_lock);
++static DEFINE_MUTEX(write_access);
+ 
+ /*
+  * define DYNAMIC in this example for a dynamically allocated fifo.
+@@ -116,12 +116,12 @@ static ssize_t fifo_write(struct file *file, const char __user *buf,
+ 	int ret;
+ 	unsigned int copied;
+ 
+-	if (mutex_lock_interruptible(&write_lock))
++	if (mutex_lock_interruptible(&write_access))
+ 		return -ERESTARTSYS;
+ 
+ 	ret = kfifo_from_user(&test, buf, count, &copied);
+ 
+-	mutex_unlock(&write_lock);
++	mutex_unlock(&write_access);
+ 	if (ret)
+ 		return ret;
+ 
+@@ -134,12 +134,12 @@ static ssize_t fifo_read(struct file *file, char __user *buf,
+ 	int ret;
+ 	unsigned int copied;
+ 
+-	if (mutex_lock_interruptible(&read_lock))
++	if (mutex_lock_interruptible(&read_access))
+ 		return -ERESTARTSYS;
+ 
+ 	ret = kfifo_to_user(&test, buf, count, &copied);
+ 
+-	mutex_unlock(&read_lock);
++	mutex_unlock(&read_access);
+ 	if (ret)
+ 		return ret;
+ 
+diff --git a/samples/kfifo/inttype-example.c b/samples/kfifo/inttype-example.c
+index e5403d8c971a..c61482ba94f4 100644
+--- a/samples/kfifo/inttype-example.c
++++ b/samples/kfifo/inttype-example.c
+@@ -22,10 +22,10 @@
+ #define	PROC_FIFO	"int-fifo"
+ 
+ /* lock for procfs read access */
+-static DEFINE_MUTEX(read_lock);
++static DEFINE_MUTEX(read_access);
+ 
+ /* lock for procfs write access */
+-static DEFINE_MUTEX(write_lock);
++static DEFINE_MUTEX(write_access);
+ 
+ /*
+  * define DYNAMIC in this example for a dynamically allocated fifo.
+@@ -109,12 +109,12 @@ static ssize_t fifo_write(struct file *file, const char __user *buf,
+ 	int ret;
+ 	unsigned int copied;
+ 
+-	if (mutex_lock_interruptible(&write_lock))
++	if (mutex_lock_interruptible(&write_access))
+ 		return -ERESTARTSYS;
+ 
+ 	ret = kfifo_from_user(&test, buf, count, &copied);
+ 
+-	mutex_unlock(&write_lock);
++	mutex_unlock(&write_access);
+ 	if (ret)
+ 		return ret;
+ 
+@@ -127,12 +127,12 @@ static ssize_t fifo_read(struct file *file, char __user *buf,
+ 	int ret;
+ 	unsigned int copied;
+ 
+-	if (mutex_lock_interruptible(&read_lock))
++	if (mutex_lock_interruptible(&read_access))
+ 		return -ERESTARTSYS;
+ 
+ 	ret = kfifo_to_user(&test, buf, count, &copied);
+ 
+-	mutex_unlock(&read_lock);
++	mutex_unlock(&read_access);
+ 	if (ret)
+ 		return ret;
+ 
+diff --git a/samples/kfifo/record-example.c b/samples/kfifo/record-example.c
+index f64f3d62d6c2..e4087b2d3fc4 100644
+--- a/samples/kfifo/record-example.c
++++ b/samples/kfifo/record-example.c
+@@ -22,10 +22,10 @@
+ #define	PROC_FIFO	"record-fifo"
+ 
+ /* lock for procfs read access */
+-static DEFINE_MUTEX(read_lock);
++static DEFINE_MUTEX(read_access);
+ 
+ /* lock for procfs write access */
+-static DEFINE_MUTEX(write_lock);
++static DEFINE_MUTEX(write_access);
+ 
+ /*
+  * define DYNAMIC in this example for a dynamically allocated fifo.
+@@ -123,12 +123,12 @@ static ssize_t fifo_write(struct file *file, const char __user *buf,
+ 	int ret;
+ 	unsigned int copied;
+ 
+-	if (mutex_lock_interruptible(&write_lock))
++	if (mutex_lock_interruptible(&write_access))
+ 		return -ERESTARTSYS;
+ 
+ 	ret = kfifo_from_user(&test, buf, count, &copied);
+ 
+-	mutex_unlock(&write_lock);
++	mutex_unlock(&write_access);
+ 	if (ret)
+ 		return ret;
+ 
+@@ -141,12 +141,12 @@ static ssize_t fifo_read(struct file *file, char __user *buf,
+ 	int ret;
+ 	unsigned int copied;
+ 
+-	if (mutex_lock_interruptible(&read_lock))
++	if (mutex_lock_interruptible(&read_access))
+ 		return -ERESTARTSYS;
+ 
+ 	ret = kfifo_to_user(&test, buf, count, &copied);
+ 
+-	mutex_unlock(&read_lock);
++	mutex_unlock(&read_access);
+ 	if (ret)
+ 		return ret;
+ 
+diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
+index 3f3f56f6be4d..5dbcdc5b22b5 100644
+--- a/security/smack/smack_lsm.c
++++ b/security/smack/smack_lsm.c
+@@ -51,8 +51,10 @@
+ #define SMK_RECEIVING	1
+ #define SMK_SENDING	2
+ 
++#ifdef SMACK_IPV6_PORT_LABELING
+ static DEFINE_MUTEX(smack_ipv6_lock);
+ static LIST_HEAD(smk_ipv6_port_list);
++#endif
+ struct kmem_cache *smack_rule_cache;
+ int smack_enabled __initdata;
+ 
+@@ -2603,7 +2605,6 @@ static void smk_ipv6_port_label(struct socket *sock, struct sockaddr *address)
+ 	mutex_unlock(&smack_ipv6_lock);
+ 	return;
+ }
+-#endif
+ 
+ /**
+  * smk_ipv6_port_check - check Smack port access
+@@ -2666,6 +2667,7 @@ static int smk_ipv6_port_check(struct sock *sk, struct sockaddr_in6 *address,
+ 
+ 	return smk_ipv6_check(skp, object, address, act);
+ }
++#endif
+ 
+ /**
+  * smack_inode_setsecurity - set smack xattrs
+@@ -2852,8 +2854,9 @@ static int smack_socket_connect(struct socket *sock, struct sockaddr *sap,
+ 			rc = smk_ipv6_check(ssp->smk_out, rsp, sip,
+ 					    SMK_CONNECTING);
+ 		}
+-		if (__is_defined(SMACK_IPV6_PORT_LABELING))
+-			rc = smk_ipv6_port_check(sock->sk, sip, SMK_CONNECTING);
++#ifdef SMACK_IPV6_PORT_LABELING
++		rc = smk_ipv6_port_check(sock->sk, sip, SMK_CONNECTING);
++#endif
+ 
+ 		return rc;
+ 	}
+diff --git a/sound/soc/mediatek/common/mtk-afe-fe-dai.c b/sound/soc/mediatek/common/mtk-afe-fe-dai.c
+index e95c7c018e7d..4f2c2379531b 100644
+--- a/sound/soc/mediatek/common/mtk-afe-fe-dai.c
++++ b/sound/soc/mediatek/common/mtk-afe-fe-dai.c
+@@ -288,7 +288,6 @@ const struct snd_soc_dai_ops mtk_afe_fe_ops = {
+ };
+ EXPORT_SYMBOL_GPL(mtk_afe_fe_ops);
+ 
+-static DEFINE_MUTEX(irqs_lock);
+ int mtk_dynamic_irq_acquire(struct mtk_base_afe *afe)
+ {
+ 	int i;
diff --git a/meta-digi-arm/recipes-kernel/linux/linux-dey_5.15.bb b/meta-digi-arm/recipes-kernel/linux/linux-dey_5.15.bb
index cc65d7536..21efc5450 100644
--- a/meta-digi-arm/recipes-kernel/linux/linux-dey_5.15.bb
+++ b/meta-digi-arm/recipes-kernel/linux/linux-dey_5.15.bb
@@ -7,6 +7,24 @@ SRCBRANCH:stm32mpcommon = "v5.15.118/stm/master"
 SRCREV = "${AUTOREV}"
 SRCREV:stm32mpcommon = "${AUTOREV}"
 
+STM_RT_PATCHES = " \
+	file://patch-5.15.119-rt65.patch \
+	file://0023-5.15-stm32mp-rt-49-r1-CLOCK.patch \
+	file://0024-5.15-stm32mp-rt-49-r1-DMA.patch \
+	file://0025-5.15-stm32mp-rt-49-r1-MFD.patch \
+	file://0026-5.15-stm32mp-rt-49-r1-NET-TTY.patch \
+	file://0027-5.15-stm32mp-rt-49-r1-DEVICETREE.patch \
+	file://0028-5.15-stm32mp-rt-49-r1-CONFIG.patch \
+"
+
+SRC_URI:append:stm32mpcommon = " \
+	${@bb.utils.contains('DISTRO_FEATURES', 'rt', '${STM_RT_PATCHES}', '', d)} \
+"
+
+KERNEL_CONFIG_FRAGMENTS:append:stm32mpcommon = " ${@bb.utils.contains('DISTRO_FEATURES', 'rt', '${S}/arch/arm/configs/fragment-07-rt.config', '', d)}"
+KERNEL_CONFIG_FRAGMENTS:append:stm32mpcommon = " ${@bb.utils.contains('DISTRO_FEATURES', 'rt', '${S}/arch/arm/configs/fragment-07-rt-sysvinit.config', '', d)}"
+KERNEL_CONFIG_FRAGMENTS:append:ccmp13 = " ${@bb.utils.contains('DISTRO_FEATURES', 'rt', '${S}/arch/arm/configs/fragment-08-rt-mp13.config', '', d)}"
+
 do_assemble_fitimage:append:ccmp1() {
 	#
 	# Step 9: Add public keys to the different U-Boot dtb files