diff --git a/Makefile b/Makefile
index 9bdb93053ee93..33c45a0cd8582 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 5
PATCHLEVEL = 8
-SUBLEVEL = 17
+SUBLEVEL = 18
EXTRAVERSION =
NAME = Kleptomaniac Octopus

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index d5fe7c9e0be1d..5a34423464188 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -10,14 +10,14 @@
#
# Copyright (C) 1995-2001 by Russell King

-LDFLAGS_vmlinux :=--no-undefined -X
+LDFLAGS_vmlinux :=--no-undefined -X -z norelro
CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)

ifeq ($(CONFIG_RELOCATABLE), y)
# Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour
# for relative relocs, since this leads to better Image compression
# with the relocation offsets always being zero.
-LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro \
+LDFLAGS_vmlinux += -shared -Bsymbolic -z notext \
$(call ld-option, --no-apply-dynamic-relocs)
endif

diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 6e8a7eec667e8..d8a2bacf4e0a8 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -457,6 +457,12 @@ out_printmsg:
return required;
}

+static void cpu_enable_ssbd_mitigation(const struct arm64_cpu_capabilities *cap)
+{
+ if (ssbd_state != ARM64_SSBD_FORCE_DISABLE)
+ cap->matches(cap, SCOPE_LOCAL_CPU);
+}
+
/* known invulnerable cores */
static const struct midr_range arm64_ssb_cpus[] = {
MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
@@ -599,6 +605,12 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope)
return (need_wa > 0);
}

+static void
+cpu_enable_branch_predictor_hardening(const struct arm64_cpu_capabilities *cap)
+{
+ cap->matches(cap, SCOPE_LOCAL_CPU);
+}
+
static const __maybe_unused struct midr_range tx2_family_cpus[] = {
MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
@@ -890,9 +902,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
},
#endif
{
+ .desc = "Branch predictor hardening",
.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
.matches = check_branch_predictor,
+ .cpu_enable = cpu_enable_branch_predictor_hardening,
},
#ifdef CONFIG_HARDEN_EL2_VECTORS
{
@@ -906,6 +920,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.capability = ARM64_SSBD,
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
.matches = has_ssbd_mitigation,
+ .cpu_enable = cpu_enable_ssbd_mitigation,
.midr_range_list = arm64_ssb_cpus,
},
#ifdef CONFIG_ARM64_ERRATUM_1418040
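The arm64 hunks above add .cpu_enable() callbacks that simply re-invoke .matches() with SCOPE_LOCAL_CPU, so the per-CPU install work inside the detection routine is guaranteed to run on every CPU, including late-onlined ones. A minimal sketch of the resulting capability entry (field values taken from the hunk; the full table lives in arm64_errata[]):

	static const struct arm64_cpu_capabilities example_entry = {
		.desc		= "Branch predictor hardening",
		.capability	= ARM64_HARDEN_BRANCH_PREDICTOR,
		.type		= ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
		.matches	= check_branch_predictor,		/* detect on each CPU */
		.cpu_enable	= cpu_enable_branch_predictor_hardening, /* apply on each CPU */
	};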
diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h
index 17c24f14615fb..6839f8fcf76b2 100644
--- a/arch/openrisc/include/asm/uaccess.h
+++ b/arch/openrisc/include/asm/uaccess.h
@@ -164,19 +164,19 @@ struct __large_struct {

#define __get_user_nocheck(x, ptr, size) \
({ \
- long __gu_err, __gu_val; \
- __get_user_size(__gu_val, (ptr), (size), __gu_err); \
- (x) = (__force __typeof__(*(ptr)))__gu_val; \
+ long __gu_err; \
+ __get_user_size((x), (ptr), (size), __gu_err); \
__gu_err; \
})

#define __get_user_check(x, ptr, size) \
({ \
- long __gu_err = -EFAULT, __gu_val = 0; \
- const __typeof__(*(ptr)) * __gu_addr = (ptr); \
- if (access_ok(__gu_addr, size)) \
- __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
- (x) = (__force __typeof__(*(ptr)))__gu_val; \
+ long __gu_err = -EFAULT; \
+ const __typeof__(*(ptr)) *__gu_addr = (ptr); \
+ if (access_ok(__gu_addr, size)) \
+ __get_user_size((x), __gu_addr, (size), __gu_err); \
+ else \
+ (x) = (__typeof__(*(ptr))) 0; \
__gu_err; \
})

@@ -190,11 +190,13 @@ do { \
case 2: __get_user_asm(x, ptr, retval, "l.lhz"); break; \
case 4: __get_user_asm(x, ptr, retval, "l.lwz"); break; \
case 8: __get_user_asm2(x, ptr, retval); break; \
- default: (x) = __get_user_bad(); \
+ default: (x) = (__typeof__(*(ptr)))__get_user_bad(); \
} \
} while (0)

#define __get_user_asm(x, addr, err, op) \
+{ \
+ unsigned long __gu_tmp; \
__asm__ __volatile__( \
"1: "op" %1,0(%2)\n" \
"2:\n" \
@@ -208,10 +210,14 @@ do { \
" .align 2\n" \
" .long 1b,3b\n" \
".previous" \
- : "=r"(err), "=r"(x) \
- : "r"(addr), "i"(-EFAULT), "0"(err))
+ : "=r"(err), "=r"(__gu_tmp) \
+ : "r"(addr), "i"(-EFAULT), "0"(err)); \
+ (x) = (__typeof__(*(addr)))__gu_tmp; \
+}

#define __get_user_asm2(x, addr, err) \
+{ \
+ unsigned long long __gu_tmp; \
__asm__ __volatile__( \
"1: l.lwz %1,0(%2)\n" \
"2: l.lwz %H1,4(%2)\n" \
@@ -228,8 +234,11 @@ do { \
" .long 1b,4b\n" \
" .long 2b,4b\n" \
".previous" \
- : "=r"(err), "=&r"(x) \
- : "r"(addr), "i"(-EFAULT), "0"(err)); \
+ : "=r"(err), "=&r"(__gu_tmp) \
+ : "r"(addr), "i"(-EFAULT), "0"(err)); \
+ (x) = (__typeof__(*(addr)))( \
+ (__typeof__((x)-(x)))__gu_tmp); \
+}

/* more complex routines */

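The openrisc rework above loads into a correctly sized local (__gu_tmp) and converts to __typeof__(*(ptr)) exactly once, instead of funnelling every access through a plain long, and __get_user_check() now zeroes (x) when access_ok() fails rather than leaving it holding stale data. A reduced C model of the idea (hypothetical macro, not from the tree; the real version does the load in inline asm with an exception table):

	#define GET_VAL(x, ptr) ({ \
		unsigned long __tmp; \
		__tmp = (unsigned long)*(ptr);		/* stands in for the asm load */ \
		(x) = (__typeof__(*(ptr)))__tmp;	/* single, typed conversion */ \
	})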
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9fa23eb320ff5..cf78ad7ff0b7c 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -135,7 +135,7 @@ config PPC
select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE
- select ARCH_HAS_UACCESS_MCSAFE if PPC64
+ select ARCH_HAS_COPY_MC if PPC64
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_KEEP_MEMBLOCK
diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
index b72692702f35f..9bf6dffb40900 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -53,9 +53,7 @@ void *__memmove(void *to, const void *from, __kernel_size_t n);
#ifndef CONFIG_KASAN
#define __HAVE_ARCH_MEMSET32
#define __HAVE_ARCH_MEMSET64
-#define __HAVE_ARCH_MEMCPY_MCSAFE

-extern int memcpy_mcsafe(void *dst, const void *src, __kernel_size_t sz);
extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 64c04ab091123..97506441c15b1 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -436,6 +436,32 @@ do { \
extern unsigned long __copy_tofrom_user(void __user *to,
const void __user *from, unsigned long size);

+#ifdef CONFIG_ARCH_HAS_COPY_MC
+unsigned long __must_check
+copy_mc_generic(void *to, const void *from, unsigned long size);
+
+static inline unsigned long __must_check
+copy_mc_to_kernel(void *to, const void *from, unsigned long size)
+{
+ return copy_mc_generic(to, from, size);
+}
+#define copy_mc_to_kernel copy_mc_to_kernel
+
+static inline unsigned long __must_check
+copy_mc_to_user(void __user *to, const void *from, unsigned long n)
+{
+ if (likely(check_copy_size(from, n, true))) {
+ if (access_ok(to, n)) {
+ allow_write_to_user(to, n);
+ n = copy_mc_generic((void *)to, from, n);
+ prevent_write_to_user(to, n);
+ }
+ }
+
+ return n;
+}
+#endif
+
#ifdef __powerpc64__
static inline unsigned long
raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
@@ -524,20 +550,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
return ret;
}

-static __always_inline unsigned long __must_check
-copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n)
-{
- if (likely(check_copy_size(from, n, true))) {
- if (access_ok(to, n)) {
- allow_write_to_user(to, n);
- n = memcpy_mcsafe((void *)to, from, n);
- prevent_write_to_user(to, n);
- }
- }
-
- return n;
-}
-
unsigned long __arch_clear_user(void __user *addr, unsigned long size);

static inline unsigned long clear_user(void __user *addr, unsigned long size)
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 5e994cda8e401..c254f5f733a86 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -39,7 +39,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
memcpy_power7.o

obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
- memcpy_64.o memcpy_mcsafe_64.o
+ memcpy_64.o copy_mc_64.o

obj64-$(CONFIG_SMP) += locks.o
obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
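The copy_mc_* helpers introduced above keep memcpy_mcsafe()'s contract: the return value is the number of bytes left uncopied, zero on full success. A minimal caller sketch (hypothetical function and buffer names) showing how that convention is typically consumed:

	static int read_from_pmem(void *dst, const void *src, size_t len)
	{
		unsigned long rem = copy_mc_to_kernel(dst, src, len);

		if (rem)	/* 'rem' trailing bytes were not copied */
			return -EIO;
		return 0;
	}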
diff --git a/arch/powerpc/lib/copy_mc_64.S b/arch/powerpc/lib/copy_mc_64.S
new file mode 100644
index 0000000000000..88d46c471493b
--- /dev/null
+++ b/arch/powerpc/lib/copy_mc_64.S
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) IBM Corporation, 2011
+ * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
+ * Author - Balbir Singh <bsingharora@gmail.com>
+ */
+#include <asm/ppc_asm.h>
+#include <asm/errno.h>
+#include <asm/export.h>
+
+ .macro err1
+100:
+ EX_TABLE(100b,.Ldo_err1)
+ .endm
+
+ .macro err2
+200:
+ EX_TABLE(200b,.Ldo_err2)
+ .endm
+
+ .macro err3
+300: EX_TABLE(300b,.Ldone)
+ .endm
+
+.Ldo_err2:
+ ld r22,STK_REG(R22)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r14,STK_REG(R14)(r1)
+ addi r1,r1,STACKFRAMESIZE
+.Ldo_err1:
+ /* Do a byte by byte copy to get the exact remaining size */
+ mtctr r7
+46:
+err3; lbz r0,0(r4)
+ addi r4,r4,1
+err3; stb r0,0(r3)
+ addi r3,r3,1
+ bdnz 46b
+ li r3,0
+ blr
+
+.Ldone:
+ mfctr r3
+ blr
+
+
+_GLOBAL(copy_mc_generic)
+ mr r7,r5
+ cmpldi r5,16
+ blt .Lshort_copy
+
+.Lcopy:
+ /* Get the source 8B aligned */
+ neg r6,r4
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-3)
+
+ bf cr7*4+3,1f
+err1; lbz r0,0(r4)
+ addi r4,r4,1
+err1; stb r0,0(r3)
+ addi r3,r3,1
+ subi r7,r7,1
+
+1: bf cr7*4+2,2f
+err1; lhz r0,0(r4)
+ addi r4,r4,2
+err1; sth r0,0(r3)
+ addi r3,r3,2
+ subi r7,r7,2
+
+2: bf cr7*4+1,3f
+err1; lwz r0,0(r4)
+ addi r4,r4,4
+err1; stw r0,0(r3)
+ addi r3,r3,4
+ subi r7,r7,4
+
+3: sub r5,r5,r6
+ cmpldi r5,128
+
+ mflr r0
+ stdu r1,-STACKFRAMESIZE(r1)
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+ std r17,STK_REG(R17)(r1)
+ std r18,STK_REG(R18)(r1)
+ std r19,STK_REG(R19)(r1)
+ std r20,STK_REG(R20)(r1)
+ std r21,STK_REG(R21)(r1)
+ std r22,STK_REG(R22)(r1)
+ std r0,STACKFRAMESIZE+16(r1)
+
+ blt 5f
+ srdi r6,r5,7
+ mtctr r6
+
+ /* Now do cacheline (128B) sized loads and stores. */
+ .align 5
+4:
+err2; ld r0,0(r4)
+err2; ld r6,8(r4)
+err2; ld r8,16(r4)
+err2; ld r9,24(r4)
+err2; ld r10,32(r4)
+err2; ld r11,40(r4)
+err2; ld r12,48(r4)
+err2; ld r14,56(r4)
+err2; ld r15,64(r4)
+err2; ld r16,72(r4)
+err2; ld r17,80(r4)
+err2; ld r18,88(r4)
+err2; ld r19,96(r4)
+err2; ld r20,104(r4)
+err2; ld r21,112(r4)
+err2; ld r22,120(r4)
+ addi r4,r4,128
+err2; std r0,0(r3)
+err2; std r6,8(r3)
+err2; std r8,16(r3)
+err2; std r9,24(r3)
+err2; std r10,32(r3)
+err2; std r11,40(r3)
+err2; std r12,48(r3)
+err2; std r14,56(r3)
+err2; std r15,64(r3)
+err2; std r16,72(r3)
+err2; std r17,80(r3)
+err2; std r18,88(r3)
+err2; std r19,96(r3)
+err2; std r20,104(r3)
+err2; std r21,112(r3)
+err2; std r22,120(r3)
+ addi r3,r3,128
+ subi r7,r7,128
+ bdnz 4b
+
+ clrldi r5,r5,(64-7)
+
+ /* Up to 127B to go */
+5: srdi r6,r5,4
+ mtocrf 0x01,r6
+
+6: bf cr7*4+1,7f
+err2; ld r0,0(r4)
+err2; ld r6,8(r4)
+err2; ld r8,16(r4)
+err2; ld r9,24(r4)
+err2; ld r10,32(r4)
+err2; ld r11,40(r4)
+err2; ld r12,48(r4)
+err2; ld r14,56(r4)
+ addi r4,r4,64
+err2; std r0,0(r3)
+err2; std r6,8(r3)
+err2; std r8,16(r3)
+err2; std r9,24(r3)
+err2; std r10,32(r3)
+err2; std r11,40(r3)
+err2; std r12,48(r3)
+err2; std r14,56(r3)
+ addi r3,r3,64
+ subi r7,r7,64
+
+7: ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r22,STK_REG(R22)(r1)
+ addi r1,r1,STACKFRAMESIZE
+
+ /* Up to 63B to go */
+ bf cr7*4+2,8f
+err1; ld r0,0(r4)
+err1; ld r6,8(r4)
+err1; ld r8,16(r4)
+err1; ld r9,24(r4)
+ addi r4,r4,32
+err1; std r0,0(r3)
+err1; std r6,8(r3)
+err1; std r8,16(r3)
+err1; std r9,24(r3)
+ addi r3,r3,32
+ subi r7,r7,32
+
+ /* Up to 31B to go */
+8: bf cr7*4+3,9f
+err1; ld r0,0(r4)
+err1; ld r6,8(r4)
+ addi r4,r4,16
+err1; std r0,0(r3)
+err1; std r6,8(r3)
+ addi r3,r3,16
+ subi r7,r7,16
+
+9: clrldi r5,r5,(64-4)
+
+ /* Up to 15B to go */
+.Lshort_copy:
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+err1; lwz r0,0(r4) /* Less chance of a reject with word ops */
+err1; lwz r6,4(r4)
+ addi r4,r4,8
+err1; stw r0,0(r3)
+err1; stw r6,4(r3)
+ addi r3,r3,8
+ subi r7,r7,8
+
+12: bf cr7*4+1,13f
+err1; lwz r0,0(r4)
+ addi r4,r4,4
+err1; stw r0,0(r3)
+ addi r3,r3,4
+ subi r7,r7,4
+
+13: bf cr7*4+2,14f
+err1; lhz r0,0(r4)
+ addi r4,r4,2
+err1; sth r0,0(r3)
+ addi r3,r3,2
+ subi r7,r7,2
+
+14: bf cr7*4+3,15f
+err1; lbz r0,0(r4)
+err1; stb r0,0(r3)
+
+15: li r3,0
+ blr
+
+EXPORT_SYMBOL_GPL(copy_mc_generic);
diff --git a/arch/powerpc/lib/memcpy_mcsafe_64.S b/arch/powerpc/lib/memcpy_mcsafe_64.S
deleted file mode 100644
index cb882d9a6d8a3..0000000000000
--- a/arch/powerpc/lib/memcpy_mcsafe_64.S
+++ /dev/null
@@ -1,242 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) IBM Corporation, 2011
- * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
- * Author - Balbir Singh <bsingharora@gmail.com>
- */
-#include <asm/ppc_asm.h>
-#include <asm/errno.h>
-#include <asm/export.h>
-
- .macro err1
-100:
- EX_TABLE(100b,.Ldo_err1)
- .endm
-
- .macro err2
-200:
- EX_TABLE(200b,.Ldo_err2)
- .endm
-
- .macro err3
-300: EX_TABLE(300b,.Ldone)
- .endm
-
-.Ldo_err2:
- ld r22,STK_REG(R22)(r1)
- ld r21,STK_REG(R21)(r1)
- ld r20,STK_REG(R20)(r1)
- ld r19,STK_REG(R19)(r1)
- ld r18,STK_REG(R18)(r1)
- ld r17,STK_REG(R17)(r1)
- ld r16,STK_REG(R16)(r1)
- ld r15,STK_REG(R15)(r1)
- ld r14,STK_REG(R14)(r1)
- addi r1,r1,STACKFRAMESIZE
-.Ldo_err1:
- /* Do a byte by byte copy to get the exact remaining size */
- mtctr r7
-46:
-err3; lbz r0,0(r4)
- addi r4,r4,1
-err3; stb r0,0(r3)
- addi r3,r3,1
- bdnz 46b
- li r3,0
- blr
-
-.Ldone:
- mfctr r3
- blr
-
-
-_GLOBAL(memcpy_mcsafe)
- mr r7,r5
- cmpldi r5,16
- blt .Lshort_copy
-
-.Lcopy:
- /* Get the source 8B aligned */
- neg r6,r4
- mtocrf 0x01,r6
- clrldi r6,r6,(64-3)
-
- bf cr7*4+3,1f
-err1; lbz r0,0(r4)
- addi r4,r4,1
-err1; stb r0,0(r3)
- addi r3,r3,1
- subi r7,r7,1
-
-1: bf cr7*4+2,2f
-err1; lhz r0,0(r4)
- addi r4,r4,2
-err1; sth r0,0(r3)
- addi r3,r3,2
- subi r7,r7,2
-
-2: bf cr7*4+1,3f
-err1; lwz r0,0(r4)
- addi r4,r4,4
-err1; stw r0,0(r3)
- addi r3,r3,4
- subi r7,r7,4
-
-3: sub r5,r5,r6
- cmpldi r5,128
-
- mflr r0
- stdu r1,-STACKFRAMESIZE(r1)
- std r14,STK_REG(R14)(r1)
- std r15,STK_REG(R15)(r1)
- std r16,STK_REG(R16)(r1)
- std r17,STK_REG(R17)(r1)
- std r18,STK_REG(R18)(r1)
- std r19,STK_REG(R19)(r1)
- std r20,STK_REG(R20)(r1)
- std r21,STK_REG(R21)(r1)
- std r22,STK_REG(R22)(r1)
- std r0,STACKFRAMESIZE+16(r1)
-
- blt 5f
- srdi r6,r5,7
- mtctr r6
-
- /* Now do cacheline (128B) sized loads and stores. */
- .align 5
-4:
-err2; ld r0,0(r4)
-err2; ld r6,8(r4)
-err2; ld r8,16(r4)
-err2; ld r9,24(r4)
-err2; ld r10,32(r4)
-err2; ld r11,40(r4)
-err2; ld r12,48(r4)
-err2; ld r14,56(r4)
-err2; ld r15,64(r4)
-err2; ld r16,72(r4)
-err2; ld r17,80(r4)
-err2; ld r18,88(r4)
-err2; ld r19,96(r4)
-err2; ld r20,104(r4)
-err2; ld r21,112(r4)
-err2; ld r22,120(r4)
- addi r4,r4,128
-err2; std r0,0(r3)
-err2; std r6,8(r3)
-err2; std r8,16(r3)
-err2; std r9,24(r3)
-err2; std r10,32(r3)
-err2; std r11,40(r3)
-err2; std r12,48(r3)
-err2; std r14,56(r3)
-err2; std r15,64(r3)
-err2; std r16,72(r3)
-err2; std r17,80(r3)
-err2; std r18,88(r3)
-err2; std r19,96(r3)
-err2; std r20,104(r3)
-err2; std r21,112(r3)
-err2; std r22,120(r3)
- addi r3,r3,128
- subi r7,r7,128
- bdnz 4b
-
- clrldi r5,r5,(64-7)
-
- /* Up to 127B to go */
-5: srdi r6,r5,4
- mtocrf 0x01,r6
-
-6: bf cr7*4+1,7f
-err2; ld r0,0(r4)
-err2; ld r6,8(r4)
-err2; ld r8,16(r4)
-err2; ld r9,24(r4)
-err2; ld r10,32(r4)
-err2; ld r11,40(r4)
-err2; ld r12,48(r4)
-err2; ld r14,56(r4)
- addi r4,r4,64
-err2; std r0,0(r3)
-err2; std r6,8(r3)
-err2; std r8,16(r3)
-err2; std r9,24(r3)
-err2; std r10,32(r3)
-err2; std r11,40(r3)
-err2; std r12,48(r3)
-err2; std r14,56(r3)
- addi r3,r3,64
- subi r7,r7,64
-
-7: ld r14,STK_REG(R14)(r1)
- ld r15,STK_REG(R15)(r1)
- ld r16,STK_REG(R16)(r1)
- ld r17,STK_REG(R17)(r1)
- ld r18,STK_REG(R18)(r1)
- ld r19,STK_REG(R19)(r1)
- ld r20,STK_REG(R20)(r1)
- ld r21,STK_REG(R21)(r1)
- ld r22,STK_REG(R22)(r1)
- addi r1,r1,STACKFRAMESIZE
-
- /* Up to 63B to go */
- bf cr7*4+2,8f
-err1; ld r0,0(r4)
-err1; ld r6,8(r4)
-err1; ld r8,16(r4)
-err1; ld r9,24(r4)
- addi r4,r4,32
-err1; std r0,0(r3)
-err1; std r6,8(r3)
-err1; std r8,16(r3)
-err1; std r9,24(r3)
- addi r3,r3,32
- subi r7,r7,32
-
- /* Up to 31B to go */
-8: bf cr7*4+3,9f
-err1; ld r0,0(r4)
-err1; ld r6,8(r4)
- addi r4,r4,16
-err1; std r0,0(r3)
-err1; std r6,8(r3)
- addi r3,r3,16
- subi r7,r7,16
-
-9: clrldi r5,r5,(64-4)
-
- /* Up to 15B to go */
-.Lshort_copy:
- mtocrf 0x01,r5
- bf cr7*4+0,12f
-err1; lwz r0,0(r4) /* Less chance of a reject with word ops */
-err1; lwz r6,4(r4)
- addi r4,r4,8
-err1; stw r0,0(r3)
-err1; stw r6,4(r3)
- addi r3,r3,8
- subi r7,r7,8
-
-12: bf cr7*4+1,13f
-err1; lwz r0,0(r4)
- addi r4,r4,4
-err1; stw r0,0(r3)
- addi r3,r3,4
- subi r7,r7,4
-
-13: bf cr7*4+2,14f
-err1; lhz r0,0(r4)
- addi r4,r4,2
-err1; sth r0,0(r3)
- addi r3,r3,2
- subi r7,r7,2
-
-14: bf cr7*4+3,15f
-err1; lbz r0,0(r4)
-err1; stb r0,0(r3)
-
-15: li r3,0
- blr
-
-EXPORT_SYMBOL_GPL(memcpy_mcsafe);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 883da0abf7790..1f4104f8852b8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -75,7 +75,7 @@ config X86
select ARCH_HAS_PTE_DEVMAP if X86_64
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
- select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE
+ select ARCH_HAS_COPY_MC if X86_64
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_STRICT_KERNEL_RWX
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 0dd319e6e5b49..ec98b400e38f9 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -59,7 +59,7 @@ config EARLY_PRINTK_USB_XDBC
You should normally say N here, unless you want to debug early
crashes or need a very simple printk logging facility.

-config MCSAFE_TEST
+config COPY_MC_TEST
def_bool n

config EFI_PGT_DUMP
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 26c36357c4c9c..a023cbe21230a 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -89,6 +89,7 @@ struct perf_ibs {
u64 max_period;
unsigned long offset_mask[1];
int offset_max;
+ unsigned int fetch_count_reset_broken : 1;
struct cpu_perf_ibs __percpu *pcpu;

struct attribute **format_attrs;
@@ -363,7 +364,12 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
struct hw_perf_event *hwc, u64 config)
{
- wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
+ u64 tmp = hwc->config | config;
+
+ if (perf_ibs->fetch_count_reset_broken)
+ wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask);
+
+ wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask);
}

/*
@@ -733,6 +739,13 @@ static __init void perf_event_ibs_init(void)
{
struct attribute **attr = ibs_op_format_attrs;

+ /*
+ * Some chips fail to reset the fetch count when it is written; instead
+ * they need a 0-1 transition of IbsFetchEn.
+ */
+ if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
+ perf_ibs_fetch.fetch_count_reset_broken = 1;
+
perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");

if (ibs_caps & IBS_CAPS_OPCNT) {
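On the affected families (0x16 through 0x18, per the hunk above) a write to the IBS fetch control MSR only latches a new fetch count on a 0 -> 1 transition of IbsFetchEn, which is why the enable path now writes the register twice. A condensed sketch of that sequence (MSR_AMD64_IBSFETCHCTL and IBS_FETCH_ENABLE are the kernel's names for the register and bit; the helper itself is hypothetical):

	static void ibs_fetch_enable(u64 config, bool count_reset_broken)
	{
		if (count_reset_broken)
			wrmsrl(MSR_AMD64_IBSFETCHCTL, config & ~IBS_FETCH_ENABLE); /* force enable bit to 0 */
		wrmsrl(MSR_AMD64_IBSFETCHCTL, config | IBS_FETCH_ENABLE);          /* then the 0 -> 1 edge */
	}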
diff --git a/arch/x86/include/asm/copy_mc_test.h b/arch/x86/include/asm/copy_mc_test.h
new file mode 100644
index 0000000000000..e4991ba967266
--- /dev/null
+++ b/arch/x86/include/asm/copy_mc_test.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _COPY_MC_TEST_H_
+#define _COPY_MC_TEST_H_
+
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_COPY_MC_TEST
+extern unsigned long copy_mc_test_src;
+extern unsigned long copy_mc_test_dst;
+
+static inline void copy_mc_inject_src(void *addr)
+{
+ if (addr)
+ copy_mc_test_src = (unsigned long) addr;
+ else
+ copy_mc_test_src = ~0UL;
+}
+
+static inline void copy_mc_inject_dst(void *addr)
+{
+ if (addr)
+ copy_mc_test_dst = (unsigned long) addr;
+ else
+ copy_mc_test_dst = ~0UL;
+}
+#else /* CONFIG_COPY_MC_TEST */
+static inline void copy_mc_inject_src(void *addr)
+{
+}
+
+static inline void copy_mc_inject_dst(void *addr)
+{
+}
+#endif /* CONFIG_COPY_MC_TEST */
+
+#else /* __ASSEMBLY__ */
+#include <asm/export.h>
+
+#ifdef CONFIG_COPY_MC_TEST
+.macro COPY_MC_TEST_CTL
+ .pushsection .data
+ .align 8
+ .globl copy_mc_test_src
+ copy_mc_test_src:
+ .quad 0
+ EXPORT_SYMBOL_GPL(copy_mc_test_src)
+ .globl copy_mc_test_dst
+ copy_mc_test_dst:
+ .quad 0
+ EXPORT_SYMBOL_GPL(copy_mc_test_dst)
+ .popsection
+.endm
+
+.macro COPY_MC_TEST_SRC reg count target
+ leaq \count(\reg), %r9
+ cmp copy_mc_test_src, %r9
+ ja \target
+.endm
+
+.macro COPY_MC_TEST_DST reg count target
+ leaq \count(\reg), %r9
+ cmp copy_mc_test_dst, %r9
+ ja \target
+.endm
+#else
+.macro COPY_MC_TEST_CTL
+.endm
+
+.macro COPY_MC_TEST_SRC reg count target
+.endm
+
+.macro COPY_MC_TEST_DST reg count target
+.endm
+#endif /* CONFIG_COPY_MC_TEST */
+#endif /* __ASSEMBLY__ */
+#endif /* _COPY_MC_TEST_H_ */
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index cf503824529ce..9b9112e4379ab 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -174,6 +174,15 @@ extern void mce_unregister_decode_chain(struct notifier_block *nb);

extern int mce_p5_enabled;

+#ifdef CONFIG_ARCH_HAS_COPY_MC
+extern void enable_copy_mc_fragile(void);
+unsigned long __must_check copy_mc_fragile(void *dst, const void *src, unsigned cnt);
+#else
+static inline void enable_copy_mc_fragile(void)
+{
+}
+#endif
+
#ifdef CONFIG_X86_MCE
int mcheck_init(void);
void mcheck_cpu_init(struct cpuinfo_x86 *c);
diff --git a/arch/x86/include/asm/mcsafe_test.h b/arch/x86/include/asm/mcsafe_test.h
deleted file mode 100644
index eb59804b6201c..0000000000000
--- a/arch/x86/include/asm/mcsafe_test.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MCSAFE_TEST_H_
-#define _MCSAFE_TEST_H_
-
-#ifndef __ASSEMBLY__
-#ifdef CONFIG_MCSAFE_TEST
-extern unsigned long mcsafe_test_src;
-extern unsigned long mcsafe_test_dst;
-
-static inline void mcsafe_inject_src(void *addr)
-{
- if (addr)
- mcsafe_test_src = (unsigned long) addr;
- else
- mcsafe_test_src = ~0UL;
-}
-
-static inline void mcsafe_inject_dst(void *addr)
-{
- if (addr)
- mcsafe_test_dst = (unsigned long) addr;
- else
- mcsafe_test_dst = ~0UL;
-}
-#else /* CONFIG_MCSAFE_TEST */
-static inline void mcsafe_inject_src(void *addr)
-{
-}
-
-static inline void mcsafe_inject_dst(void *addr)
-{
-}
-#endif /* CONFIG_MCSAFE_TEST */
-
-#else /* __ASSEMBLY__ */
-#include <asm/export.h>
-
-#ifdef CONFIG_MCSAFE_TEST
-.macro MCSAFE_TEST_CTL
- .pushsection .data
- .align 8
- .globl mcsafe_test_src
- mcsafe_test_src:
- .quad 0
- EXPORT_SYMBOL_GPL(mcsafe_test_src)
- .globl mcsafe_test_dst
- mcsafe_test_dst:
- .quad 0
- EXPORT_SYMBOL_GPL(mcsafe_test_dst)
- .popsection
-.endm
-
-.macro MCSAFE_TEST_SRC reg count target
- leaq \count(\reg), %r9
- cmp mcsafe_test_src, %r9
- ja \target
-.endm
-
-.macro MCSAFE_TEST_DST reg count target
- leaq \count(\reg), %r9
- cmp mcsafe_test_dst, %r9
- ja \target
-.endm
-#else
-.macro MCSAFE_TEST_CTL
-.endm
-
-.macro MCSAFE_TEST_SRC reg count target
-.endm
-
-.macro MCSAFE_TEST_DST reg count target
-.endm
-#endif /* CONFIG_MCSAFE_TEST */
-#endif /* __ASSEMBLY__ */
-#endif /* _MCSAFE_TEST_H_ */
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 75314c3dbe471..6e450827f677a 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -82,38 +82,6 @@ int strcmp(const char *cs, const char *ct);

#endif

-#define __HAVE_ARCH_MEMCPY_MCSAFE 1
-__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src,
- size_t cnt);
-DECLARE_STATIC_KEY_FALSE(mcsafe_key);
-
-/**
- * memcpy_mcsafe - copy memory with indication if a machine check happened
- *
- * @dst: destination address
- * @src: source address
- * @cnt: number of bytes to copy
- *
- * Low level memory copy function that catches machine checks
- * We only call into the "safe" function on systems that can
- * actually do machine check recovery. Everyone else can just
- * use memcpy().
- *
- * Return 0 for success, or number of bytes not copied if there was an
- * exception.
- */
-static __always_inline __must_check unsigned long
-memcpy_mcsafe(void *dst, const void *src, size_t cnt)
-{
-#ifdef CONFIG_X86_MCE
- if (static_branch_unlikely(&mcsafe_key))
- return __memcpy_mcsafe(dst, src, cnt);
- else
-#endif
- memcpy(dst, src, cnt);
- return 0;
-}
-
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
#define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1
void __memcpy_flushcache(void *dst, const void *src, size_t cnt);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 2f3e8f2a958f6..9bfca52b46411 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -455,6 +455,15 @@ extern __must_check long strnlen_user(const char __user *str, long n);
unsigned long __must_check clear_user(void __user *mem, unsigned long len);
unsigned long __must_check __clear_user(void __user *mem, unsigned long len);

+#ifdef CONFIG_ARCH_HAS_COPY_MC
+unsigned long __must_check
+copy_mc_to_kernel(void *to, const void *from, unsigned len);
+#define copy_mc_to_kernel copy_mc_to_kernel
+
+unsigned long __must_check
+copy_mc_to_user(void *to, const void *from, unsigned len);
+#endif
+
/*
* movsl can be slow when source and dest are not both 8-byte aligned
*/
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index bc10e3dc64fed..e7265a552f4f0 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -46,22 +46,6 @@ copy_user_generic(void *to, const void *from, unsigned len)
return ret;
}

-static __always_inline __must_check unsigned long
-copy_to_user_mcsafe(void *to, const void *from, unsigned len)
-{
- unsigned long ret;
-
- __uaccess_begin();
- /*
- * Note, __memcpy_mcsafe() is explicitly used since it can
- * handle exceptions / faults. memcpy_mcsafe() may fall back to
- * memcpy() which lacks this handling.
- */
- ret = __memcpy_mcsafe(to, from, len);
- __uaccess_end();
- return ret;
-}
-
static __always_inline __must_check unsigned long
raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
{
@@ -102,8 +86,4 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
kasan_check_write(dst, size);
return __copy_user_flushcache(dst, src, size);
}
-
-unsigned long
-mcsafe_handle_tail(char *to, char *from, unsigned len);
-
#endif /* _ASM_X86_UACCESS_64_H */
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 07673a034d39c..69b2bb305a5a7 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -40,7 +40,6 @@
#include <linux/debugfs.h>
#include <linux/irq_work.h>
#include <linux/export.h>
-#include <linux/jump_label.h>
#include <linux/set_memory.h>
#include <linux/task_work.h>
#include <linux/hardirq.h>
@@ -2122,7 +2121,7 @@ void mce_disable_bank(int bank)
and older.
* mce=nobootlog Don't log MCEs from before booting.
* mce=bios_cmci_threshold Don't program the CMCI threshold
- * mce=recovery force enable memcpy_mcsafe()
+ * mce=recovery force enable copy_mc_fragile()
*/
static int __init mcheck_enable(char *str)
{
@@ -2730,13 +2729,10 @@ static void __init mcheck_debugfs_init(void)
static void __init mcheck_debugfs_init(void) { }
#endif

-DEFINE_STATIC_KEY_FALSE(mcsafe_key);
-EXPORT_SYMBOL_GPL(mcsafe_key);
-
static int __init mcheck_late_init(void)
{
if (mca_cfg.recovery)
- static_branch_inc(&mcsafe_key);
+ enable_copy_mc_fragile();

mcheck_debugfs_init();

diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 896d74cb5081a..e0296983a2386 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -8,6 +8,7 @@

#include <asm/hpet.h>
#include <asm/setup.h>
+#include <asm/mce.h>

#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)

@@ -624,10 +625,6 @@ static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev)
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3,
amd_disable_seq_and_redirect_scrub);

-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
-#include <linux/jump_label.h>
-#include <asm/string_64.h>
-
/* Ivy Bridge, Haswell, Broadwell */
static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
{
@@ -636,7 +633,7 @@ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
pci_read_config_dword(pdev, 0x84, &capid0);

if (capid0 & 0x10)
- static_branch_inc(&mcsafe_key);
+ enable_copy_mc_fragile();
}

/* Skylake */
@@ -653,7 +650,7 @@ static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev)
* enabled, so memory machine check recovery is also enabled.
*/
if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0))
- static_branch_inc(&mcsafe_key);
+ enable_copy_mc_fragile();

}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
@@ -661,7 +658,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap);
#endif
-#endif

bool x86_apple_machine;
EXPORT_SYMBOL(x86_apple_machine);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 69cc823109740..d43df8de75a6a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -196,7 +196,7 @@ static __always_inline void __user *error_get_trap_addr(struct pt_regs *regs)

DEFINE_IDTENTRY(exc_divide_error)
{
- do_error_trap(regs, 0, "divide_error", X86_TRAP_DE, SIGFPE,
+ do_error_trap(regs, 0, "divide error", X86_TRAP_DE, SIGFPE,
FPE_INTDIV, error_get_trap_addr(regs));
}

diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 6110bce7237bd..02c3cec7e5157 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -44,6 +44,7 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
lib-y := delay.o misc.o cmdline.o cpu.o
lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
lib-y += memcpy_$(BITS).o
+lib-$(CONFIG_ARCH_HAS_COPY_MC) += copy_mc.o copy_mc_64.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c
new file mode 100644
index 0000000000000..c13e8c9ee926b
--- /dev/null
+++ b/arch/x86/lib/copy_mc.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
+
+#include <linux/jump_label.h>
+#include <linux/uaccess.h>
+#include <linux/export.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include <asm/mce.h>
+
+#ifdef CONFIG_X86_MCE
+/*
+ * See COPY_MC_TEST for self-test of the copy_mc_fragile()
+ * implementation.
+ */
+static DEFINE_STATIC_KEY_FALSE(copy_mc_fragile_key);
+
+void enable_copy_mc_fragile(void)
+{
+ static_branch_inc(&copy_mc_fragile_key);
+}
+#define copy_mc_fragile_enabled (static_branch_unlikely(&copy_mc_fragile_key))
+
+/*
+ * Similar to copy_user_handle_tail, probe for the write fault point, or
+ * source exception point.
+ */
+__visible notrace unsigned long
+copy_mc_fragile_handle_tail(char *to, char *from, unsigned len)
+{
+ for (; len; --len, to++, from++)
+ if (copy_mc_fragile(to, from, 1))
+ break;
+ return len;
+}
+#else
+/*
+ * No point in doing careful copying, or consulting a static key when
+ * there is no #MC handler in the CONFIG_X86_MCE=n case.
+ */
+void enable_copy_mc_fragile(void)
+{
+}
+#define copy_mc_fragile_enabled (0)
+#endif
+
+unsigned long copy_mc_enhanced_fast_string(void *dst, const void *src, unsigned len);
+
+/**
+ * copy_mc_to_kernel - memory copy that handles source exceptions
+ *
+ * @dst: destination address
+ * @src: source address
+ * @len: number of bytes to copy
+ *
+ * Call into the 'fragile' version on systems that benefit from avoiding
+ * corner case poison consumption scenarios, For example, accessing
+ * poison across 2 cachelines with a single instruction. Almost all
+ * other uses case can use copy_mc_enhanced_fast_string() for a fast
+ * recoverable copy, or fallback to plain memcpy.
+ *
+ * Return 0 for success, or number of bytes not copied if there was an
+ * exception.
+ */
+unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigned len)
+{
+ if (copy_mc_fragile_enabled)
+ return copy_mc_fragile(dst, src, len);
+ if (static_cpu_has(X86_FEATURE_ERMS))
+ return copy_mc_enhanced_fast_string(dst, src, len);
+ memcpy(dst, src, len);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(copy_mc_to_kernel);
+
+unsigned long __must_check copy_mc_to_user(void *dst, const void *src, unsigned len)
+{
+ unsigned long ret;
+
+ if (copy_mc_fragile_enabled) {
+ __uaccess_begin();
+ ret = copy_mc_fragile(dst, src, len);
+ __uaccess_end();
+ return ret;
+ }
+
+ if (static_cpu_has(X86_FEATURE_ERMS)) {
+ __uaccess_begin();
+ ret = copy_mc_enhanced_fast_string(dst, src, len);
+ __uaccess_end();
+ return ret;
+ }
+
+ return copy_user_generic(dst, src, len);
+}
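copy_mc_to_kernel() above dispatches in three tiers: the quirk-opted 'fragile' copy, an ERMS rep-movsb variant with exception handling, and plain memcpy() when no machine-check recovery is in play. From the caller's side the user-space variant behaves like copy_to_user(): it returns bytes not copied. A caller sketch (hypothetical names; note the x86 prototype takes a plain void * destination, hence the cast):

	static ssize_t push_to_user(void __user *ubuf, const void *kaddr, size_t len)
	{
		unsigned long rem = copy_mc_to_user((__force void *)ubuf, kaddr, len);

		if (rem == len)
			return -EFAULT;		/* nothing was copied */
		return len - rem;		/* short (poisoned/faulted) or full copy */
	}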
diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S
new file mode 100644
index 0000000000000..892d8915f609e
--- /dev/null
+++ b/arch/x86/lib/copy_mc_64.S
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
+
+#include <linux/linkage.h>
+#include <asm/copy_mc_test.h>
+#include <asm/export.h>
+#include <asm/asm.h>
+
+#ifndef CONFIG_UML
+
+#ifdef CONFIG_X86_MCE
+COPY_MC_TEST_CTL
+
+/*
+ * copy_mc_fragile - copy memory with indication if an exception / fault happened
+ *
+ * The 'fragile' version is opted into by platform quirks and takes
+ * pains to avoid unrecoverable corner cases like 'fast-string'
+ * instruction sequences, and consuming poison across a cacheline
+ * boundary. The non-fragile version is equivalent to memcpy()
+ * regardless of CPU machine-check-recovery capability.
+ */
+SYM_FUNC_START(copy_mc_fragile)
+ cmpl $8, %edx
+ /* Less than 8 bytes? Go to byte copy loop */
+ jb .L_no_whole_words
+
+ /* Check for bad alignment of source */
+ testl $7, %esi
+ /* Already aligned */
+ jz .L_8byte_aligned
+
+ /* Copy one byte at a time until source is 8-byte aligned */
+ movl %esi, %ecx
+ andl $7, %ecx
+ subl $8, %ecx
+ negl %ecx
+ subl %ecx, %edx
+.L_read_leading_bytes:
+ movb (%rsi), %al
+ COPY_MC_TEST_SRC %rsi 1 .E_leading_bytes
+ COPY_MC_TEST_DST %rdi 1 .E_leading_bytes
+.L_write_leading_bytes:
+ movb %al, (%rdi)
+ incq %rsi
+ incq %rdi
+ decl %ecx
+ jnz .L_read_leading_bytes
+
+.L_8byte_aligned:
+ movl %edx, %ecx
+ andl $7, %edx
+ shrl $3, %ecx
+ jz .L_no_whole_words
+
+.L_read_words:
+ movq (%rsi), %r8
+ COPY_MC_TEST_SRC %rsi 8 .E_read_words
+ COPY_MC_TEST_DST %rdi 8 .E_write_words
+.L_write_words:
+ movq %r8, (%rdi)
+ addq $8, %rsi
+ addq $8, %rdi
+ decl %ecx
+ jnz .L_read_words
+
+ /* Any trailing bytes? */
+.L_no_whole_words:
+ andl %edx, %edx
+ jz .L_done_memcpy_trap
+
+ /* Copy trailing bytes */
+ movl %edx, %ecx
+.L_read_trailing_bytes:
+ movb (%rsi), %al
+ COPY_MC_TEST_SRC %rsi 1 .E_trailing_bytes
+ COPY_MC_TEST_DST %rdi 1 .E_trailing_bytes
+.L_write_trailing_bytes:
+ movb %al, (%rdi)
+ incq %rsi
+ incq %rdi
+ decl %ecx
+ jnz .L_read_trailing_bytes
+
+ /* Copy successful. Return zero */
+.L_done_memcpy_trap:
+ xorl %eax, %eax
+.L_done:
+ ret
+SYM_FUNC_END(copy_mc_fragile)
+EXPORT_SYMBOL_GPL(copy_mc_fragile)
+
+ .section .fixup, "ax"
+ /*
+ * Return number of bytes not copied for any failure. Note that
+ * there is no "tail" handling since the source buffer is 8-byte
+ * aligned and poison is cacheline aligned.
+ */
+.E_read_words:
+ shll $3, %ecx
+.E_leading_bytes:
+ addl %edx, %ecx
+.E_trailing_bytes:
+ mov %ecx, %eax
+ jmp .L_done
+
+ /*
+ * For write fault handling, given the destination is unaligned,
+ * we handle faults on multi-byte writes with a byte-by-byte
+ * copy up to the write-protected page.
+ */
+.E_write_words:
+ shll $3, %ecx
+ addl %edx, %ecx
+ movl %ecx, %edx
+ jmp copy_mc_fragile_handle_tail
+
+ .previous
+
+ _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
+ _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
+ _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
+ _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
+ _ASM_EXTABLE(.L_write_words, .E_write_words)
+ _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
+#endif /* CONFIG_X86_MCE */
+
+/*
+ * copy_mc_enhanced_fast_string - memory copy with exception handling
+ *
+ * Fast string copy + fault / exception handling. If the CPU does
+ * support machine check exception recovery, but does not support
+ * recovering from fast-string exceptions then this CPU needs to be
+ * added to the copy_mc_fragile_key set of quirks. Otherwise, absent any
+ * machine check recovery support this version should be no slower than
+ * standard memcpy.
+ */
+SYM_FUNC_START(copy_mc_enhanced_fast_string)
+ movq %rdi, %rax
+ movq %rdx, %rcx
+.L_copy:
+ rep movsb
+ /* Copy successful. Return zero */
+ xorl %eax, %eax
+ ret
+SYM_FUNC_END(copy_mc_enhanced_fast_string)
+
+ .section .fixup, "ax"
+.E_copy:
+ /*
+ * On fault %rcx is updated such that the copy instruction could
+ * optionally be restarted at the fault position, i.e. it
+ * contains 'bytes remaining'. A non-zero return indicates error
+ * to copy_mc_generic() users, or indicate short transfers to
+ * user-copy routines.
+ */
+ movq %rcx, %rax
+ ret
+
+ .previous
+
+ _ASM_EXTABLE_FAULT(.L_copy, .E_copy)
+#endif /* !CONFIG_UML */
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index bbcc05bcefadb..037faac46b0cc 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -4,7 +4,6 @@
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
-#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>

@@ -187,117 +186,3 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
SYM_FUNC_END(memcpy_orig)

.popsection
-
-#ifndef CONFIG_UML
-
-MCSAFE_TEST_CTL
-
-/*
- * __memcpy_mcsafe - memory copy with machine check exception handling
- * Note that we only catch machine checks when reading the source addresses.
- * Writes to target are posted and don't generate machine checks.
- */
-SYM_FUNC_START(__memcpy_mcsafe)
- cmpl $8, %edx
- /* Less than 8 bytes? Go to byte copy loop */
- jb .L_no_whole_words
-
- /* Check for bad alignment of source */
- testl $7, %esi
- /* Already aligned */
- jz .L_8byte_aligned
-
- /* Copy one byte at a time until source is 8-byte aligned */
- movl %esi, %ecx
- andl $7, %ecx
- subl $8, %ecx
- negl %ecx
- subl %ecx, %edx
-.L_read_leading_bytes:
- movb (%rsi), %al
- MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
- MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
-.L_write_leading_bytes:
- movb %al, (%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz .L_read_leading_bytes
-
-.L_8byte_aligned:
- movl %edx, %ecx
- andl $7, %edx
- shrl $3, %ecx
- jz .L_no_whole_words
-
-.L_read_words:
- movq (%rsi), %r8
- MCSAFE_TEST_SRC %rsi 8 .E_read_words
- MCSAFE_TEST_DST %rdi 8 .E_write_words
-.L_write_words:
- movq %r8, (%rdi)
- addq $8, %rsi
- addq $8, %rdi
- decl %ecx
- jnz .L_read_words
-
- /* Any trailing bytes? */
-.L_no_whole_words:
- andl %edx, %edx
- jz .L_done_memcpy_trap
-
- /* Copy trailing bytes */
- movl %edx, %ecx
-.L_read_trailing_bytes:
- movb (%rsi), %al
- MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
- MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
-.L_write_trailing_bytes:
- movb %al, (%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz .L_read_trailing_bytes
-
- /* Copy successful. Return zero */
-.L_done_memcpy_trap:
- xorl %eax, %eax
-.L_done:
- ret
-SYM_FUNC_END(__memcpy_mcsafe)
-EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
-
- .section .fixup, "ax"
- /*
- * Return number of bytes not copied for any failure. Note that
- * there is no "tail" handling since the source buffer is 8-byte
- * aligned and poison is cacheline aligned.
- */
-.E_read_words:
- shll $3, %ecx
-.E_leading_bytes:
- addl %edx, %ecx
-.E_trailing_bytes:
- mov %ecx, %eax
- jmp .L_done
-
- /*
- * For write fault handling, given the destination is unaligned,
- * we handle faults on multi-byte writes with a byte-by-byte
- * copy up to the write-protected page.
- */
-.E_write_words:
- shll $3, %ecx
- addl %edx, %ecx
- movl %ecx, %edx
- jmp mcsafe_handle_tail
-
- .previous
-
- _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
- _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
- _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
- _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
- _ASM_EXTABLE(.L_write_words, .E_write_words)
- _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
-#endif
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 1847e993ac63a..508c81e97ab10 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -56,27 +56,6 @@ unsigned long clear_user(void __user *to, unsigned long n)
}
EXPORT_SYMBOL(clear_user);

-/*
- * Similar to copy_user_handle_tail, probe for the write fault point,
- * but reuse __memcpy_mcsafe in case a new read error is encountered.
- * clac() is handled in _copy_to_iter_mcsafe().
- */
-__visible notrace unsigned long
-mcsafe_handle_tail(char *to, char *from, unsigned len)
-{
- for (; len; --len, to++, from++) {
- /*
- * Call the assembly routine back directly since
- * memcpy_mcsafe() may silently fallback to memcpy.
- */
- unsigned long rem = __memcpy_mcsafe(to, from, 1);
-
- if (rem)
- break;
- }
- return len;
-}
-
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
* clean_cache_range - write back a cache range with CLWB
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 00c62115f39cd..0aaf31917061d 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -33,6 +33,7 @@
#include <asm/hw_irq.h>
#include <asm/io_apic.h>
#include <asm/intel-mid.h>
+#include <asm/acpi.h>

#define PCIE_CAP_OFFSET 0x100

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index c46b9f2e732ff..6e39eda00c2c9 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1438,6 +1438,15 @@ asmlinkage __visible void __init xen_start_kernel(void)
x86_init.mpparse.get_smp_config = x86_init_uint_noop;

xen_boot_params_init_edd();
+
+#ifdef CONFIG_ACPI
+ /*
+ * Disable selecting "Firmware First mode" for correctable
+ * memory errors, as this is the duty of the hypervisor to
+ * decide.
+ */
+ acpi_disable_cmcff = 1;
+#endif
}

if (!boot_params.screen_info.orig_video_isVGA)
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index d991dd46e89cc..98b8baa47dc5e 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -240,6 +240,8 @@ enum {
as default lpm_policy */
AHCI_HFLAG_SUSPEND_PHYS = (1 << 26), /* handle PHYs during
suspend/resume */
+ AHCI_HFLAG_IGN_NOTSUPP_POWER_ON = (1 << 27), /* ignore -EOPNOTSUPP
+ from phy_power_on() */

/* ap->flags bits */

diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c
index d4bba3ace45d7..3ad46d26d9d51 100644
--- a/drivers/ata/ahci_mvebu.c
+++ b/drivers/ata/ahci_mvebu.c
@@ -227,7 +227,7 @@ static const struct ahci_mvebu_plat_data ahci_mvebu_armada_380_plat_data = {

static const struct ahci_mvebu_plat_data ahci_mvebu_armada_3700_plat_data = {
.plat_config = ahci_mvebu_armada_3700_config,
- .flags = AHCI_HFLAG_SUSPEND_PHYS,
+ .flags = AHCI_HFLAG_SUSPEND_PHYS | AHCI_HFLAG_IGN_NOTSUPP_POWER_ON,
};

static const struct of_device_id ahci_mvebu_of_match[] = {
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 129556fcf6be7..a1cbb894e5f0a 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -59,7 +59,7 @@ int ahci_platform_enable_phys(struct ahci_host_priv *hpriv)
}

rc = phy_power_on(hpriv->phys[i]);
- if (rc) {
+ if (rc && !(rc == -EOPNOTSUPP && (hpriv->flags & AHCI_HFLAG_IGN_NOTSUPP_POWER_ON))) {
phy_exit(hpriv->phys[i]);
goto disable_phys;
}
diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c
index 141ac600b64c8..44b0ed8f6bb8a 100644
--- a/drivers/ata/sata_rcar.c
+++ b/drivers/ata/sata_rcar.c
@@ -120,7 +120,7 @@
/* Descriptor table word 0 bit (when DTA32M = 1) */
#define SATA_RCAR_DTEND BIT(0)

-#define SATA_RCAR_DMA_BOUNDARY 0x1FFFFFFEUL
+#define SATA_RCAR_DMA_BOUNDARY 0x1FFFFFFFUL

/* Gen2 Physical Layer Control Registers */
#define RCAR_GEN2_PHY_CTL1_REG 0x1704
diff --git a/drivers/base/firmware_loader/fallback_platform.c b/drivers/base/firmware_loader/fallback_platform.c
index 685edb7dd05a7..6958ab1a80593 100644
--- a/drivers/base/firmware_loader/fallback_platform.c
+++ b/drivers/base/firmware_loader/fallback_platform.c
@@ -17,7 +17,7 @@ int firmware_fallback_platform(struct fw_priv *fw_priv, u32 opt_flags)
if (!(opt_flags & FW_OPT_FALLBACK_PLATFORM))
return -ENOENT;

- rc = security_kernel_load_data(LOADING_FIRMWARE_EFI_EMBEDDED);
+ rc = security_kernel_load_data(LOADING_FIRMWARE);
if (rc)
return rc;

diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c
index bad8e90ba168d..62fbc7df022bc 100644
--- a/drivers/crypto/chelsio/chtls/chtls_cm.c
+++ b/drivers/crypto/chelsio/chtls/chtls_cm.c
@@ -772,14 +772,13 @@ static int chtls_pass_open_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
if (rpl->status != CPL_ERR_NONE) {
pr_info("Unexpected PASS_OPEN_RPL status %u for STID %u\n",
rpl->status, stid);
- return CPL_RET_BUF_DONE;
+ } else {
+ cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family);
+ sock_put(listen_ctx->lsk);
+ kfree(listen_ctx);
+ module_put(THIS_MODULE);
}
- cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family);
- sock_put(listen_ctx->lsk);
- kfree(listen_ctx);
- module_put(THIS_MODULE);
-
- return 0;
+ return CPL_RET_BUF_DONE;
}

static int chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
@@ -796,15 +795,13 @@ static int chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
if (rpl->status != CPL_ERR_NONE) {
pr_info("Unexpected CLOSE_LISTSRV_RPL status %u for STID %u\n",
rpl->status, stid);
- return CPL_RET_BUF_DONE;
+ } else {
+ cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family);
+ sock_put(listen_ctx->lsk);
+ kfree(listen_ctx);
+ module_put(THIS_MODULE);
}
-
- cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family);
- sock_put(listen_ctx->lsk);
- kfree(listen_ctx);
- module_put(THIS_MODULE);
-
- return 0;
+ return CPL_RET_BUF_DONE;
}

static void chtls_purge_wr_queue(struct sock *sk)
@@ -1513,7 +1510,6 @@ static void add_to_reap_list(struct sock *sk)
struct chtls_sock *csk = sk->sk_user_data;

local_bh_disable();
- bh_lock_sock(sk);
release_tcp_port(sk); /* release the port immediately */

spin_lock(&reap_list_lock);
@@ -1522,7 +1518,6 @@ static void add_to_reap_list(struct sock *sk)
if (!csk->passive_reap_next)
schedule_work(&reap_task);
spin_unlock(&reap_list_lock);
- bh_unlock_sock(sk);
local_bh_enable();
}

diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c
index 9fb5ca6682ea2..188d871f6b8cd 100644
--- a/drivers/crypto/chelsio/chtls/chtls_io.c
+++ b/drivers/crypto/chelsio/chtls/chtls_io.c
@@ -1585,6 +1585,7 @@ skip_copy:
tp->urg_data = 0;

if ((avail + offset) >= skb->len) {
+ struct sk_buff *next_skb;
if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
tp->copied_seq += skb->len;
hws->rcvpld = skb->hdr_len;
@@ -1595,8 +1596,10 @@ skip_copy:
chtls_free_skb(sk, skb);
buffers_freed++;
hws->copied_seq = 0;
- if (copied >= target &&
- !skb_peek(&sk->sk_receive_queue))
+ next_skb = skb_peek(&sk->sk_receive_queue);
+ if (copied >= target && !next_skb)
+ break;
+ if (ULP_SKB_CB(next_skb)->flags & ULPCB_FLAG_TLS_HDR)
break;
}
} while (len > 0);
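The chtls_io.c hunk stops the receive loop from coalescing data across a TLS record boundary: after exhausting an skb it peeks at the next one and bails out if that skb carries a TLS header, instead of only checking whether the queue is empty. The decision, condensed (names from the hunk; the enclosing do/while is elided):

	next_skb = skb_peek(&sk->sk_receive_queue);
	if (copied >= target && !next_skb)
		break;	/* enough copied and queue drained */
	if (ULP_SKB_CB(next_skb)->flags & ULPCB_FLAG_TLS_HDR)
		break;	/* next skb starts a new TLS record */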
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index e5bfac79e5ac9..04f5d79d42653 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -62,10 +62,12 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
status = efi_get_random_bytes(sizeof(phys_seed),
(u8 *)&phys_seed);
if (status == EFI_NOT_FOUND) {
- efi_info("EFI_RNG_PROTOCOL unavailable, no randomness supplied\n");
+ efi_info("EFI_RNG_PROTOCOL unavailable, KASLR will be disabled\n");
+ efi_nokaslr = true;
} else if (status != EFI_SUCCESS) {
- efi_err("efi_get_random_bytes() failed\n");
- return status;
+ efi_err("efi_get_random_bytes() failed (0x%lx), KASLR will be disabled\n",
+ status);
+ efi_nokaslr = true;
}
} else {
efi_info("KASLR disabled on kernel command line\n");
diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
index 11ecf3c4640eb..368cd60000eec 100644
--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -136,7 +136,7 @@ static efi_status_t update_fdt(void *orig_fdt, unsigned long orig_fdt_size,
if (status)
goto fdt_set_fail;

- if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && !efi_nokaslr) {
efi_status_t efi_status;

efi_status = efi_get_random_bytes(sizeof(fdt_val64),
@@ -145,8 +145,6 @@ static efi_status_t update_fdt(void *orig_fdt, unsigned long orig_fdt_size,
status = fdt_setprop_var(fdt, node, "kaslr-seed", fdt_val64);
if (status)
goto fdt_set_fail;
- } else if (efi_status != EFI_NOT_FOUND) {
- return efi_status;
}
}

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e7532e7d74e91..0e1f11669b072 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -323,6 +323,7 @@ static void print_context_stats(struct seq_file *m,
}
i915_gem_context_unlock_engines(ctx);

+ mutex_lock(&ctx->mutex);
if (!IS_ERR_OR_NULL(ctx->file_priv)) {
struct file_stats stats = {
.vm = rcu_access_pointer(ctx->vm),
@@ -343,6 +344,7 @@ static void print_context_stats(struct seq_file *m,

print_file_stats(m, name, stats);
}
+ mutex_unlock(&ctx->mutex);

spin_lock(&i915->gem.contexts.lock);
list_safe_reset_next(ctx, cn, link);
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 3a98439bba832..0abce004a9591 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -647,13 +647,12 @@ static void process_one_req(struct work_struct *_work)
req->callback = NULL;

spin_lock_bh(&lock);
+ /*
+ * Although the work will normally have been canceled by the workqueue,
+ * it can still be requeued as long as it is on the req_list.
+ */
+ cancel_delayed_work(&req->work);
if (!list_empty(&req->list)) {
- /*
- * Although the work will normally have been canceled by the
- * workqueue, it can still be requeued as long as it is on the
- * req_list.
- */
- cancel_delayed_work(&req->work);
list_del_init(&req->list);
kfree(req);
}
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 1533419f18758..de467a1303db3 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -49,7 +49,7 @@ do { \
#define pmem_assign(dest, src) ((dest) = (src))
#endif

-#if defined(__HAVE_ARCH_MEMCPY_MCSAFE) && defined(DM_WRITECACHE_HAS_PMEM)
+#if IS_ENABLED(CONFIG_ARCH_HAS_COPY_MC) && defined(DM_WRITECACHE_HAS_PMEM)
#define DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
#endif

@@ -992,7 +992,8 @@ static void writecache_resume(struct dm_target *ti)
}
wc->freelist_size = 0;

- r = memcpy_mcsafe(&sb_seq_count, &sb(wc)->seq_count, sizeof(uint64_t));
+ r = copy_mc_to_kernel(&sb_seq_count, &sb(wc)->seq_count,
+ sizeof(uint64_t));
if (r) {
writecache_error(wc, r, "hardware memory error when reading superblock: %d", r);
sb_seq_count = cpu_to_le64(0);
@@ -1008,7 +1009,8 @@ static void writecache_resume(struct dm_target *ti)
e->seq_count = -1;
continue;
}
- r = memcpy_mcsafe(&wme, memory_entry(wc, e), sizeof(struct wc_memory_entry));
+ r = copy_mc_to_kernel(&wme, memory_entry(wc, e),
+ sizeof(struct wc_memory_entry));
if (r) {
writecache_error(wc, r, "hardware memory error when reading metadata entry %lu: %d",
(unsigned long)b, r);
@@ -1206,7 +1208,7 @@ static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data

if (rw == READ) {
int r;
- r = memcpy_mcsafe(buf, data, size);
+ r = copy_mc_to_kernel(buf, data, size);
flush_dcache_page(bio_page(bio));
if (unlikely(r)) {
writecache_error(wc, r, "hardware memory error when reading data: %d", r);
@@ -2349,7 +2351,7 @@ invalid_optional:
}
}

- r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock));
+ r = copy_mc_to_kernel(&s, sb(wc), sizeof(struct wc_memory_superblock));
if (r) {
ti->error = "Hardware memory error when reading superblock";
goto bad;
@@ -2360,7 +2362,8 @@ invalid_optional:
ti->error = "Unable to initialize device";
goto bad;
}
- r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock));
+ r = copy_mc_to_kernel(&s, sb(wc),
+ sizeof(struct wc_memory_superblock));
if (r) {
ti->error = "Hardware memory error when reading superblock";
goto bad;
diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c
index 82246f7aec6fb..e39b118b945f8 100644
--- a/drivers/misc/cardreader/rtsx_pcr.c
+++ b/drivers/misc/cardreader/rtsx_pcr.c
@@ -1172,10 +1172,6 @@ void rtsx_pci_init_ocp(struct rtsx_pcr *pcr)
rtsx_pci_write_register(pcr, REG_OCPGLITCH,
SD_OCP_GLITCH_MASK, pcr->hw_param.ocp_glitch);
rtsx_pci_enable_ocp(pcr);
- } else {
- /* OC power down */
- rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN,
- OC_POWER_DOWN);
}
}
}
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 25a9dd9c0c1b5..2ba899f5659ff 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -393,8 +393,8 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
*capp_unit_id = get_capp_unit_id(np, *phb_index);
of_node_put(np);
if (!*capp_unit_id) {
- pr_err("cxl: invalid capp unit id (phb_index: %d)\n",
- *phb_index);
+ pr_err("cxl: No capp unit found for PHB[%lld,%d]. Make sure the adapter is on a capi-compatible slot\n",
+ *chipid, *phb_index);
return -ENODEV;
}

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index dd07db656a5c3..f3c125d50d7a0 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1158,16 +1158,6 @@ static void bnxt_queue_sp_work(struct bnxt *bp)
schedule_work(&bp->sp_task);
}

-static void bnxt_cancel_sp_work(struct bnxt *bp)
-{
- if (BNXT_PF(bp)) {
- flush_workqueue(bnxt_pf_wq);
- } else {
- cancel_work_sync(&bp->sp_task);
- cancel_delayed_work_sync(&bp->fw_reset_task);
- }
-}
-
static void bnxt_sched_reset(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
{
if (!rxr->bnapi->in_reset) {
@@ -4198,7 +4188,8 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
u32 bar_offset = BNXT_GRCPF_REG_CHIMP_COMM;
u16 dst = BNXT_HWRM_CHNL_CHIMP;

- if (BNXT_NO_FW_ACCESS(bp))
+ if (BNXT_NO_FW_ACCESS(bp) &&
+ le16_to_cpu(req->req_type) != HWRM_FUNC_RESET)
return -EBUSY;

if (msg_len > BNXT_HWRM_MAX_REQ_LEN) {
@@ -9247,7 +9238,10 @@ int bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
{
int rc = 0;

- rc = __bnxt_open_nic(bp, irq_re_init, link_re_init);
+ if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state))
+ rc = -EIO;
+ if (!rc)
+ rc = __bnxt_open_nic(bp, irq_re_init, link_re_init);
if (rc) {
netdev_err(bp->dev, "nic open fail (rc: %x)\n", rc);
dev_close(bp->dev);
@@ -11505,15 +11499,17 @@ static void bnxt_remove_one(struct pci_dev *pdev)
if (BNXT_PF(bp))
bnxt_sriov_disable(bp);

- clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
- bnxt_cancel_sp_work(bp);
- bp->sp_event = 0;
-
- bnxt_dl_fw_reporters_destroy(bp, true);
if (BNXT_PF(bp))
devlink_port_type_clear(&bp->dl_port);
pci_disable_pcie_error_reporting(pdev);
unregister_netdev(dev);
+ clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
+ /* Flush any pending tasks */
+ cancel_work_sync(&bp->sp_task);
+ cancel_delayed_work_sync(&bp->fw_reset_task);
+ bp->sp_event = 0;
+
+ bnxt_dl_fw_reporters_destroy(bp, true);
bnxt_dl_unregister(bp);
bnxt_shutdown_tc(bp);

@@ -12238,6 +12234,9 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev,
return PCI_ERS_RESULT_DISCONNECT;
}

+ if (state == pci_channel_io_frozen)
+ set_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state);
+
if (netif_running(netdev))
bnxt_close(netdev);

@@ -12264,7 +12263,7 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev)
{
struct net_device *netdev = pci_get_drvdata(pdev);
struct bnxt *bp = netdev_priv(netdev);
- int err = 0;
+ int err = 0, off;
pci_ers_result_t result = PCI_ERS_RESULT_DISCONNECT;

netdev_info(bp->dev, "PCI Slot Reset\n");
@@ -12276,6 +12275,20 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev)
"Cannot re-enable PCI device after reset.\n");
} else {
pci_set_master(pdev);
+ /* Upon fatal error, our device internal logic that latches to
+ * BAR value is getting reset and will restore only upon
+ * rewritting the BARs.
+ *
+ * As pci_restore_state() does not re-write the BARs if the
+ * value is same as saved value earlier, driver needs to
+ * write the BARs to 0 to force restore, in case of fatal error.
+ */
+ if (test_and_clear_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN,
+ &bp->state)) {
+ for (off = PCI_BASE_ADDRESS_0;
+ off <= PCI_BASE_ADDRESS_5; off += 4)
+ pci_write_config_dword(bp->pdev, off, 0);
+ }
pci_restore_state(pdev);
pci_save_state(pdev);

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 440b43c8068f1..a80ac2ae57a68 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1672,6 +1672,7 @@ struct bnxt {
#define BNXT_STATE_ABORT_ERR 5
#define BNXT_STATE_FW_FATAL_COND 6
#define BNXT_STATE_DRV_REGISTERED 7
+#define BNXT_STATE_PCI_CHANNEL_IO_FROZEN 8

#define BNXT_NO_FW_ACCESS(bp) \
(test_bit(BNXT_STATE_FW_FATAL_COND, &(bp)->state) || \
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index ff0d82e2535da..fd33c888046b9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -145,13 +145,13 @@ static int configure_filter_smac(struct adapter *adap, struct filter_entry *f)
int err;

/* do a set-tcb for smac-sel and CWR bit.. */
- err = set_tcb_tflag(adap, f, f->tid, TF_CCTRL_CWR_S, 1, 1);
- if (err)
- goto smac_err;
-
err = set_tcb_field(adap, f, f->tid, TCB_SMAC_SEL_W,
TCB_SMAC_SEL_V(TCB_SMAC_SEL_M),
TCB_SMAC_SEL_V(f->smt->idx), 1);
+ if (err)
+ goto smac_err;
+
+ err = set_tcb_tflag(adap, f, f->tid, TF_CCTRL_CWR_S, 1, 1);
if (!err)
return 0;

@@ -865,6 +865,7 @@ int set_filter_wr(struct adapter *adapter, int fidx)
FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) |
FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) |
FW_FILTER_WR_DMAC_V(f->fs.newdmac) |
+ FW_FILTER_WR_SMAC_V(f->fs.newsmac) |
FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT ||
f->fs.newvlan == VLAN_REWRITE) |
FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE ||
@@ -882,7 +883,7 @@ int set_filter_wr(struct adapter *adapter, int fidx)
FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) |
FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) |
FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld));
- fwr->smac_sel = 0;
+ fwr->smac_sel = f->smt->idx;
fwr->rx_chan_rx_rpl_iq =
htons(FW_FILTER_WR_RX_CHAN_V(0) |
FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id));
@@ -1321,11 +1322,8 @@ static void mk_act_open_req6(struct filter_entry *f, struct sk_buff *skb,
TX_QUEUE_V(f->fs.nat_mode) |
T5_OPT_2_VALID_F |
RX_CHANNEL_V(cxgb4_port_e2cchan(f->dev)) |
- CONG_CNTRL_V((f->fs.action == FILTER_DROP) |
- (f->fs.dirsteer << 1)) |
PACE_V((f->fs.maskhash) |
- ((f->fs.dirsteerhash) << 1)) |
- CCTRL_ECN_V(f->fs.action == FILTER_SWITCH));
+ ((f->fs.dirsteerhash) << 1)));
}

static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb,
@@ -1361,11 +1359,8 @@ static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb,
TX_QUEUE_V(f->fs.nat_mode) |
T5_OPT_2_VALID_F |
RX_CHANNEL_V(cxgb4_port_e2cchan(f->dev)) |
- CONG_CNTRL_V((f->fs.action == FILTER_DROP) |
- (f->fs.dirsteer << 1)) |
PACE_V((f->fs.maskhash) |
- ((f->fs.dirsteerhash) << 1)) |
- CCTRL_ECN_V(f->fs.action == FILTER_SWITCH));
+ ((f->fs.dirsteerhash) << 1)));
}

static int cxgb4_set_hash_filter(struct net_device *dev,
@@ -2037,6 +2032,20 @@ void hash_filter_rpl(struct adapter *adap, const struct cpl_act_open_rpl *rpl)
}
return;
}
+ switch (f->fs.action) {
+ case FILTER_PASS:
+ if (f->fs.dirsteer)
+ set_tcb_tflag(adap, f, tid,
+ TF_DIRECT_STEER_S, 1, 1);
+ break;
+ case FILTER_DROP:
+ set_tcb_tflag(adap, f, tid, TF_DROP_S, 1, 1);
+ break;
+ case FILTER_SWITCH:
+ set_tcb_tflag(adap, f, tid, TF_LPBK_S, 1, 1);
+ break;
+ }
+
break;

default:
@@ -2104,22 +2113,11 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
if (ctx)
ctx->result = 0;
} else if (ret == FW_FILTER_WR_FLT_ADDED) {
- int err = 0;
-
- if (f->fs.newsmac)
- err = configure_filter_smac(adap, f);
-
- if (!err) {
- f->pending = 0; /* async setup completed */
- f->valid = 1;
- if (ctx) {
- ctx->result = 0;
- ctx->tid = idx;
- }
- } else {
- clear_filter(adap, f);
- if (ctx)
- ctx->result = err;
+ f->pending = 0; /* async setup completed */
+ f->valid = 1;
+ if (ctx) {
+ ctx->result = 0;
+ ctx->tid = idx;
}
} else {
/* Something went wrong. Issue a warning about the
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h
index 50232e063f49e..92473dda55d9f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h
@@ -50,6 +50,10 @@
#define TCB_T_FLAGS_M 0xffffffffffffffffULL
#define TCB_T_FLAGS_V(x) ((__u64)(x) << TCB_T_FLAGS_S)

+#define TF_DROP_S 22
+#define TF_DIRECT_STEER_S 23
+#define TF_LPBK_S 59
+
#define TF_CCTRL_ECE_S 60
#define TF_CCTRL_CWR_S 61
#define TF_CCTRL_RFR_S 62
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 9162856de1b19..ab15f1c588b3a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -3146,8 +3146,8 @@ static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev)
hclgevf_uninit_msi(hdev);
}

- hclgevf_pci_uninit(hdev);
hclgevf_cmd_uninit(hdev);
+ hclgevf_pci_uninit(hdev);
hclgevf_uninit_mac_list(hdev);
}

diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 7ef3369953b6a..c3ec9ceed833e 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1031,12 +1031,6 @@ static int ibmveth_is_packet_unsupported(struct sk_buff *skb,
ret = -EOPNOTSUPP;
}

- if (!ether_addr_equal(ether_header->h_source, netdev->dev_addr)) {
- netdev_dbg(netdev, "source packet MAC address does not match veth device's, dropping packet.\n");
- netdev->stats.tx_dropped++;
- ret = -EOPNOTSUPP;
- }
-
return ret;
}

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 3e0aab04d86fb..f96bb3dab5a8b 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1828,9 +1828,13 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p)
int rc;

rc = 0;
- ether_addr_copy(adapter->mac_addr, addr->sa_data);
- if (adapter->state != VNIC_PROBED)
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ if (adapter->state != VNIC_PROBED) {
+ ether_addr_copy(adapter->mac_addr, addr->sa_data);
rc = __ibmvnic_set_mac(netdev, addr->sa_data);
+ }

return rc;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 71b6185b49042..42726fdf5a3af 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1483,6 +1483,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
if (!reload)
devlink_resources_unregister(devlink, NULL);
mlxsw_core->bus->fini(mlxsw_core->bus_priv);
+ if (!reload)
+ devlink_free(devlink);

return;

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index b1feef473b746..ed89e669ddd5b 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -4559,7 +4559,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
}

rtl_irq_disable(tp);
- napi_schedule_irqoff(&tp->napi);
+ napi_schedule(&tp->napi);
out:
rtl_ack_events(tp, status);

@@ -4727,7 +4727,7 @@ static int rtl_open(struct net_device *dev)
rtl_request_firmware(tp);

retval = request_irq(pci_irq_vector(pdev, 0), rtl8169_interrupt,
- IRQF_NO_THREAD | IRQF_SHARED, dev->name, tp);
+ IRQF_SHARED, dev->name, tp);
if (retval < 0)
goto err_release_fw_2;

diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 99f7aae102ce1..6c58ba186b2cb 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1747,12 +1747,16 @@ static int ravb_hwtstamp_get(struct net_device *ndev, struct ifreq *req)
config.flags = 0;
config.tx_type = priv->tstamp_tx_ctrl ? HWTSTAMP_TX_ON :
HWTSTAMP_TX_OFF;
- if (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE_V2_L2_EVENT)
+ switch (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE) {
+ case RAVB_RXTSTAMP_TYPE_V2_L2_EVENT:
config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT;
- else if (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE_ALL)
+ break;
+ case RAVB_RXTSTAMP_TYPE_ALL:
config.rx_filter = HWTSTAMP_FILTER_ALL;
- else
+ break;
+ default:
config.rx_filter = HWTSTAMP_FILTER_NONE;
+ }

return copy_to_user(req->ifr_data, &config, sizeof(config)) ?
-EFAULT : 0;
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 8e47d0112e5dc..10f910f8cbe52 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -663,10 +663,6 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,

gtp = netdev_priv(dev);

- err = gtp_encap_enable(gtp, data);
- if (err < 0)
- return err;
-
if (!data[IFLA_GTP_PDP_HASHSIZE]) {
hashsize = 1024;
} else {
@@ -677,12 +673,16 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,

err = gtp_hashtable_new(gtp, hashsize);
if (err < 0)
- goto out_encap;
+ return err;
+
+ err = gtp_encap_enable(gtp, data);
+ if (err < 0)
+ goto out_hashtable;

err = register_netdevice(dev);
if (err < 0) {
netdev_dbg(dev, "failed to register new netdev %d\n", err);
- goto out_hashtable;
+ goto out_encap;
}

gn = net_generic(dev_net(dev), gtp_net_id);
@@ -693,11 +693,11 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,

return 0;

+out_encap:
+ gtp_encap_disable(gtp);
out_hashtable:
kfree(gtp->addr_hash);
kfree(gtp->tid_hash);
-out_encap:
- gtp_encap_disable(gtp);
return err;
}

diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c
index bdbfeed359db3..41e9af35a5820 100644
--- a/drivers/net/ipa/gsi_trans.c
+++ b/drivers/net/ipa/gsi_trans.c
@@ -398,15 +398,24 @@ void gsi_trans_cmd_add(struct gsi_trans *trans, void *buf, u32 size,

/* assert(which < trans->tre_count); */

- /* Set the page information for the buffer. We also need to fill in
- * the DMA address and length for the buffer (something dma_map_sg()
- * normally does).
+ /* Commands are quite different from data transfer requests.
+ * Their payloads come from a pool whose memory is allocated
+ * using dma_alloc_coherent(). We therefore do *not* map them
+ * for DMA (unlike what we do for pages and skbs).
+ *
+ * When a transaction completes, the SGL is normally unmapped.
+ * A command transaction has direction DMA_NONE, which tells
+ * gsi_trans_complete() to skip the unmapping step.
+ *
+ * The only things we use directly in a command scatter/gather
+ * entry are the DMA address and length. We still need the SG
+ * table flags to be maintained though, so assign a NULL page
+ * pointer for that purpose.
*/
sg = &trans->sgl[which];
-
- sg_set_buf(sg, buf, size);
+ sg_assign_page(sg, NULL);
sg_dma_address(sg) = addr;
- sg_dma_len(sg) = sg->length;
+ sg_dma_len(sg) = size;

info = &trans->info[which];
info->opcode = opcode;
diff --git a/drivers/net/wireless/intersil/p54/p54pci.c b/drivers/net/wireless/intersil/p54/p54pci.c
index 80ad0b7eaef43..f8c6027cab6b4 100644
--- a/drivers/net/wireless/intersil/p54/p54pci.c
+++ b/drivers/net/wireless/intersil/p54/p54pci.c
@@ -329,10 +329,12 @@ static void p54p_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
struct p54p_desc *desc;
dma_addr_t mapping;
u32 idx, i;
+ __le32 device_addr;

spin_lock_irqsave(&priv->lock, flags);
idx = le32_to_cpu(ring_control->host_idx[1]);
i = idx % ARRAY_SIZE(ring_control->tx_data);
+ device_addr = ((struct p54_hdr *)skb->data)->req_id;

mapping = pci_map_single(priv->pdev, skb->data, skb->len,
PCI_DMA_TODEVICE);
@@ -346,7 +348,7 @@ static void p54p_tx(struct ieee80211_hw *dev, struct sk_buff *skb)

desc = &ring_control->tx_data[i];
desc->host_addr = cpu_to_le32(mapping);
- desc->device_addr = ((struct p54_hdr *)skb->data)->req_id;
+ desc->device_addr = device_addr;
desc->len = cpu_to_le16(skb->len);
desc->flags = 0;

diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 45964acba9443..22d865ba6353d 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -268,7 +268,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
if (rw == READ) {
if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align)))
return -EIO;
- if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0)
+ if (copy_mc_to_kernel(buf, nsio->addr + offset, size) != 0)
return -EIO;
return 0;
}
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index d25e66fd942dd..5a4f588605caf 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -125,7 +125,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
while (len) {
mem = kmap_atomic(page);
chunk = min_t(unsigned int, len, PAGE_SIZE - off);
- rem = memcpy_mcsafe(mem + off, pmem_addr, chunk);
+ rem = copy_mc_to_kernel(mem + off, pmem_addr, chunk);
kunmap_atomic(mem);
if (rem)
return BLK_STS_IOERR;
@@ -305,7 +305,7 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev,

/*
* Use the 'no check' versions of copy_from_iter_flushcache() and
- * copy_to_iter_mcsafe() to bypass HARDENED_USERCOPY overhead. Bounds
+ * copy_mc_to_iter() to bypass HARDENED_USERCOPY overhead. Bounds
* checking, both file offset and device offset, is handled by
* dax_iomap_actor()
*/
@@ -318,7 +318,7 @@ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
- return _copy_to_iter_mcsafe(addr, bytes, i);
+ return _copy_mc_to_iter(addr, bytes, i);
}

static const struct dax_operations pmem_dax_ops = {
diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
index d5f58684d962c..c79326e699e82 100644
--- a/drivers/pci/controller/pci-aardvark.c
+++ b/drivers/pci/controller/pci-aardvark.c
@@ -1068,7 +1068,9 @@ static int advk_pcie_enable_phy(struct advk_pcie *pcie)
}

ret = phy_power_on(pcie->phy);
- if (ret) {
+ if (ret == -EOPNOTSUPP) {
+ dev_warn(&pcie->pdev->dev, "PHY unsupported by firmware\n");
+ } else if (ret) {
phy_exit(pcie->phy);
return ret;
}
diff --git a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
index 1a138be8bd6a0..810f25a476321 100644
--- a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
+++ b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
@@ -26,7 +26,6 @@
#define COMPHY_SIP_POWER_ON 0x82000001
#define COMPHY_SIP_POWER_OFF 0x82000002
#define COMPHY_SIP_PLL_LOCK 0x82000003
-#define COMPHY_FW_NOT_SUPPORTED (-1)

#define COMPHY_FW_MODE_SATA 0x1
#define COMPHY_FW_MODE_SGMII 0x2
@@ -112,10 +111,19 @@ static int mvebu_a3700_comphy_smc(unsigned long function, unsigned long lane,
unsigned long mode)
{
struct arm_smccc_res res;
+ s32 ret;

arm_smccc_smc(function, lane, mode, 0, 0, 0, 0, 0, &res);
+ ret = res.a0;

- return res.a0;
+ switch (ret) {
+ case SMCCC_RET_SUCCESS:
+ return 0;
+ case SMCCC_RET_NOT_SUPPORTED:
+ return -EOPNOTSUPP;
+ default:
+ return -EINVAL;
+ }
}

static int mvebu_a3700_comphy_get_fw_mode(int lane, int port,
@@ -220,7 +228,7 @@ static int mvebu_a3700_comphy_power_on(struct phy *phy)
}

ret = mvebu_a3700_comphy_smc(COMPHY_SIP_POWER_ON, lane->id, fw_param);
- if (ret == COMPHY_FW_NOT_SUPPORTED)
+ if (ret == -EOPNOTSUPP)
dev_err(lane->dev,
"unsupported SMC call, try updating your firmware\n");

diff --git a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c
index e41367f36ee1c..53ad127b100fe 100644
--- a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c
+++ b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c
@@ -123,7 +123,6 @@

#define COMPHY_SIP_POWER_ON 0x82000001
#define COMPHY_SIP_POWER_OFF 0x82000002
-#define COMPHY_FW_NOT_SUPPORTED (-1)

/*
* A lane is described by the following bitfields:
@@ -273,10 +272,19 @@ static int mvebu_comphy_smc(unsigned long function, unsigned long phys,
unsigned long lane, unsigned long mode)
{
struct arm_smccc_res res;
+ s32 ret;

arm_smccc_smc(function, phys, lane, mode, 0, 0, 0, 0, &res);
+ ret = res.a0;

- return res.a0;
+ switch (ret) {
+ case SMCCC_RET_SUCCESS:
+ return 0;
+ case SMCCC_RET_NOT_SUPPORTED:
+ return -EOPNOTSUPP;
+ default:
+ return -EINVAL;
+ }
}

static int mvebu_comphy_get_mode(bool fw_mode, int lane, int port,
@@ -819,7 +827,7 @@ static int mvebu_comphy_power_on(struct phy *phy)
if (!ret)
return ret;

- if (ret == COMPHY_FW_NOT_SUPPORTED)
+ if (ret == -EOPNOTSUPP)
dev_err(priv->dev,
"unsupported SMC call, try updating your firmware\n");

diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index a8d1edcf252c7..64e801a3a0206 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -308,8 +308,9 @@ static void pl011_write(unsigned int val, const struct uart_amba_port *uap,
*/
static int pl011_fifo_to_tty(struct uart_amba_port *uap)
{
- u16 status;
unsigned int ch, flag, fifotaken;
+ int sysrq;
+ u16 status;

for (fifotaken = 0; fifotaken != 256; fifotaken++) {
status = pl011_read(uap, REG_FR);
@@ -344,10 +345,12 @@ static int pl011_fifo_to_tty(struct uart_amba_port *uap)
flag = TTY_FRAME;
}

- if (uart_handle_sysrq_char(&uap->port, ch & 255))
- continue;
+ spin_unlock(&uap->port.lock);
+ sysrq = uart_handle_sysrq_char(&uap->port, ch & 255);
+ spin_lock(&uap->port.lock);

- uart_insert_char(&uap->port, ch, UART011_DR_OE, ch, flag);
+ if (!sysrq)
+ uart_insert_char(&uap->port, ch, UART011_DR_OE, ch, flag);
}

return fifotaken;
diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index ffdf6da016c21..2bb800ca5f0ca 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -954,7 +954,7 @@ static void qcom_geni_serial_set_termios(struct uart_port *uport,
sampling_rate = UART_OVERSAMPLING;
/* Sampling rate is halved for IP versions >= 2.5 */
ver = geni_se_get_qup_hw_version(&port->se);
- if (GENI_SE_VERSION_MAJOR(ver) >= 2 && GENI_SE_VERSION_MINOR(ver) >= 5)
+ if (ver >= QUP_SE_VERSION_2_5)
sampling_rate /= 2;

clk_rate = get_clk_div_rate(baud, sampling_rate, &clk_div);
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 64a9025a87bee..1f32db7b72b2c 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -720,17 +720,18 @@ struct gntdev_copy_batch {
s16 __user *status[GNTDEV_COPY_BATCH];
unsigned int nr_ops;
unsigned int nr_pages;
+ bool writeable;
};

static int gntdev_get_page(struct gntdev_copy_batch *batch, void __user *virt,
- bool writeable, unsigned long *gfn)
+ unsigned long *gfn)
{
unsigned long addr = (unsigned long)virt;
struct page *page;
unsigned long xen_pfn;
int ret;

- ret = get_user_pages_fast(addr, 1, writeable ? FOLL_WRITE : 0, &page);
+ ret = get_user_pages_fast(addr, 1, batch->writeable ? FOLL_WRITE : 0, &page);
if (ret < 0)
return ret;

@@ -746,9 +747,13 @@ static void gntdev_put_pages(struct gntdev_copy_batch *batch)
{
unsigned int i;

- for (i = 0; i < batch->nr_pages; i++)
+ for (i = 0; i < batch->nr_pages; i++) {
+ if (batch->writeable && !PageDirty(batch->pages[i]))
+ set_page_dirty_lock(batch->pages[i]);
put_page(batch->pages[i]);
+ }
batch->nr_pages = 0;
+ batch->writeable = false;
}

static int gntdev_copy(struct gntdev_copy_batch *batch)
@@ -837,8 +842,9 @@ static int gntdev_grant_copy_seg(struct gntdev_copy_batch *batch,
virt = seg->source.virt + copied;
off = (unsigned long)virt & ~XEN_PAGE_MASK;
len = min(len, (size_t)XEN_PAGE_SIZE - off);
+ batch->writeable = false;

- ret = gntdev_get_page(batch, virt, false, &gfn);
+ ret = gntdev_get_page(batch, virt, &gfn);
if (ret < 0)
return ret;

@@ -856,8 +862,9 @@ static int gntdev_grant_copy_seg(struct gntdev_copy_batch *batch,
virt = seg->dest.virt + copied;
off = (unsigned long)virt & ~XEN_PAGE_MASK;
len = min(len, (size_t)XEN_PAGE_SIZE - off);
+ batch->writeable = true;

- ret = gntdev_get_page(batch, virt, true, &gfn);
+ ret = gntdev_get_page(batch, virt, &gfn);
if (ret < 0)
return ret;

diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 28bb5689333a5..15880a68faadc 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -141,6 +141,9 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,

name[len + EFI_VARIABLE_GUID_LEN+1] = '\0';

+ /* replace invalid slashes like kobject_set_name_vargs does for /sys/firmware/efi/vars. */
+ strreplace(name, '/', '!');
+
inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0,
is_removable);
if (!inode)
diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
index 87e437e7b34f2..f86e3247febc1 100644
--- a/fs/erofs/xattr.c
+++ b/fs/erofs/xattr.c
@@ -473,8 +473,6 @@ static int erofs_xattr_generic_get(const struct xattr_handler *handler,
return -EOPNOTSUPP;
break;
case EROFS_XATTR_INDEX_TRUSTED:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
break;
case EROFS_XATTR_INDEX_SECURITY:
break;
diff --git a/fs/exec.c b/fs/exec.c
index e6e8a9a703278..78976a3260c6a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -62,6 +62,7 @@
#include <linux/oom.h>
#include <linux/compat.h>
#include <linux/vmalloc.h>
+#include <linux/io_uring.h>

#include <linux/uaccess.h>
#include <asm/mmu_context.h>
@@ -1847,6 +1848,11 @@ static int __do_execve_file(int fd, struct filename *filename,
* further execve() calls fail. */
current->flags &= ~PF_NPROC_EXCEEDED;

+ /*
+ * Cancel any io_uring activity across execve
+ */
+ io_uring_task_cancel();
+
retval = unshare_files(&displaced);
if (retval)
goto out_ret;
diff --git a/fs/file.c b/fs/file.c
index abb8b7081d7a4..8e2c532bb02e3 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -18,6 +18,7 @@
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
+#include <linux/io_uring.h>

unsigned int sysctl_nr_open __read_mostly = 1024*1024;
unsigned int sysctl_nr_open_min = BITS_PER_LONG;
@@ -439,6 +440,7 @@ void exit_files(struct task_struct *tsk)
struct files_struct * files = tsk->files;

if (files) {
+ io_uring_files_cancel(files);
task_lock(tsk);
tsk->files = NULL;
task_unlock(tsk);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 02b3c36b36766..5078a6ca7dfcd 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -785,15 +785,16 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
struct page *newpage;
struct pipe_buffer *buf = cs->pipebufs;

+ get_page(oldpage);
err = unlock_request(cs->req);
if (err)
- return err;
+ goto out_put_old;

fuse_copy_finish(cs);

err = pipe_buf_confirm(cs->pipe, buf);
if (err)
- return err;
+ goto out_put_old;

BUG_ON(!cs->nr_segs);
cs->currbuf = buf;
@@ -833,7 +834,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
if (err) {
unlock_page(newpage);
- return err;
+ goto out_put_old;
}

get_page(newpage);
@@ -852,14 +853,19 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (err) {
unlock_page(newpage);
put_page(newpage);
- return err;
+ goto out_put_old;
}

unlock_page(oldpage);
+ /* Drop ref for ap->pages[] array */
put_page(oldpage);
cs->len = 0;

- return 0;
+ err = 0;
+out_put_old:
+ /* Drop ref obtained in this function */
+ put_page(oldpage);
+ return err;

out_fallback_unlock:
unlock_page(newpage);
@@ -868,10 +874,10 @@ out_fallback:
cs->offset = buf->offset;

err = lock_request(cs->req);
- if (err)
- return err;
+ if (!err)
+ err = 1;

- return 1;
+ goto out_put_old;
}

static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
@@ -883,14 +889,16 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
if (cs->nr_segs >= cs->pipe->max_usage)
return -EIO;

+ get_page(page);
err = unlock_request(cs->req);
- if (err)
+ if (err) {
+ put_page(page);
return err;
+ }

fuse_copy_finish(cs);

buf = cs->pipebufs;
- get_page(page);
buf->page = page;
buf->offset = offset;
buf->len = count;
diff --git a/fs/io-wq.c b/fs/io-wq.c
index cb9e5a444fba7..56a229621a831 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -60,6 +60,7 @@ struct io_worker {
const struct cred *cur_creds;
const struct cred *saved_creds;
struct files_struct *restore_files;
+ struct nsproxy *restore_nsproxy;
struct fs_struct *restore_fs;
};

@@ -87,7 +88,7 @@ enum {
*/
struct io_wqe {
struct {
- spinlock_t lock;
+ raw_spinlock_t lock;
struct io_wq_work_list work_list;
unsigned long hash_map;
unsigned flags;
@@ -148,11 +149,12 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)

if (current->files != worker->restore_files) {
__acquire(&wqe->lock);
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
dropped_lock = true;

task_lock(current);
current->files = worker->restore_files;
+ current->nsproxy = worker->restore_nsproxy;
task_unlock(current);
}

@@ -166,7 +168,7 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
if (worker->mm) {
if (!dropped_lock) {
__acquire(&wqe->lock);
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
dropped_lock = true;
}
__set_current_state(TASK_RUNNING);
@@ -200,7 +202,6 @@ static void io_worker_exit(struct io_worker *worker)
{
struct io_wqe *wqe = worker->wqe;
struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);
- unsigned nr_workers;

/*
* If we're not at zero, someone else is holding a brief reference
@@ -220,23 +221,19 @@ static void io_worker_exit(struct io_worker *worker)
worker->flags = 0;
preempt_enable();

- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
hlist_nulls_del_rcu(&worker->nulls_node);
list_del_rcu(&worker->all_list);
if (__io_worker_unuse(wqe, worker)) {
__release(&wqe->lock);
- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
}
acct->nr_workers--;
- nr_workers = wqe->acct[IO_WQ_ACCT_BOUND].nr_workers +
- wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers;
- spin_unlock_irq(&wqe->lock);
-
- /* all workers gone, wq exit can proceed */
- if (!nr_workers && refcount_dec_and_test(&wqe->wq->refs))
- complete(&wqe->wq->done);
+ raw_spin_unlock_irq(&wqe->lock);

kfree_rcu(worker, rcu);
+ if (refcount_dec_and_test(&wqe->wq->refs))
+ complete(&wqe->wq->done);
}

static inline bool io_wqe_run_queue(struct io_wqe *wqe)
@@ -318,6 +315,7 @@ static void io_worker_start(struct io_wqe *wqe, struct io_worker *worker)

worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
worker->restore_files = current->files;
+ worker->restore_nsproxy = current->nsproxy;
worker->restore_fs = current->fs;
io_wqe_inc_running(wqe, worker);
}
@@ -454,6 +452,7 @@ static void io_impersonate_work(struct io_worker *worker,
if (work->files && current->files != work->files) {
task_lock(current);
current->files = work->files;
+ current->nsproxy = work->nsproxy;
task_unlock(current);
}
if (work->fs && current->fs != work->fs)
@@ -504,7 +503,7 @@ get_next:
else if (!wq_list_empty(&wqe->work_list))
wqe->flags |= IO_WQE_FLAG_STALLED;

- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
if (!work)
break;
io_assign_current_work(worker, work);
@@ -539,7 +538,7 @@ get_next:
io_wqe_enqueue(wqe, linked);

if (hash != -1U && !next_hashed) {
- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
wqe->hash_map &= ~BIT_ULL(hash);
wqe->flags &= ~IO_WQE_FLAG_STALLED;
/* dependent work is not hashed */
@@ -547,11 +546,11 @@ get_next:
/* skip unnecessary unlock-lock wqe->lock */
if (!work)
goto get_next;
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
}
} while (work);

- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
} while (1);
}

@@ -566,7 +565,7 @@ static int io_wqe_worker(void *data)
while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
set_current_state(TASK_INTERRUPTIBLE);
loop:
- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
if (io_wqe_run_queue(wqe)) {
__set_current_state(TASK_RUNNING);
io_worker_handle_work(worker);
@@ -577,7 +576,7 @@ loop:
__release(&wqe->lock);
goto loop;
}
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
if (signal_pending(current))
flush_signals(current);
if (schedule_timeout(WORKER_IDLE_TIMEOUT))
@@ -589,11 +588,11 @@ loop:
}

if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
if (!wq_list_empty(&wqe->work_list))
io_worker_handle_work(worker);
else
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
}

io_worker_exit(worker);
@@ -633,14 +632,14 @@ void io_wq_worker_sleeping(struct task_struct *tsk)

worker->flags &= ~IO_WORKER_F_RUNNING;

- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
io_wqe_dec_running(wqe, worker);
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
}

static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
{
- struct io_wqe_acct *acct =&wqe->acct[index];
+ struct io_wqe_acct *acct = &wqe->acct[index];
struct io_worker *worker;

worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node);
@@ -659,7 +658,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
return false;
}

- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
list_add_tail_rcu(&worker->all_list, &wqe->all_list);
worker->flags |= IO_WORKER_F_FREE;
@@ -668,11 +667,12 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND))
worker->flags |= IO_WORKER_F_FIXED;
acct->nr_workers++;
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);

if (index == IO_WQ_ACCT_UNBOUND)
atomic_inc(&wq->user->processes);

+ refcount_inc(&wq->refs);
wake_up_process(worker->task);
return true;
}
@@ -688,28 +688,63 @@ static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index)
return acct->nr_workers < acct->max_workers;
}

+static bool io_wqe_worker_send_sig(struct io_worker *worker, void *data)
+{
+ send_sig(SIGINT, worker->task, 1);
+ return false;
+}
+
+/*
+ * Iterate the passed in list and call the specific function for each
+ * worker that isn't exiting
+ */
+static bool io_wq_for_each_worker(struct io_wqe *wqe,
+ bool (*func)(struct io_worker *, void *),
+ void *data)
+{
+ struct io_worker *worker;
+ bool ret = false;
+
+ list_for_each_entry_rcu(worker, &wqe->all_list, all_list) {
+ if (io_worker_get(worker)) {
+ /* no task if node is/was offline */
+ if (worker->task)
+ ret = func(worker, data);
+ io_worker_release(worker);
+ if (ret)
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static bool io_wq_worker_wake(struct io_worker *worker, void *data)
+{
+ wake_up_process(worker->task);
+ return false;
+}
+
/*
* Manager thread. Tasked with creating new workers, if we need them.
*/
static int io_wq_manager(void *data)
{
struct io_wq *wq = data;
- int workers_to_create = num_possible_nodes();
int node;

/* create fixed workers */
- refcount_set(&wq->refs, workers_to_create);
+ refcount_set(&wq->refs, 1);
for_each_node(node) {
if (!node_online(node))
continue;
- if (!create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND))
- goto err;
- workers_to_create--;
+ if (create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND))
+ continue;
+ set_bit(IO_WQ_BIT_ERROR, &wq->state);
+ set_bit(IO_WQ_BIT_EXIT, &wq->state);
+ goto out;
}

- while (workers_to_create--)
- refcount_dec(&wq->refs);
-
complete(&wq->done);

while (!kthread_should_stop()) {
@@ -723,12 +758,12 @@ static int io_wq_manager(void *data)
if (!node_online(node))
continue;

- spin_lock_irq(&wqe->lock);
+ raw_spin_lock_irq(&wqe->lock);
if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND))
fork_worker[IO_WQ_ACCT_BOUND] = true;
if (io_wqe_need_worker(wqe, IO_WQ_ACCT_UNBOUND))
fork_worker[IO_WQ_ACCT_UNBOUND] = true;
- spin_unlock_irq(&wqe->lock);
+ raw_spin_unlock_irq(&wqe->lock);
if (fork_worker[IO_WQ_ACCT_BOUND])
create_io_worker(wq, wqe, IO_WQ_ACCT_BOUND);
if (fork_worker[IO_WQ_ACCT_UNBOUND])
@@ -741,12 +776,18 @@ static int io_wq_manager(void *data)
if (current->task_works)
task_work_run();

- return 0;
-err:
- set_bit(IO_WQ_BIT_ERROR, &wq->state);
- set_bit(IO_WQ_BIT_EXIT, &wq->state);
- if (refcount_sub_and_test(workers_to_create, &wq->refs))
+out:
+ if (refcount_dec_and_test(&wq->refs)) {
complete(&wq->done);
+ return 0;
+ }
+ /* if ERROR is set and we get here, we have workers to wake */
+ if (test_bit(IO_WQ_BIT_ERROR, &wq->state)) {
+ rcu_read_lock();
+ for_each_node(node)
+ io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
+ rcu_read_unlock();
+ }
return 0;
}

@@ -825,10 +866,10 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
}

work_flags = work->flags;
- spin_lock_irqsave(&wqe->lock, flags);
+ raw_spin_lock_irqsave(&wqe->lock, flags);
io_wqe_insert_work(wqe, work);
wqe->flags &= ~IO_WQE_FLAG_STALLED;
- spin_unlock_irqrestore(&wqe->lock, flags);
+ raw_spin_unlock_irqrestore(&wqe->lock, flags);

if ((work_flags & IO_WQ_WORK_CONCURRENT) ||
!atomic_read(&acct->nr_running))
@@ -854,37 +895,6 @@ void io_wq_hash_work(struct io_wq_work *work, void *val)
work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
}

-static bool io_wqe_worker_send_sig(struct io_worker *worker, void *data)
-{
- send_sig(SIGINT, worker->task, 1);
- return false;
-}
-
-/*
- * Iterate the passed in list and call the specific function for each
- * worker that isn't exiting
- */
-static bool io_wq_for_each_worker(struct io_wqe *wqe,
- bool (*func)(struct io_worker *, void *),
- void *data)
-{
- struct io_worker *worker;
- bool ret = false;
-
- list_for_each_entry_rcu(worker, &wqe->all_list, all_list) {
- if (io_worker_get(worker)) {
- /* no task if node is/was offline */
- if (worker->task)
- ret = func(worker, data);
- io_worker_release(worker);
- if (ret)
- break;
- }
- }
-
- return ret;
-}
-
void io_wq_cancel_all(struct io_wq *wq)
{
int node;
@@ -955,13 +965,13 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
unsigned long flags;

retry:
- spin_lock_irqsave(&wqe->lock, flags);
+ raw_spin_lock_irqsave(&wqe->lock, flags);
wq_list_for_each(node, prev, &wqe->work_list) {
work = container_of(node, struct io_wq_work, list);
if (!match->fn(work, match->data))
continue;
io_wqe_remove_pending(wqe, work, prev);
- spin_unlock_irqrestore(&wqe->lock, flags);
+ raw_spin_unlock_irqrestore(&wqe->lock, flags);
io_run_cancel(work, wqe);
match->nr_pending++;
if (!match->cancel_all)
@@ -970,7 +980,7 @@ retry:
/* not safe to continue after unlock */
goto retry;
}
- spin_unlock_irqrestore(&wqe->lock, flags);
+ raw_spin_unlock_irqrestore(&wqe->lock, flags);
}

static void io_wqe_cancel_running_work(struct io_wqe *wqe,
@@ -1078,7 +1088,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
}
atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
wqe->wq = wq;
- spin_lock_init(&wqe->lock);
+ raw_spin_lock_init(&wqe->lock);
INIT_WQ_LIST(&wqe->work_list);
INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
INIT_LIST_HEAD(&wqe->all_list);
@@ -1117,12 +1127,6 @@ bool io_wq_get(struct io_wq *wq, struct io_wq_data *data)
return refcount_inc_not_zero(&wq->use_refs);
}

-static bool io_wq_worker_wake(struct io_worker *worker, void *data)
-{
- wake_up_process(worker->task);
- return false;
-}
-
static void __io_wq_destroy(struct io_wq *wq)
{
int node;
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 071f1a9978002..9be6def2b5a6f 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -88,6 +88,7 @@ struct io_wq_work {
struct files_struct *files;
struct mm_struct *mm;
const struct cred *creds;
+ struct nsproxy *nsproxy;
struct fs_struct *fs;
unsigned flags;
};
diff --git a/fs/io_uring.c b/fs/io_uring.c
index d2bb2ae9551f0..8e9c58fa76362 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -78,6 +78,7 @@
#include <linux/fs_struct.h>
#include <linux/splice.h>
#include <linux/task_work.h>
+#include <linux/io_uring.h>

#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
@@ -264,7 +265,16 @@ struct io_ring_ctx {
/* IO offload */
struct io_wq *io_wq;
struct task_struct *sqo_thread; /* if using sq thread polling */
- struct mm_struct *sqo_mm;
+
+ /*
+ * For SQPOLL usage - we hold a reference to the parent task, so we
+ * have access to the ->files
+ */
+ struct task_struct *sqo_task;
+
+ /* Only used for accounting purposes */
+ struct mm_struct *mm_account;
+
wait_queue_head_t sqo_wait;

/*
@@ -274,8 +284,6 @@ struct io_ring_ctx {
*/
struct fixed_file_data *file_data;
unsigned nr_user_files;
- int ring_fd;
- struct file *ring_file;

/* if used, fixed mapped user buffers */
unsigned nr_user_bufs;
@@ -541,7 +549,6 @@ enum {
REQ_F_NO_FILE_TABLE_BIT,
REQ_F_QUEUE_TIMEOUT_BIT,
REQ_F_WORK_INITIALIZED_BIT,
- REQ_F_TASK_PINNED_BIT,

/* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT,
@@ -599,8 +606,6 @@ enum {
REQ_F_QUEUE_TIMEOUT = BIT(REQ_F_QUEUE_TIMEOUT_BIT),
/* io_wq_work is initialized */
REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT),
- /* req->task is refcounted */
- REQ_F_TASK_PINNED = BIT(REQ_F_TASK_PINNED_BIT),
};

struct async_poll {
@@ -915,21 +920,6 @@ struct sock *io_uring_get_socket(struct file *file)
}
EXPORT_SYMBOL(io_uring_get_socket);

-static void io_get_req_task(struct io_kiocb *req)
-{
- if (req->flags & REQ_F_TASK_PINNED)
- return;
- get_task_struct(req->task);
- req->flags |= REQ_F_TASK_PINNED;
-}
-
-/* not idempotent -- it doesn't clear REQ_F_TASK_PINNED */
-static void __io_put_req_task(struct io_kiocb *req)
-{
- if (req->flags & REQ_F_TASK_PINNED)
- put_task_struct(req->task);
-}
-
static void io_file_put_work(struct work_struct *work);

/*
@@ -1141,14 +1131,34 @@ static void io_kill_timeout(struct io_kiocb *req)
}
}

-static void io_kill_timeouts(struct io_ring_ctx *ctx)
+static bool io_task_match(struct io_kiocb *req, struct task_struct *tsk)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+
+ if (!tsk || req->task == tsk)
+ return true;
+ if ((ctx->flags & IORING_SETUP_SQPOLL) && req->task == ctx->sqo_thread)
+ return true;
+ return false;
+}
+
+/*
+ * Returns true if we found and killed one or more timeouts
+ */
+static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk)
{
struct io_kiocb *req, *tmp;
+ int canceled = 0;

spin_lock_irq(&ctx->completion_lock);
- list_for_each_entry_safe(req, tmp, &ctx->timeout_list, list)
- io_kill_timeout(req);
+ list_for_each_entry_safe(req, tmp, &ctx->timeout_list, list) {
+ if (io_task_match(req, tsk)) {
+ io_kill_timeout(req);
+ canceled++;
+ }
+ }
spin_unlock_irq(&ctx->completion_lock);
+ return canceled != 0;
}

static void __io_queue_deferred(struct io_ring_ctx *ctx)
@@ -1229,12 +1239,24 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
eventfd_signal(ctx->cq_ev_fd, 1);
}

+static inline bool io_match_files(struct io_kiocb *req,
+ struct files_struct *files)
+{
+ if (!files)
+ return true;
+ if (req->flags & REQ_F_WORK_INITIALIZED)
+ return req->work.files == files;
+ return false;
+}
+
/* Returns true if there are no backlogged entries after the flush */
-static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
+static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
+ struct task_struct *tsk,
+ struct files_struct *files)
{
struct io_rings *rings = ctx->rings;
+ struct io_kiocb *req, *tmp;
struct io_uring_cqe *cqe;
- struct io_kiocb *req;
unsigned long flags;
LIST_HEAD(list);

@@ -1253,7 +1275,12 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
ctx->cq_overflow_flushed = 1;

cqe = NULL;
- while (!list_empty(&ctx->cq_overflow_list)) {
+ list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, list) {
+ if (tsk && req->task != tsk)
+ continue;
+ if (!io_match_files(req, files))
+ continue;
+
cqe = io_get_cqring(ctx);
if (!cqe && !force)
break;
@@ -1307,7 +1334,12 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
WRITE_ONCE(cqe->user_data, req->user_data);
WRITE_ONCE(cqe->res, res);
WRITE_ONCE(cqe->flags, cflags);
- } else if (ctx->cq_overflow_flushed) {
+ } else if (ctx->cq_overflow_flushed || req->task->io_uring->in_idle) {
+ /*
+ * If we're in ring overflow flush mode, or in task cancel mode,
+ * then we cannot store the request for later flushing, we need
+ * to drop it on the floor.
+ */
WRITE_ONCE(ctx->rings->cq_overflow,
atomic_inc_return(&ctx->cached_cq_overflow));
} else {
@@ -1412,15 +1444,35 @@ static inline void io_put_file(struct io_kiocb *req, struct file *file,
fput(file);
}

+static void io_req_drop_files(struct io_kiocb *req)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->inflight_lock, flags);
+ list_del(&req->inflight_entry);
+ if (waitqueue_active(&ctx->inflight_wait))
+ wake_up(&ctx->inflight_wait);
+ spin_unlock_irqrestore(&ctx->inflight_lock, flags);
+ req->flags &= ~REQ_F_INFLIGHT;
+ put_files_struct(req->work.files);
+ put_nsproxy(req->work.nsproxy);
+ req->work.files = NULL;
+}
+
static void __io_req_aux_free(struct io_kiocb *req)
{
+ struct io_uring_task *tctx = req->task->io_uring;
if (req->flags & REQ_F_NEED_CLEANUP)
io_cleanup_req(req);

kfree(req->io);
if (req->file)
io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));
- __io_put_req_task(req);
+ atomic_long_inc(&tctx->req_complete);
+ if (tctx->in_idle)
+ wake_up(&tctx->wait);
+ put_task_struct(req->task);
io_req_work_drop_env(req);
}

@@ -1428,16 +1480,8 @@ static void __io_free_req(struct io_kiocb *req)
{
__io_req_aux_free(req);

- if (req->flags & REQ_F_INFLIGHT) {
- struct io_ring_ctx *ctx = req->ctx;
- unsigned long flags;
-
- spin_lock_irqsave(&ctx->inflight_lock, flags);
- list_del(&req->inflight_entry);
- if (waitqueue_active(&ctx->inflight_wait))
- wake_up(&ctx->inflight_wait);
- spin_unlock_irqrestore(&ctx->inflight_lock, flags);
- }
+ if (req->flags & REQ_F_INFLIGHT)
+ io_req_drop_files(req);

percpu_ref_put(&req->ctx->refs);
if (likely(!io_is_fallback_req(req)))
@@ -1717,7 +1761,7 @@ static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush)
if (noflush && !list_empty(&ctx->cq_overflow_list))
return -1U;

- io_cqring_overflow_flush(ctx, false);
+ io_cqring_overflow_flush(ctx, false, NULL, NULL);
}

/* See comment at the top of this file */
@@ -1738,7 +1782,7 @@ static inline bool io_req_multi_free(struct req_batch *rb, struct io_kiocb *req)
if ((req->flags & REQ_F_LINK_HEAD) || io_is_fallback_req(req))
return false;

- if (req->file || req->io)
+ if (req->file || req->io || req->task)
rb->need_iter++;

rb->reqs[rb->to_free++] = req;
@@ -1762,6 +1806,12 @@ static int io_put_kbuf(struct io_kiocb *req)

static inline bool io_run_task_work(void)
{
+ /*
+ * Not safe to run on exiting task, and the task_work handling will
+ * not add work to such a task.
+ */
+ if (unlikely(current->flags & PF_EXITING))
+ return false;
if (current->task_works) {
__set_current_state(TASK_RUNNING);
task_work_run();
@@ -3492,8 +3542,7 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -EBADF;

req->close.fd = READ_ONCE(sqe->fd);
- if ((req->file && req->file->f_op == &io_uring_fops) ||
- req->close.fd == req->ctx->ring_fd)
+ if ((req->file && req->file->f_op == &io_uring_fops))
return -EBADF;

req->close.put_file = NULL;
@@ -4397,9 +4446,10 @@ static int io_sq_thread_acquire_mm(struct io_ring_ctx *ctx,
{
if (io_op_defs[req->opcode].needs_mm && !current->mm) {
if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL) ||
- !mmget_not_zero(ctx->sqo_mm)))
+ !ctx->sqo_task->mm ||
+ !mmget_not_zero(ctx->sqo_task->mm)))
return -EFAULT;
- kthread_use_mm(ctx->sqo_mm);
+ kthread_use_mm(ctx->sqo_task->mm);
}

return 0;
@@ -4550,7 +4600,6 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
if (req->flags & REQ_F_WORK_INITIALIZED)
memcpy(&apoll->work, &req->work, sizeof(req->work));

- io_get_req_task(req);
req->apoll = apoll;
INIT_HLIST_NODE(&req->hash_node);

@@ -4635,7 +4684,10 @@ static bool io_poll_remove_one(struct io_kiocb *req)
return do_complete;
}

-static void io_poll_remove_all(struct io_ring_ctx *ctx)
+/*
+ * Returns true if we found and killed one or more poll requests
+ */
+static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk)
{
struct hlist_node *tmp;
struct io_kiocb *req;
@@ -4646,13 +4698,17 @@ static void io_poll_remove_all(struct io_ring_ctx *ctx)
struct hlist_head *list;

list = &ctx->cancel_hash[i];
- hlist_for_each_entry_safe(req, tmp, list, hash_node)
- posted += io_poll_remove_one(req);
+ hlist_for_each_entry_safe(req, tmp, list, hash_node) {
+ if (io_task_match(req, tsk))
+ posted += io_poll_remove_one(req);
+ }
}
spin_unlock_irq(&ctx->completion_lock);

if (posted)
io_cqring_ev_posted(ctx);
+
+ return posted != 0;
}

static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr)
@@ -4738,8 +4794,6 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe

events = READ_ONCE(sqe->poll_events);
poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
-
- io_get_req_task(req);
return 0;
}

@@ -5626,32 +5680,20 @@ static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,

static int io_grab_files(struct io_kiocb *req)
{
- int ret = -EBADF;
struct io_ring_ctx *ctx = req->ctx;

if (req->work.files || (req->flags & REQ_F_NO_FILE_TABLE))
return 0;
- if (!ctx->ring_file)
- return -EBADF;

- rcu_read_lock();
+ req->work.files = get_files_struct(current);
+ get_nsproxy(current->nsproxy);
+ req->work.nsproxy = current->nsproxy;
+ req->flags |= REQ_F_INFLIGHT;
+
spin_lock_irq(&ctx->inflight_lock);
- /*
- * We use the f_ops->flush() handler to ensure that we can flush
- * out work accessing these files if the fd is closed. Check if
- * the fd has changed since we started down this path, and disallow
|
|
- * this operation if it has.
|
|
- */
|
|
- if (fcheck(ctx->ring_fd) == ctx->ring_file) {
|
|
- list_add(&req->inflight_entry, &ctx->inflight_list);
|
|
- req->flags |= REQ_F_INFLIGHT;
|
|
- req->work.files = current->files;
|
|
- ret = 0;
|
|
- }
|
|
+ list_add(&req->inflight_entry, &ctx->inflight_list);
|
|
spin_unlock_irq(&ctx->inflight_lock);
|
|
- rcu_read_unlock();
|
|
-
|
|
- return ret;
|
|
+ return 0;
|
|
}
|
|
|
|
static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
|
|
@@ -6021,6 +6063,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
|
|
/* one is dropped after submission, the other at completion */
|
|
refcount_set(&req->refs, 2);
|
|
req->task = current;
|
|
+ get_task_struct(req->task);
|
|
+ atomic_long_inc(&req->task->io_uring->req_issue);
|
|
req->result = 0;
|
|
|
|
if (unlikely(req->opcode >= IORING_OP_LAST))
|
|
@@ -6056,8 +6100,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
|
|
return io_req_set_file(state, req, READ_ONCE(sqe->fd));
|
|
}
|
|
|
|
-static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
|
|
- struct file *ring_file, int ring_fd)
|
|
+static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
|
|
{
|
|
struct io_submit_state state, *statep = NULL;
|
|
struct io_kiocb *link = NULL;
|
|
@@ -6066,7 +6109,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
|
|
/* if we have a backlog and couldn't flush it all, return BUSY */
|
|
if (test_bit(0, &ctx->sq_check_overflow)) {
|
|
if (!list_empty(&ctx->cq_overflow_list) &&
|
|
- !io_cqring_overflow_flush(ctx, false))
|
|
+ !io_cqring_overflow_flush(ctx, false, NULL, NULL))
|
|
return -EBUSY;
|
|
}
|
|
|
|
@@ -6081,9 +6124,6 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
|
|
statep = &state;
|
|
}
|
|
|
|
- ctx->ring_fd = ring_fd;
|
|
- ctx->ring_file = ring_file;
|
|
-
|
|
for (i = 0; i < nr; i++) {
|
|
const struct io_uring_sqe *sqe;
|
|
struct io_kiocb *req;
|
|
@@ -6244,7 +6284,7 @@ static int io_sq_thread(void *data)
|
|
|
|
mutex_lock(&ctx->uring_lock);
|
|
if (likely(!percpu_ref_is_dying(&ctx->refs)))
|
|
- ret = io_submit_sqes(ctx, to_submit, NULL, -1);
|
|
+ ret = io_submit_sqes(ctx, to_submit);
|
|
mutex_unlock(&ctx->uring_lock);
|
|
timeout = jiffies + ctx->sq_thread_idle;
|
|
}
|
|
@@ -7073,14 +7113,38 @@ out_fput:
|
|
return ret;
|
|
}
|
|
|
|
+static int io_uring_alloc_task_context(struct task_struct *task)
|
|
+{
|
|
+ struct io_uring_task *tctx;
|
|
+
|
|
+ tctx = kmalloc(sizeof(*tctx), GFP_KERNEL);
|
|
+ if (unlikely(!tctx))
|
|
+ return -ENOMEM;
|
|
+
|
|
+ xa_init(&tctx->xa);
|
|
+ init_waitqueue_head(&tctx->wait);
|
|
+ tctx->last = NULL;
|
|
+ tctx->in_idle = 0;
|
|
+ atomic_long_set(&tctx->req_issue, 0);
|
|
+ atomic_long_set(&tctx->req_complete, 0);
|
|
+ task->io_uring = tctx;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+void __io_uring_free(struct task_struct *tsk)
|
|
+{
|
|
+ struct io_uring_task *tctx = tsk->io_uring;
|
|
+
|
|
+ WARN_ON_ONCE(!xa_empty(&tctx->xa));
|
|
+ kfree(tctx);
|
|
+ tsk->io_uring = NULL;
|
|
+}
|
|
+
|
|
static int io_sq_offload_start(struct io_ring_ctx *ctx,
|
|
struct io_uring_params *p)
|
|
{
|
|
int ret;
|
|
|
|
- mmgrab(current->mm);
|
|
- ctx->sqo_mm = current->mm;
|
|
-
|
|
if (ctx->flags & IORING_SETUP_SQPOLL) {
|
|
ret = -EPERM;
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
@@ -7111,6 +7175,9 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
|
|
ctx->sqo_thread = NULL;
|
|
goto err;
|
|
}
|
|
+ ret = io_uring_alloc_task_context(ctx->sqo_thread);
|
|
+ if (ret)
|
|
+ goto err;
|
|
wake_up_process(ctx->sqo_thread);
|
|
} else if (p->flags & IORING_SETUP_SQ_AFF) {
|
|
/* Can't have SQ_AFF without SQPOLL */
|
|
@@ -7125,8 +7192,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
|
|
return 0;
|
|
err:
|
|
io_finish_async(ctx);
|
|
- mmdrop(ctx->sqo_mm);
|
|
- ctx->sqo_mm = NULL;
|
|
return ret;
|
|
}
|
|
|
|
@@ -7456,8 +7521,12 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx)
|
|
static void io_ring_ctx_free(struct io_ring_ctx *ctx)
|
|
{
|
|
io_finish_async(ctx);
|
|
- if (ctx->sqo_mm)
|
|
- mmdrop(ctx->sqo_mm);
|
|
+ if (ctx->sqo_task) {
|
|
+ put_task_struct(ctx->sqo_task);
|
|
+ ctx->sqo_task = NULL;
|
|
+ mmdrop(ctx->mm_account);
|
|
+ ctx->mm_account = NULL;
|
|
+ }
|
|
|
|
io_iopoll_reap_events(ctx);
|
|
io_sqe_buffer_unregister(ctx);
|
|
@@ -7528,7 +7597,7 @@ static void io_ring_exit_work(struct work_struct *work)
|
|
|
|
ctx = container_of(work, struct io_ring_ctx, exit_work);
|
|
if (ctx->rings)
|
|
- io_cqring_overflow_flush(ctx, true);
|
|
+ io_cqring_overflow_flush(ctx, true, NULL, NULL);
|
|
|
|
/*
|
|
* If we're doing polled IO and end up having requests being
|
|
@@ -7539,7 +7608,7 @@ static void io_ring_exit_work(struct work_struct *work)
|
|
while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20)) {
|
|
io_iopoll_reap_events(ctx);
|
|
if (ctx->rings)
|
|
- io_cqring_overflow_flush(ctx, true);
|
|
+ io_cqring_overflow_flush(ctx, true, NULL, NULL);
|
|
}
|
|
io_ring_ctx_free(ctx);
|
|
}
|
|
@@ -7550,8 +7619,8 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
|
|
percpu_ref_kill(&ctx->refs);
|
|
mutex_unlock(&ctx->uring_lock);
|
|
|
|
- io_kill_timeouts(ctx);
|
|
- io_poll_remove_all(ctx);
|
|
+ io_kill_timeouts(ctx, NULL);
|
|
+ io_poll_remove_all(ctx, NULL);
|
|
|
|
if (ctx->io_wq)
|
|
io_wq_cancel_all(ctx->io_wq);
|
|
@@ -7559,7 +7628,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
|
|
io_iopoll_reap_events(ctx);
|
|
/* if we failed setting up the ctx, we might not have any rings */
|
|
if (ctx->rings)
|
|
- io_cqring_overflow_flush(ctx, true);
|
|
+ io_cqring_overflow_flush(ctx, true, NULL, NULL);
|
|
idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx);
|
|
|
|
/*
|
|
@@ -7588,7 +7657,7 @@ static bool io_wq_files_match(struct io_wq_work *work, void *data)
|
|
{
|
|
struct files_struct *files = data;
|
|
|
|
- return work->files == files;
|
|
+ return !files || work->files == files;
|
|
}
|
|
|
|
/*
|
|
@@ -7609,12 +7678,6 @@ static bool io_match_link(struct io_kiocb *preq, struct io_kiocb *req)
|
|
return false;
|
|
}
|
|
|
|
-static inline bool io_match_files(struct io_kiocb *req,
|
|
- struct files_struct *files)
|
|
-{
|
|
- return (req->flags & REQ_F_WORK_INITIALIZED) && req->work.files == files;
|
|
-}
|
|
-
|
|
static bool io_match_link_files(struct io_kiocb *req,
|
|
struct files_struct *files)
|
|
{
|
|
@@ -7729,11 +7792,14 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx,
|
|
}
|
|
}
|
|
|
|
-static void io_uring_cancel_files(struct io_ring_ctx *ctx,
|
|
+/*
|
|
+ * Returns true if we found and killed one or more files pinning requests
|
|
+ */
|
|
+static bool io_uring_cancel_files(struct io_ring_ctx *ctx,
|
|
struct files_struct *files)
|
|
{
|
|
if (list_empty_careful(&ctx->inflight_list))
|
|
- return;
|
|
+ return false;
|
|
|
|
io_cancel_defer_files(ctx, files);
|
|
/* cancel all at once, should be faster than doing it one by one*/
|
|
@@ -7745,7 +7811,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
|
|
|
|
spin_lock_irq(&ctx->inflight_lock);
|
|
list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
|
|
- if (req->work.files != files)
|
|
+ if (files && req->work.files != files)
|
|
continue;
|
|
/* req is being completed, ignore */
|
|
if (!refcount_inc_not_zero(&req->refs))
|
|
@@ -7791,9 +7857,13 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
|
|
io_put_req(cancel_req);
|
|
}
|
|
|
|
+ /* cancellations _may_ trigger task work */
|
|
+ io_run_task_work();
|
|
schedule();
|
|
finish_wait(&ctx->inflight_wait, &wait);
|
|
}
|
|
+
|
|
+ return true;
|
|
}
|
|
|
|
static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
|
|
@@ -7801,21 +7871,198 @@ static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
|
|
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
|
|
struct task_struct *task = data;
|
|
|
|
- return req->task == task;
|
|
+ return io_task_match(req, task);
|
|
+}
|
|
+
|
|
+static bool __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
|
|
+ struct task_struct *task,
|
|
+ struct files_struct *files)
|
|
+{
|
|
+ bool ret;
|
|
+
|
|
+ ret = io_uring_cancel_files(ctx, files);
|
|
+ if (!files) {
|
|
+ enum io_wq_cancel cret;
|
|
+
|
|
+ cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, task, true);
|
|
+ if (cret != IO_WQ_CANCEL_NOTFOUND)
|
|
+ ret = true;
|
|
+
|
|
+ /* SQPOLL thread does its own polling */
|
|
+ if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
|
|
+ if (!list_empty_careful(&ctx->poll_list)) {
|
|
+ io_iopoll_reap_events(ctx);
|
|
+ ret = true;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ ret |= io_poll_remove_all(ctx, task);
|
|
+ ret |= io_kill_timeouts(ctx, task);
|
|
+ }
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * We need to iteratively cancel requests, in case a request has dependent
|
|
+ * hard links. These persist even for failure of cancelations, hence keep
|
|
+ * looping until none are found.
|
|
+ */
|
|
+static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
|
|
+ struct files_struct *files)
|
|
+{
|
|
+ struct task_struct *task = current;
|
|
+
|
|
+ if (ctx->flags & IORING_SETUP_SQPOLL)
|
|
+ task = ctx->sqo_thread;
|
|
+
|
|
+ io_cqring_overflow_flush(ctx, true, task, files);
|
|
+
|
|
+ while (__io_uring_cancel_task_requests(ctx, task, files)) {
|
|
+ io_run_task_work();
|
|
+ cond_resched();
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Note that this task has used io_uring. We use it for cancelation purposes.
|
|
+ */
|
|
+static int io_uring_add_task_file(struct file *file)
|
|
+{
|
|
+ struct io_uring_task *tctx = current->io_uring;
|
|
+
|
|
+ if (unlikely(!tctx)) {
|
|
+ int ret;
|
|
+
|
|
+ ret = io_uring_alloc_task_context(current);
|
|
+ if (unlikely(ret))
|
|
+ return ret;
|
|
+ tctx = current->io_uring;
|
|
+ }
|
|
+ if (tctx->last != file) {
|
|
+ void *old = xa_load(&tctx->xa, (unsigned long)file);
|
|
+
|
|
+ if (!old) {
|
|
+ get_file(file);
|
|
+ xa_store(&tctx->xa, (unsigned long)file, file, GFP_KERNEL);
|
|
+ }
|
|
+ tctx->last = file;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Remove this io_uring_file -> task mapping.
|
|
+ */
|
|
+static void io_uring_del_task_file(struct file *file)
|
|
+{
|
|
+ struct io_uring_task *tctx = current->io_uring;
|
|
+
|
|
+ if (tctx->last == file)
|
|
+ tctx->last = NULL;
|
|
+ file = xa_erase(&tctx->xa, (unsigned long)file);
|
|
+ if (file)
|
|
+ fput(file);
|
|
+}
|
|
+
|
|
+static void __io_uring_attempt_task_drop(struct file *file)
|
|
+{
|
|
+ struct file *old = xa_load(¤t->io_uring->xa, (unsigned long)file);
|
|
+
|
|
+ if (old == file)
|
|
+ io_uring_del_task_file(file);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Drop task note for this file if we're the only ones that hold it after
|
|
+ * pending fput()
|
|
+ */
|
|
+static void io_uring_attempt_task_drop(struct file *file, bool exiting)
|
|
+{
|
|
+ if (!current->io_uring)
|
|
+ return;
|
|
+ /*
|
|
+ * fput() is pending, will be 2 if the only other ref is our potential
|
|
+ * task file note. If the task is exiting, drop regardless of count.
|
|
+ */
|
|
+ if (!exiting && atomic_long_read(&file->f_count) != 2)
|
|
+ return;
|
|
+
|
|
+ __io_uring_attempt_task_drop(file);
|
|
+}
|
|
+
|
|
+void __io_uring_files_cancel(struct files_struct *files)
|
|
+{
|
|
+ struct io_uring_task *tctx = current->io_uring;
|
|
+ struct file *file;
|
|
+ unsigned long index;
|
|
+
|
|
+ /* make sure overflow events are dropped */
|
|
+ tctx->in_idle = true;
|
|
+
|
|
+ xa_for_each(&tctx->xa, index, file) {
|
|
+ struct io_ring_ctx *ctx = file->private_data;
|
|
+
|
|
+ io_uring_cancel_task_requests(ctx, files);
|
|
+ if (files)
|
|
+ io_uring_del_task_file(file);
|
|
+ }
|
|
+}
|
|
+
|
|
+static inline bool io_uring_task_idle(struct io_uring_task *tctx)
|
|
+{
|
|
+ return atomic_long_read(&tctx->req_issue) ==
|
|
+ atomic_long_read(&tctx->req_complete);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Find any io_uring fd that this task has registered or done IO on, and cancel
|
|
+ * requests.
|
|
+ */
|
|
+void __io_uring_task_cancel(void)
|
|
+{
|
|
+ struct io_uring_task *tctx = current->io_uring;
|
|
+ DEFINE_WAIT(wait);
|
|
+ long completions;
|
|
+
|
|
+ /* make sure overflow events are dropped */
|
|
+ tctx->in_idle = true;
|
|
+
|
|
+ while (!io_uring_task_idle(tctx)) {
|
|
+ /* read completions before cancelations */
|
|
+ completions = atomic_long_read(&tctx->req_complete);
|
|
+ __io_uring_files_cancel(NULL);
|
|
+
|
|
+ prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
|
|
+
|
|
+ /*
|
|
+ * If we've seen completions, retry. This avoids a race where
|
|
+ * a completion comes in before we did prepare_to_wait().
|
|
+ */
|
|
+ if (completions != atomic_long_read(&tctx->req_complete))
|
|
+ continue;
|
|
+ if (io_uring_task_idle(tctx))
|
|
+ break;
|
|
+ schedule();
|
|
+ }
|
|
+
|
|
+ finish_wait(&tctx->wait, &wait);
|
|
+ tctx->in_idle = false;
|
|
}
|
|
|
|
static int io_uring_flush(struct file *file, void *data)
|
|
{
|
|
struct io_ring_ctx *ctx = file->private_data;
|
|
|
|
- io_uring_cancel_files(ctx, data);
|
|
-
|
|
/*
|
|
* If the task is going away, cancel work it may have pending
|
|
*/
|
|
if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
|
|
- io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, current, true);
|
|
+ data = NULL;
|
|
|
|
+ io_uring_cancel_task_requests(ctx, data);
|
|
+ io_uring_attempt_task_drop(file, !data);
|
|
return 0;
|
|
}
|
|
|
|
@@ -7924,13 +8171,16 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
|
|
ret = 0;
|
|
if (ctx->flags & IORING_SETUP_SQPOLL) {
|
|
if (!list_empty_careful(&ctx->cq_overflow_list))
|
|
- io_cqring_overflow_flush(ctx, false);
|
|
+ io_cqring_overflow_flush(ctx, false, NULL, NULL);
|
|
if (flags & IORING_ENTER_SQ_WAKEUP)
|
|
wake_up(&ctx->sqo_wait);
|
|
submitted = to_submit;
|
|
} else if (to_submit) {
|
|
+ ret = io_uring_add_task_file(f.file);
|
|
+ if (unlikely(ret))
|
|
+ goto out;
|
|
mutex_lock(&ctx->uring_lock);
|
|
- submitted = io_submit_sqes(ctx, to_submit, f.file, fd);
|
|
+ submitted = io_submit_sqes(ctx, to_submit);
|
|
mutex_unlock(&ctx->uring_lock);
|
|
|
|
if (submitted != to_submit)
|
|
@@ -8142,6 +8392,7 @@ static int io_uring_get_fd(struct io_ring_ctx *ctx)
|
|
file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx,
|
|
O_RDWR | O_CLOEXEC);
|
|
if (IS_ERR(file)) {
|
|
+err_fd:
|
|
put_unused_fd(ret);
|
|
ret = PTR_ERR(file);
|
|
goto err;
|
|
@@ -8150,6 +8401,10 @@ static int io_uring_get_fd(struct io_ring_ctx *ctx)
|
|
#if defined(CONFIG_UNIX)
|
|
ctx->ring_sock->file = file;
|
|
#endif
|
|
+ if (unlikely(io_uring_add_task_file(file))) {
|
|
+ file = ERR_PTR(-ENOMEM);
|
|
+ goto err_fd;
|
|
+ }
|
|
fd_install(ret, file);
|
|
return ret;
|
|
err:
|
|
@@ -8228,6 +8483,16 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
|
|
ctx->user = user;
|
|
ctx->creds = get_current_cred();
|
|
|
|
+ ctx->sqo_task = get_task_struct(current);
|
|
+ /*
|
|
+ * This is just grabbed for accounting purposes. When a process exits,
|
|
+ * the mm is exited and dropped before the files, hence we need to hang
|
|
+ * on to this mm purely for the purposes of being able to unaccount
|
|
+ * memory (locked/pinned vm). It's not used for anything else.
|
|
+ */
|
|
+ mmgrab(current->mm);
|
|
+ ctx->mm_account = current->mm;
|
|
+
|
|
ret = io_allocate_scq_urings(ctx, p);
|
|
if (ret)
|
|
goto err;
|
|
diff --git a/include/linux/fs.h b/include/linux/fs.h
|
|
index ac1e89872db4f..819245cc9dbd4 100644
|
|
--- a/include/linux/fs.h
|
|
+++ b/include/linux/fs.h
|
|
@@ -3011,7 +3011,6 @@ extern int do_pipe_flags(int *, int);
|
|
id(UNKNOWN, unknown) \
|
|
id(FIRMWARE, firmware) \
|
|
id(FIRMWARE_PREALLOC_BUFFER, firmware) \
|
|
- id(FIRMWARE_EFI_EMBEDDED, firmware) \
|
|
id(MODULE, kernel-module) \
|
|
id(KEXEC_IMAGE, kexec-image) \
|
|
id(KEXEC_INITRAMFS, kexec-initramfs) \
|
|
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
|
|
new file mode 100644
|
|
index 0000000000000..c09135a1ef132
|
|
--- /dev/null
|
|
+++ b/include/linux/io_uring.h
|
|
@@ -0,0 +1,53 @@
|
|
+/* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
+#ifndef _LINUX_IO_URING_H
|
|
+#define _LINUX_IO_URING_H
|
|
+
|
|
+#include <linux/sched.h>
|
|
+#include <linux/xarray.h>
|
|
+#include <linux/percpu-refcount.h>
|
|
+
|
|
+struct io_uring_task {
|
|
+ /* submission side */
|
|
+ struct xarray xa;
|
|
+ struct wait_queue_head wait;
|
|
+ struct file *last;
|
|
+ atomic_long_t req_issue;
|
|
+
|
|
+ /* completion side */
|
|
+ bool in_idle ____cacheline_aligned_in_smp;
|
|
+ atomic_long_t req_complete;
|
|
+};
|
|
+
|
|
+#if defined(CONFIG_IO_URING)
|
|
+void __io_uring_task_cancel(void);
|
|
+void __io_uring_files_cancel(struct files_struct *files);
|
|
+void __io_uring_free(struct task_struct *tsk);
|
|
+
|
|
+static inline void io_uring_task_cancel(void)
|
|
+{
|
|
+ if (current->io_uring && !xa_empty(¤t->io_uring->xa))
|
|
+ __io_uring_task_cancel();
|
|
+}
|
|
+static inline void io_uring_files_cancel(struct files_struct *files)
|
|
+{
|
|
+ if (current->io_uring && !xa_empty(¤t->io_uring->xa))
|
|
+ __io_uring_files_cancel(files);
|
|
+}
|
|
+static inline void io_uring_free(struct task_struct *tsk)
|
|
+{
|
|
+ if (tsk->io_uring)
|
|
+ __io_uring_free(tsk);
|
|
+}
|
|
+#else
|
|
+static inline void io_uring_task_cancel(void)
|
|
+{
|
|
+}
|
|
+static inline void io_uring_files_cancel(struct files_struct *files)
|
|
+{
|
|
+}
|
|
+static inline void io_uring_free(struct task_struct *tsk)
|
|
+{
|
|
+}
|
|
+#endif
|
|
+
|
|
+#endif
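The new header is the glue between the scheduler and io_uring: task_struct gains an io_uring pointer, and the inline wrappers above keep the common case (a task that never touched io_uring) to a single NULL check, only calling out of line when per-task state exists. A minimal standalone sketch of that guard-wrapper idiom, illustrative only and not kernel code, with names invented for the example:

    #include <stdio.h>

    struct task_ctx { int pending; };      /* stands in for io_uring_task */
    static struct task_ctx *cur_ctx;       /* stands in for current->io_uring */

    static void slow_path_cancel(void)     /* out-of-line work, rarely needed */
    {
        printf("cancelling %d pending requests\n", cur_ctx->pending);
        cur_ctx->pending = 0;
    }

    static inline void maybe_cancel(void)  /* cheap inline fast path */
    {
        if (cur_ctx && cur_ctx->pending)   /* mirrors the NULL + xa_empty() test */
            slow_path_cancel();
    }

    int main(void)
    {
        struct task_ctx ctx = { .pending = 3 };

        maybe_cancel();                    /* no per-task state: no-op */
        cur_ctx = &ctx;                    /* first io_uring use allocates it */
        maybe_cancel();                    /* slow path taken exactly once */
        return 0;
    }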
diff --git a/include/linux/mtd/pfow.h b/include/linux/mtd/pfow.h
index 122f3439e1af2..c65d7a3be3c69 100644
--- a/include/linux/mtd/pfow.h
+++ b/include/linux/mtd/pfow.h
@@ -128,7 +128,7 @@ static inline void print_drs_error(unsigned dsr)

if (!(dsr & DSR_AVAILABLE))
printk(KERN_NOTICE"DSR.15: (0) Device not Available\n");
- if (prog_status & 0x03)
+ if ((prog_status & 0x03) == 0x03)
printk(KERN_NOTICE"DSR.9,8: (11) Attempt to program invalid "
"half with 41h command\n");
else if (prog_status & 0x02)
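Why this fix matters: prog_status is a two-bit field, and `prog_status & 0x03` is true for the values 01, 10 and 11, so the old test claimed the "both bits set" error for any non-zero status and the following `else if` branches could never fire for 01 or 10. A minimal demonstration of the mask-vs-equality pitfall, as a standalone illustration only:

    #include <stdio.h>

    int main(void)
    {
        for (unsigned s = 0; s <= 3; s++)
            printf("status=%u  old(& 0x03)=%d  new(== 0x03)=%d\n",
                   s, !!(s & 0x03), (s & 0x03) == 0x03);
        return 0;
    }

The two columns disagree exactly for status 1 and 2, the cases the patch corrects.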
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 121c104a4090e..1010bf3d3008b 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -584,7 +584,7 @@ struct dev_pm_info {
#endif
#ifdef CONFIG_PM
struct hrtimer suspend_timer;
- unsigned long timer_expires;
+ u64 timer_expires;
struct work_struct work;
wait_queue_head_t wait_queue;
struct wake_irq *wakeirq;
diff --git a/include/linux/qcom-geni-se.h b/include/linux/qcom-geni-se.h
index dd464943f717a..5b90eff50bf6e 100644
--- a/include/linux/qcom-geni-se.h
+++ b/include/linux/qcom-geni-se.h
@@ -229,6 +229,9 @@ struct geni_se {
#define GENI_SE_VERSION_MINOR(ver) ((ver & HW_VER_MINOR_MASK) >> HW_VER_MINOR_SHFT)
#define GENI_SE_VERSION_STEP(ver) (ver & HW_VER_STEP_MASK)

+/* QUP SE VERSION value for major number 2 and minor number 5 */
+#define QUP_SE_VERSION_2_5 0x20050000
+
#if IS_ENABLED(CONFIG_QCOM_GENI_SE)

u32 geni_se_get_qup_hw_version(struct geni_se *se);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6833729430932..f0f38e86ab1ee 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -61,6 +61,7 @@ struct sighand_struct;
struct signal_struct;
struct task_delay_info;
struct task_group;
+struct io_uring_task;

/*
* Task state bitmask. NOTE! These bits are also
@@ -923,6 +924,10 @@ struct task_struct {
/* Open file information: */
struct files_struct *files;

+#ifdef CONFIG_IO_URING
+ struct io_uring_task *io_uring;
+#endif
+
/* Namespaces: */
struct nsproxy *nsproxy;

diff --git a/include/linux/string.h b/include/linux/string.h
index 9b7a0632e87aa..b1f3894a0a3e4 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -161,20 +161,13 @@ extern int bcmp(const void *,const void *,__kernel_size_t);
#ifndef __HAVE_ARCH_MEMCHR
extern void * memchr(const void *,int,__kernel_size_t);
#endif
-#ifndef __HAVE_ARCH_MEMCPY_MCSAFE
-static inline __must_check unsigned long memcpy_mcsafe(void *dst,
- const void *src, size_t cnt)
-{
- memcpy(dst, src, cnt);
- return 0;
-}
-#endif
#ifndef __HAVE_ARCH_MEMCPY_FLUSHCACHE
static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt)
{
memcpy(dst, src, cnt);
}
#endif
+
void *memchr_inv(const void *s, int c, size_t n);
char *strreplace(char *s, char old, char new);

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 0a76ddc07d597..1ef3ab2343aa4 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -163,6 +163,19 @@ copy_in_user(void __user *to, const void __user *from, unsigned long n)
}
#endif

+#ifndef copy_mc_to_kernel
+/*
+ * Without arch opt-in this generic copy_mc_to_kernel() will not handle
+ * #MC (or arch equivalent) during source read.
+ */
+static inline unsigned long __must_check
+copy_mc_to_kernel(void *dst, const void *src, size_t cnt)
+{
+ memcpy(dst, src, cnt);
+ return 0;
+}
+#endif
+
static __always_inline void pagefault_disabled_inc(void)
{
current->pagefault_disabled++;
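The generic fallback documents the calling convention shared with the arch implementations: the return value is the number of bytes not copied, with 0 meaning complete success, so callers can turn a machine-check abort into a short read. A hedged sketch of how a caller typically consumes that convention, using a plain C stand-in rather than the kernel helper:

    #include <stddef.h>
    #include <string.h>

    /* stand-in with the same contract: returns the bytes left uncopied */
    static size_t copy_mc_stub(void *dst, const void *src, size_t cnt)
    {
        memcpy(dst, src, cnt);  /* generic fallback: no #MC handling */
        return 0;
    }

    /* convert the contract into a short count, the way read(2) reports it */
    static size_t read_some(void *dst, const void *src, size_t cnt)
    {
        size_t rem = copy_mc_stub(dst, src, cnt);
        return cnt - rem;       /* short on a poisoned source, full otherwise */
    }

    int main(void)
    {
        char dst[8];
        return read_some(dst, "abcdefg", sizeof(dst)) == sizeof(dst) ? 0 : 1;
    }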
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 9576fd8158d7d..6a97b4d10b2ed 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -186,10 +186,10 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i);
#define _copy_from_iter_flushcache _copy_from_iter_nocache
#endif

-#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
-size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i);
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
#else
-#define _copy_to_iter_mcsafe _copy_to_iter
+#define _copy_mc_to_iter _copy_to_iter
#endif

static __always_inline __must_check
@@ -202,12 +202,12 @@ size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
}

static __always_inline __must_check
-size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i)
+size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i)
{
if (unlikely(!check_copy_size(addr, bytes, true)))
return 0;
else
- return _copy_to_iter_mcsafe(addr, bytes, i);
+ return _copy_mc_to_iter(addr, bytes, i);
}

size_t iov_iter_zero(size_t bytes, struct iov_iter *);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index ec2cbfab71f35..f09541cba3c9d 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -896,6 +896,12 @@ static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule)
return (struct nft_expr *)&rule->data[rule->dlen];
}

+static inline bool nft_expr_more(const struct nft_rule *rule,
+ const struct nft_expr *expr)
+{
+ return expr != nft_expr_last(rule) && expr->ops;
+}
+
static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule)
{
return (void *)&rule->data[rule->dlen];
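nft_expr_more() folds the two termination conditions (past-the-end, and NULL ops on an incompletely built rule) into one helper with the checks in a safe order; the offload callers below previously tested expr->ops before the end marker, which reads past the last expression. A small self-contained analog of the pattern, illustrative only:

    #include <stdio.h>

    struct expr { void (*op)(void); };

    static const struct expr *expr_last(const struct expr *base, int n)
    {
        return base + n;            /* one past the final expression */
    }

    static int expr_more(const struct expr *base, int n, const struct expr *e)
    {
        /* order matters: test the end marker before dereferencing e->op */
        return e != expr_last(base, n) && e->op;
    }

    static void visit(void) { puts("expr"); }

    int main(void)
    {
        struct expr rule[2] = { { visit }, { visit } };
        for (const struct expr *e = rule; expr_more(rule, 2, e); e++)
            e->op();
        return 0;
    }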
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a3fd55194e0b1..7bffadcfd6eb0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1416,8 +1416,8 @@ union bpf_attr {
* Return
* The return value depends on the result of the test, and can be:
*
- * * 0, if the *skb* task belongs to the cgroup2.
- * * 1, if the *skb* task does not belong to the cgroup2.
+ * * 0, if current task belongs to the cgroup2.
+ * * 1, if current task does not belong to the cgroup2.
* * A negative error code, if an error occurred.
*
* int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
diff --git a/init/init_task.c b/init/init_task.c
index 15089d15010ab..7802f91109b48 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -113,6 +113,9 @@ struct task_struct init_task
.thread = INIT_THREAD,
.fs = &init_fs,
.files = &init_files,
+#ifdef CONFIG_IO_URING
+ .io_uring = NULL,
+#endif
.signal = &init_signals,
.sighand = &init_sighand,
.nsproxy = &init_nsproxy,
diff --git a/kernel/fork.c b/kernel/fork.c
index 0074bbe8c66f1..c725015b3c465 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -95,6 +95,7 @@
#include <linux/stackleak.h>
#include <linux/kasan.h>
#include <linux/scs.h>
+#include <linux/io_uring.h>

#include <asm/pgalloc.h>
#include <linux/uaccess.h>
@@ -745,6 +746,7 @@ void __put_task_struct(struct task_struct *tsk)
WARN_ON(refcount_read(&tsk->usage));
WARN_ON(tsk == current);

+ io_uring_free(tsk);
cgroup_free(tsk);
task_numa_free(tsk, true);
security_task_free(tsk);
@@ -2022,6 +2024,10 @@ static __latent_entropy struct task_struct *copy_process(
p->vtime.state = VTIME_INACTIVE;
#endif

+#ifdef CONFIG_IO_URING
+ p->io_uring = NULL;
+#endif
+
#if defined(SPLIT_RSS_COUNTING)
memset(&p->rss_stat, 0, sizeof(p->rss_stat));
#endif
diff --git a/lib/Kconfig b/lib/Kconfig
index df3f3da959900..7761458649377 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -631,7 +631,12 @@ config UACCESS_MEMCPY
config ARCH_HAS_UACCESS_FLUSHCACHE
bool

-config ARCH_HAS_UACCESS_MCSAFE
+# arch has a concept of a recoverable synchronous exception due to a
+# memory-read error like x86 machine-check or ARM data-abort, and
+# implements copy_mc_to_{user,kernel} to abort and report
+# 'bytes-transferred' if that exception fires when accessing the source
+# buffer.
+config ARCH_HAS_COPY_MC
bool

# Temporary. Goes away when all archs are cleaned up
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index bf538c2bec777..aefe469905434 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -636,30 +636,30 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
}
EXPORT_SYMBOL(_copy_to_iter);

-#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
-static int copyout_mcsafe(void __user *to, const void *from, size_t n)
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+static int copyout_mc(void __user *to, const void *from, size_t n)
{
if (access_ok(to, n)) {
instrument_copy_to_user(to, from, n);
- n = copy_to_user_mcsafe((__force void *) to, from, n);
+ n = copy_mc_to_user((__force void *) to, from, n);
}
return n;
}

-static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
+static unsigned long copy_mc_to_page(struct page *page, size_t offset,
const char *from, size_t len)
{
unsigned long ret;
char *to;

to = kmap_atomic(page);
- ret = memcpy_mcsafe(to + offset, from, len);
+ ret = copy_mc_to_kernel(to + offset, from, len);
kunmap_atomic(to);

return ret;
}

-static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
+static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
struct iov_iter *i)
{
struct pipe_inode_info *pipe = i->pipe;
@@ -677,7 +677,7 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
unsigned long rem;

- rem = memcpy_mcsafe_to_page(pipe->bufs[i_head & p_mask].page,
+ rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
off, addr, chunk);
i->head = i_head;
i->iov_offset = off + chunk - rem;
@@ -694,18 +694,17 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
}

/**
- * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
+ * _copy_mc_to_iter - copy to iter with source memory error exception handling
* @addr: source kernel address
* @bytes: total transfer length
* @iter: destination iterator
*
- * The pmem driver arranges for filesystem-dax to use this facility via
- * dax_copy_to_iter() for protecting read/write to persistent memory.
- * Unless / until an architecture can guarantee identical performance
- * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
- * performance regression to switch more users to the mcsafe version.
+ * The pmem driver deploys this for the dax operation
+ * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
+ * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes
+ * successfully copied.
*
- * Otherwise, the main differences between this and typical _copy_to_iter().
+ * The main differences between this and typical _copy_to_iter().
*
* * Typical tail/residue handling after a fault retries the copy
* byte-by-byte until the fault happens again. Re-triggering machine
@@ -716,23 +715,22 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
* * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
* Compare to copy_to_iter() where only ITER_IOVEC attempts might return
* a short copy.
- *
- * See MCSAFE_TEST for self-test.
*/
-size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
+size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
const char *from = addr;
unsigned long rem, curr_addr, s_addr = (unsigned long) addr;

if (unlikely(iov_iter_is_pipe(i)))
- return copy_pipe_to_iter_mcsafe(addr, bytes, i);
+ return copy_mc_pipe_to_iter(addr, bytes, i);
if (iter_is_iovec(i))
might_fault();
iterate_and_advance(i, bytes, v,
- copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
+ copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
+ v.iov_len),
({
- rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
- (from += v.bv_len) - v.bv_len, v.bv_len);
+ rem = copy_mc_to_page(v.bv_page, v.bv_offset,
+ (from += v.bv_len) - v.bv_len, v.bv_len);
if (rem) {
curr_addr = (unsigned long) from;
bytes = curr_addr - s_addr - rem;
@@ -740,8 +738,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
}
}),
({
- rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
- v.iov_len);
+ rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
+ - v.iov_len, v.iov_len);
if (rem) {
curr_addr = (unsigned long) from;
bytes = curr_addr - s_addr - rem;
@@ -752,8 +750,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)

return bytes;
}
-EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
-#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
+EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
+#endif /* CONFIG_ARCH_HAS_COPY_MC */

size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 06a8242aa6980..6dd7f44497ecc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -483,6 +483,8 @@ static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
return true;
if (tcp_rmem_pressure(sk))
return true;
+ if (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss)
+ return true;
}
if (sk->sk_prot->stream_memory_read)
return sk->sk_prot->stream_memory_read(sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 02cc972edd0b0..6c7e982169467 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4790,7 +4790,8 @@ void tcp_data_ready(struct sock *sk)
int avail = tp->rcv_nxt - tp->copied_seq;

if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
- !sock_flag(sk, SOCK_DONE))
+ !sock_flag(sk, SOCK_DONE) &&
+ tcp_receive_window(tp) > inet_csk(sk)->icsk_ack.rcv_mss)
return;

sk->sk_data_ready(sk);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 05059f620d41e..fe51a7df4f524 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -295,7 +295,7 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx,
struct nft_expr *expr;

expr = nft_expr_first(rule);
- while (expr != nft_expr_last(rule) && expr->ops) {
+ while (nft_expr_more(rule, expr)) {
if (expr->ops->activate)
expr->ops->activate(ctx, expr);

@@ -310,7 +310,7 @@ static void nft_rule_expr_deactivate(const struct nft_ctx *ctx,
struct nft_expr *expr;

expr = nft_expr_first(rule);
- while (expr != nft_expr_last(rule) && expr->ops) {
+ while (nft_expr_more(rule, expr)) {
if (expr->ops->deactivate)
expr->ops->deactivate(ctx, expr, phase);

@@ -2917,7 +2917,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
* is called on error from nf_tables_newrule().
*/
expr = nft_expr_first(rule);
- while (expr != nft_expr_last(rule) && expr->ops) {
+ while (nft_expr_more(rule, expr)) {
next = nft_expr_next(expr);
nf_tables_expr_destroy(ctx, expr);
expr = next;
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index c7cf1cde46def..ce2387bfb5dce 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -37,7 +37,7 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net,
struct nft_expr *expr;

expr = nft_expr_first(rule);
- while (expr->ops && expr != nft_expr_last(rule)) {
+ while (nft_expr_more(rule, expr)) {
if (expr->ops->offload_flags & NFT_OFFLOAD_F_ACTION)
num_actions++;

@@ -61,7 +61,7 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net,
ctx->net = net;
ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC;

- while (expr->ops && expr != nft_expr_last(rule)) {
+ while (nft_expr_more(rule, expr)) {
if (!expr->ops->offload) {
err = -EOPNOTSUPP;
goto err_out;
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index e298ec3b3c9e3..ca026e2bf8d27 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -408,6 +408,7 @@ static void __exit mpls_cleanup_module(void)
module_init(mpls_init_module);
module_exit(mpls_cleanup_module);

+MODULE_SOFTDEP("post: mpls_gso");
MODULE_AUTHOR("Netronome Systems <oss-drivers@netronome.com>");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MPLS manipulation actions");
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 8bf6bde1cfe59..aa2448253dbab 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -650,12 +650,12 @@ static void tc_block_indr_cleanup(struct flow_block_cb *block_cb)
block_cb->indr.binder_type,
&block->flow_block, tcf_block_shared(block),
&extack);
+ rtnl_lock();
down_write(&block->cb_lock);
list_del(&block_cb->driver_list);
list_move(&block_cb->list, &bo.cb_list);
- up_write(&block->cb_lock);
- rtnl_lock();
tcf_block_unbind(block, &bo);
+ up_write(&block->cb_lock);
rtnl_unlock();
}

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 84f82771cdf5d..0c345e43a09a3 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -330,7 +330,7 @@ static s64 tabledist(s64 mu, s32 sigma,

/* default uniform distribution */
if (dist == NULL)
- return ((rnd % (2 * sigma)) + mu) - sigma;
+ return ((rnd % (2 * (u32)sigma)) + mu) - sigma;

t = dist->table[rnd % dist->size];
x = (sigma % NETEM_DIST_SCALE) * t;
@@ -812,6 +812,10 @@ static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
q->slot_config.max_packets = INT_MAX;
if (q->slot_config.max_bytes == 0)
q->slot_config.max_bytes = INT_MAX;
+
+ /* capping dist_jitter to the range acceptable by tabledist() */
+ q->slot_config.dist_jitter = min_t(__s64, INT_MAX, abs(q->slot_config.dist_jitter));
+
q->slot.packets_left = q->slot_config.max_packets;
q->slot.bytes_left = q->slot_config.max_bytes;
if (q->slot_config.min_delay | q->slot_config.max_delay |
@@ -1037,6 +1041,9 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_NETEM_SLOT])
get_slot(q, tb[TCA_NETEM_SLOT]);

+ /* capping jitter to the range acceptable by tabledist() */
+ q->jitter = min_t(s64, abs(q->jitter), INT_MAX);
+
return ret;

get_table_failure:
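Why tabledist() needed the cast and the callers need the cap: sigma is s32, so with jitter near INT_MAX the divisor `2 * sigma` wraps negative, and a `%` with a negative divisor can produce a negative delay offset. Casting to u32 makes the divisor well defined, and clamping |jitter| to INT_MAX keeps the u32 arithmetic in range. A hedged standalone demonstration of the wrap:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        int32_t sigma = INT32_MAX;             /* extreme configured jitter */
        int64_t rnd = 123456789;

        /* the old divisor, 2 * sigma in 32 bits, wraps negative (computed
         * via 64-bit math here, since writing 2 * sigma directly would be
         * signed-overflow undefined behavior) */
        int32_t bad = (int32_t)(2 * (int64_t)sigma);
        uint32_t good = 2 * (uint32_t)sigma;   /* the patched divisor */

        printf("s32 divisor: %d\n", (int)bad);         /* -2 */
        printf("u32 divisor: %u\n", (unsigned)good);   /* 4294967294 */
        printf("rnd %% divisor = %lld\n", (long long)(rnd % good));
        return 0;
    }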
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 15b24fbcbe970..0d6297f75df18 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -150,12 +150,11 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
if (fragid == FIRST_FRAGMENT) {
if (unlikely(head))
goto err;
- if (skb_cloned(frag))
- frag = skb_copy(frag, GFP_ATOMIC);
+ *buf = NULL;
+ frag = skb_unshare(frag, GFP_ATOMIC);
if (unlikely(!frag))
goto err;
head = *headbuf = frag;
- *buf = NULL;
TIPC_SKB_CB(head)->tail = NULL;
if (skb_is_nonlinear(head)) {
skb_walk_frags(head, tail) {
diff --git a/scripts/setlocalversion b/scripts/setlocalversion
index 20f2efd57b11a..bb709eda96cdf 100755
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -45,7 +45,7 @@ scm_version()

# Check for git and a git repo.
if test -z "$(git rev-parse --show-cdup 2>/dev/null)" &&
- head=$(git rev-parse --verify --short HEAD 2>/dev/null); then
+ head=$(git rev-parse --verify HEAD 2>/dev/null); then

# If we are at a tagged commit (like "v2.6.30-rc6"), we ignore
# it, because this version is defined in the top level Makefile.
@@ -59,11 +59,22 @@ scm_version()
fi
# If we are past a tagged commit (like
# "v2.6.30-rc5-302-g72357d5"), we pretty print it.
- if atag="$(git describe 2>/dev/null)"; then
- echo "$atag" | awk -F- '{printf("-%05d-%s", $(NF-1),$(NF))}'
-
- # If we don't have a tag at all we print -g{commitish}.
+ #
+ # Ensure the abbreviated sha1 has exactly 12
+ # hex characters, to make the output
+ # independent of git version, local
+ # core.abbrev settings and/or total number of
+ # objects in the current repository - passing
+ # --abbrev=12 ensures a minimum of 12, and the
+ # awk substr() then picks the 'g' and first 12
+ # hex chars.
+ if atag="$(git describe --abbrev=12 2>/dev/null)"; then
+ echo "$atag" | awk -F- '{printf("-%05d-%s", $(NF-1),substr($(NF),0,13))}'
+
+ # If we don't have a tag at all we print -g{commitish},
+ # again using exactly 12 hex chars.
else
+ head="$(echo $head | cut -c1-12)"
printf '%s%s' -g $head
fi
fi
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index 0d36259b690df..e4b47759ba1ca 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -181,6 +181,12 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
break;
case EVM_IMA_XATTR_DIGSIG:
case EVM_XATTR_PORTABLE_DIGSIG:
+ /* accept xattr with non-empty signature field */
+ if (xattr_len <= sizeof(struct signature_v2_hdr)) {
+ evm_status = INTEGRITY_FAIL;
+ goto out;
+ }
+
hdr = (struct signature_v2_hdr *)xattr_data;
digest.hdr.algo = hdr->hash_algo;
rc = evm_calc_hash(dentry, xattr_name, xattr_value,
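The EVM fix is the classic "validate the length before casting a byte buffer to a header struct" rule: with xattr_len at or below sizeof(struct signature_v2_hdr), the signature payload after the header is empty or the header itself is truncated, yet the old code parsed it anyway. A hedged generic sketch of the pattern (the stand-in header is invented for the example; the real layout differs):

    #include <stdint.h>
    #include <stddef.h>

    struct sig_hdr {                /* stand-in header, not the real struct */
        uint8_t type;
        uint8_t version;
        uint8_t hash_algo;
        uint16_t sig_size;
    } __attribute__((packed));

    /* 0 on success, -1 if the buffer cannot hold the header plus a signature */
    static int parse_sig(const uint8_t *buf, size_t len)
    {
        const struct sig_hdr *hdr;

        if (len <= sizeof(*hdr))    /* must leave room for a non-empty signature */
            return -1;
        hdr = (const struct sig_hdr *)buf;
        if (sizeof(*hdr) + hdr->sig_size > len)  /* bound the payload too */
            return -1;
        return 0;
    }

    int main(void)
    {
        const uint8_t tiny[sizeof(struct sig_hdr)] = { 0 };
        return parse_sig(tiny, sizeof(tiny)) == -1 ? 0 : 1;  /* header only: rejected */
    }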
diff --git a/tools/arch/x86/include/asm/mcsafe_test.h b/tools/arch/x86/include/asm/mcsafe_test.h
deleted file mode 100644
index 2ccd588fbad45..0000000000000
--- a/tools/arch/x86/include/asm/mcsafe_test.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MCSAFE_TEST_H_
-#define _MCSAFE_TEST_H_
-
-.macro MCSAFE_TEST_CTL
-.endm
-
-.macro MCSAFE_TEST_SRC reg count target
-.endm
-
-.macro MCSAFE_TEST_DST reg count target
-.endm
-#endif /* _MCSAFE_TEST_H_ */
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 45f8e1b02241f..0b5b8ae56bd91 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -4,7 +4,6 @@
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
-#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>

@@ -187,117 +186,3 @@ SYM_FUNC_START(memcpy_orig)
SYM_FUNC_END(memcpy_orig)

.popsection
-
-#ifndef CONFIG_UML
-
-MCSAFE_TEST_CTL
-
-/*
- * __memcpy_mcsafe - memory copy with machine check exception handling
- * Note that we only catch machine checks when reading the source addresses.
- * Writes to target are posted and don't generate machine checks.
- */
-SYM_FUNC_START(__memcpy_mcsafe)
- cmpl $8, %edx
- /* Less than 8 bytes? Go to byte copy loop */
- jb .L_no_whole_words
-
- /* Check for bad alignment of source */
- testl $7, %esi
- /* Already aligned */
- jz .L_8byte_aligned
-
- /* Copy one byte at a time until source is 8-byte aligned */
- movl %esi, %ecx
- andl $7, %ecx
- subl $8, %ecx
- negl %ecx
- subl %ecx, %edx
-.L_read_leading_bytes:
- movb (%rsi), %al
- MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
- MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
-.L_write_leading_bytes:
- movb %al, (%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz .L_read_leading_bytes
-
-.L_8byte_aligned:
- movl %edx, %ecx
- andl $7, %edx
- shrl $3, %ecx
- jz .L_no_whole_words
-
-.L_read_words:
- movq (%rsi), %r8
- MCSAFE_TEST_SRC %rsi 8 .E_read_words
- MCSAFE_TEST_DST %rdi 8 .E_write_words
-.L_write_words:
- movq %r8, (%rdi)
- addq $8, %rsi
- addq $8, %rdi
- decl %ecx
- jnz .L_read_words
-
- /* Any trailing bytes? */
-.L_no_whole_words:
- andl %edx, %edx
- jz .L_done_memcpy_trap
-
- /* Copy trailing bytes */
- movl %edx, %ecx
-.L_read_trailing_bytes:
- movb (%rsi), %al
- MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
- MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
-.L_write_trailing_bytes:
- movb %al, (%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz .L_read_trailing_bytes
-
- /* Copy successful. Return zero */
-.L_done_memcpy_trap:
- xorl %eax, %eax
-.L_done:
- ret
-SYM_FUNC_END(__memcpy_mcsafe)
-EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
-
- .section .fixup, "ax"
- /*
- * Return number of bytes not copied for any failure. Note that
- * there is no "tail" handling since the source buffer is 8-byte
- * aligned and poison is cacheline aligned.
- */
-.E_read_words:
- shll $3, %ecx
-.E_leading_bytes:
- addl %edx, %ecx
-.E_trailing_bytes:
- mov %ecx, %eax
- jmp .L_done
-
- /*
- * For write fault handling, given the destination is unaligned,
- * we handle faults on multi-byte writes with a byte-by-byte
- * copy up to the write-protected page.
- */
-.E_write_words:
- shll $3, %ecx
- addl %edx, %ecx
- movl %ecx, %edx
- jmp mcsafe_handle_tail
-
- .previous
-
- _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
- _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
- _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
- _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
- _ASM_EXTABLE(.L_write_words, .E_write_words)
- _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
-#endif
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index a3fd55194e0b1..7bffadcfd6eb0 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1416,8 +1416,8 @@ union bpf_attr {
* Return
* The return value depends on the result of the test, and can be:
*
- * * 0, if the *skb* task belongs to the cgroup2.
- * * 1, if the *skb* task does not belong to the cgroup2.
+ * * 0, if current task belongs to the cgroup2.
+ * * 1, if current task does not belong to the cgroup2.
* * A negative error code, if an error occurred.
*
* int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 773e6c7ee5f93..0ed92c3b19266 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -548,8 +548,9 @@ static const char *uaccess_safe_builtin[] = {
"__ubsan_handle_shift_out_of_bounds",
/* misc */
"csum_partial_copy_generic",
- "__memcpy_mcsafe",
- "mcsafe_handle_tail",
+ "copy_mc_fragile",
+ "copy_mc_fragile_handle_tail",
+ "copy_mc_enhanced_fast_string",
"ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
NULL
};
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index 768e408757a05..5352303518e1f 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -11,7 +11,6 @@ perf-y += epoll-ctl.o
perf-y += synthesize.o
perf-y += kallsyms-parse.o

-perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o

diff --git a/tools/perf/bench/mem-memcpy-x86-64-lib.c b/tools/perf/bench/mem-memcpy-x86-64-lib.c
deleted file mode 100644
index 4130734dde84b..0000000000000
--- a/tools/perf/bench/mem-memcpy-x86-64-lib.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy
- * of the kernel's arch/x86/lib/memcpy_64.s used in 'perf bench mem memcpy'
- * happy.
- */
-#include <linux/types.h>
-
-unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);
-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len);
-
-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len)
-{
- for (; len; --len, to++, from++) {
- /*
- * Call the assembly routine back directly since
- * memcpy_mcsafe() may silently fallback to memcpy.
- */
- unsigned long rem = __memcpy_mcsafe(to, from, 1);
-
- if (rem)
- break;
- }
- return len;
-}
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index a8ee5c4d41ebb..50a390d87db26 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -23,7 +23,8 @@
#include "nfit_test.h"
#include "../watermark.h"

-#include <asm/mcsafe_test.h>
+#include <asm/copy_mc_test.h>
+#include <asm/mce.h>

/*
* Generate an NFIT table to describe the following topology:
@@ -3052,7 +3053,7 @@ static struct platform_driver nfit_test_driver = {
.id_table = nfit_test_id,
};

-static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
+static char copy_mc_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));

enum INJECT {
INJECT_NONE,
@@ -3060,7 +3061,7 @@ enum INJECT {
INJECT_DST,
};

-static void mcsafe_test_init(char *dst, char *src, size_t size)
+static void copy_mc_test_init(char *dst, char *src, size_t size)
{
size_t i;

@@ -3069,7 +3070,7 @@ static void mcsafe_test_init(char *dst, char *src, size_t size)
src[i] = (char) i;
}

-static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
+static bool copy_mc_test_validate(unsigned char *dst, unsigned char *src,
size_t size, unsigned long rem)
{
size_t i;
@@ -3090,12 +3091,12 @@ static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
return true;
}

-void mcsafe_test(void)
+void copy_mc_test(void)
{
char *inject_desc[] = { "none", "source", "destination" };
enum INJECT inj;

- if (IS_ENABLED(CONFIG_MCSAFE_TEST)) {
+ if (IS_ENABLED(CONFIG_COPY_MC_TEST)) {
pr_info("%s: run...\n", __func__);
} else {
pr_info("%s: disabled, skip.\n", __func__);
@@ -3113,31 +3114,31 @@ void mcsafe_test(void)

switch (inj) {
case INJECT_NONE:
- mcsafe_inject_src(NULL);
- mcsafe_inject_dst(NULL);
- dst = &mcsafe_buf[2048];
- src = &mcsafe_buf[1024 - i];
+ copy_mc_inject_src(NULL);
+ copy_mc_inject_dst(NULL);
+ dst = &copy_mc_buf[2048];
+ src = &copy_mc_buf[1024 - i];
expect = 0;
break;
case INJECT_SRC:
- mcsafe_inject_src(&mcsafe_buf[1024]);
- mcsafe_inject_dst(NULL);
- dst = &mcsafe_buf[2048];
- src = &mcsafe_buf[1024 - i];
+ copy_mc_inject_src(&copy_mc_buf[1024]);
+ copy_mc_inject_dst(NULL);
+ dst = &copy_mc_buf[2048];
+ src = &copy_mc_buf[1024 - i];
expect = 512 - i;
break;
case INJECT_DST:
- mcsafe_inject_src(NULL);
- mcsafe_inject_dst(&mcsafe_buf[2048]);
- dst = &mcsafe_buf[2048 - i];
- src = &mcsafe_buf[1024];
+ copy_mc_inject_src(NULL);
+ copy_mc_inject_dst(&copy_mc_buf[2048]);
+ dst = &copy_mc_buf[2048 - i];
+ src = &copy_mc_buf[1024];
expect = 512 - i;
break;
}

- mcsafe_test_init(dst, src, 512);
- rem = __memcpy_mcsafe(dst, src, 512);
- valid = mcsafe_test_validate(dst, src, 512, expect);
+ copy_mc_test_init(dst, src, 512);
+ rem = copy_mc_fragile(dst, src, 512);
+ valid = copy_mc_test_validate(dst, src, 512, expect);
if (rem == expect && valid)
continue;
pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n",
@@ -3149,8 +3150,8 @@ void mcsafe_test(void)
}
}

- mcsafe_inject_src(NULL);
- mcsafe_inject_dst(NULL);
+ copy_mc_inject_src(NULL);
+ copy_mc_inject_dst(NULL);
}

static __init int nfit_test_init(void)
@@ -3161,7 +3162,7 @@ static __init int nfit_test_init(void)
libnvdimm_test();
acpi_nfit_test();
device_dax_test();
- mcsafe_test();
+ copy_mc_test();
dax_pmem_test();
dax_pmem_core_test();
#ifdef CONFIG_DEV_DAX_PMEM_COMPAT
diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore
index ddaf140b82553..994b11af765ce 100644
--- a/tools/testing/selftests/powerpc/copyloops/.gitignore
+++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
@@ -12,4 +12,4 @@ memcpy_p7_t1
copyuser_64_exc_t0
copyuser_64_exc_t1
copyuser_64_exc_t2
-memcpy_mcsafe_64
+copy_mc_64
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index 0917983a1c781..3095b1f1c02b3 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -12,7 +12,7 @@ ASFLAGS = $(CFLAGS) -Wa,-mpower4
TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
copyuser_p7_t0 copyuser_p7_t1 \
memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
- memcpy_p7_t0 memcpy_p7_t1 memcpy_mcsafe_64 \
+ memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \
copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2

EXTRA_SOURCES := validate.c ../harness.c stubs.S
@@ -45,9 +45,9 @@ $(OUTPUT)/memcpy_p7_t%: memcpy_power7.S $(EXTRA_SOURCES)
-D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \
-o $@ $^

-$(OUTPUT)/memcpy_mcsafe_64: memcpy_mcsafe_64.S $(EXTRA_SOURCES)
+$(OUTPUT)/copy_mc_64: copy_mc_64.S $(EXTRA_SOURCES)
$(CC) $(CPPFLAGS) $(CFLAGS) \
- -D COPY_LOOP=test_memcpy_mcsafe \
+ -D COPY_LOOP=test_copy_mc_generic \
-o $@ $^

$(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \
diff --git a/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
new file mode 100644
index 0000000000000..88d46c471493b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) IBM Corporation, 2011
+ * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
+ * Author - Balbir Singh <bsingharora@gmail.com>
+ */
+#include <asm/ppc_asm.h>
+#include <asm/errno.h>
+#include <asm/export.h>
+
+ .macro err1
+100:
+ EX_TABLE(100b,.Ldo_err1)
+ .endm
+
+ .macro err2
+200:
+ EX_TABLE(200b,.Ldo_err2)
+ .endm
+
+ .macro err3
+300: EX_TABLE(300b,.Ldone)
+ .endm
+
+.Ldo_err2:
+ ld r22,STK_REG(R22)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r14,STK_REG(R14)(r1)
+ addi r1,r1,STACKFRAMESIZE
+.Ldo_err1:
+ /* Do a byte by byte copy to get the exact remaining size */
+ mtctr r7
+46:
+err3; lbz r0,0(r4)
+ addi r4,r4,1
+err3; stb r0,0(r3)
+ addi r3,r3,1
+ bdnz 46b
+ li r3,0
+ blr
+
+.Ldone:
+ mfctr r3
+ blr
+
+
+_GLOBAL(copy_mc_generic)
+ mr r7,r5
+ cmpldi r5,16
+ blt .Lshort_copy
+
+.Lcopy:
+ /* Get the source 8B aligned */
+ neg r6,r4
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-3)
+
+ bf cr7*4+3,1f
+err1; lbz r0,0(r4)
+ addi r4,r4,1
+err1; stb r0,0(r3)
+ addi r3,r3,1
+ subi r7,r7,1
+
+1: bf cr7*4+2,2f
+err1; lhz r0,0(r4)
+ addi r4,r4,2
+err1; sth r0,0(r3)
+ addi r3,r3,2
+ subi r7,r7,2
+
+2: bf cr7*4+1,3f
+err1; lwz r0,0(r4)
+ addi r4,r4,4
+err1; stw r0,0(r3)
+ addi r3,r3,4
+ subi r7,r7,4
+
+3: sub r5,r5,r6
+ cmpldi r5,128
+
+ mflr r0
+ stdu r1,-STACKFRAMESIZE(r1)
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+ std r17,STK_REG(R17)(r1)
+ std r18,STK_REG(R18)(r1)
+ std r19,STK_REG(R19)(r1)
+ std r20,STK_REG(R20)(r1)
+ std r21,STK_REG(R21)(r1)
+ std r22,STK_REG(R22)(r1)
+ std r0,STACKFRAMESIZE+16(r1)
+
+ blt 5f
+ srdi r6,r5,7
+ mtctr r6
+
+ /* Now do cacheline (128B) sized loads and stores. */
+ .align 5
+4:
+err2; ld r0,0(r4)
+err2; ld r6,8(r4)
+err2; ld r8,16(r4)
+err2; ld r9,24(r4)
+err2; ld r10,32(r4)
+err2; ld r11,40(r4)
+err2; ld r12,48(r4)
+err2; ld r14,56(r4)
+err2; ld r15,64(r4)
+err2; ld r16,72(r4)
+err2; ld r17,80(r4)
+err2; ld r18,88(r4)
+err2; ld r19,96(r4)
+err2; ld r20,104(r4)
+err2; ld r21,112(r4)
+err2; ld r22,120(r4)
+ addi r4,r4,128
+err2; std r0,0(r3)
+err2; std r6,8(r3)
+err2; std r8,16(r3)
+err2; std r9,24(r3)
+err2; std r10,32(r3)
+err2; std r11,40(r3)
+err2; std r12,48(r3)
+err2; std r14,56(r3)
+err2; std r15,64(r3)
+err2; std r16,72(r3)
+err2; std r17,80(r3)
+err2; std r18,88(r3)
+err2; std r19,96(r3)
+err2; std r20,104(r3)
+err2; std r21,112(r3)
+err2; std r22,120(r3)
+ addi r3,r3,128
+ subi r7,r7,128
+ bdnz 4b
+
+ clrldi r5,r5,(64-7)
+
+ /* Up to 127B to go */
+5: srdi r6,r5,4
+ mtocrf 0x01,r6
+
+6: bf cr7*4+1,7f
+err2; ld r0,0(r4)
+err2; ld r6,8(r4)
+err2; ld r8,16(r4)
+err2; ld r9,24(r4)
+err2; ld r10,32(r4)
+err2; ld r11,40(r4)
+err2; ld r12,48(r4)
+err2; ld r14,56(r4)
+ addi r4,r4,64
+err2; std r0,0(r3)
+err2; std r6,8(r3)
+err2; std r8,16(r3)
+err2; std r9,24(r3)
+err2; std r10,32(r3)
+err2; std r11,40(r3)
+err2; std r12,48(r3)
+err2; std r14,56(r3)
+ addi r3,r3,64
+ subi r7,r7,64
+
+7: ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r22,STK_REG(R22)(r1)
+ addi r1,r1,STACKFRAMESIZE
+
+ /* Up to 63B to go */
+ bf cr7*4+2,8f
+err1; ld r0,0(r4)
+err1; ld r6,8(r4)
+err1; ld r8,16(r4)
+err1; ld r9,24(r4)
|
|
+ addi r4,r4,32
|
|
+err1; std r0,0(r3)
|
|
+err1; std r6,8(r3)
|
|
+err1; std r8,16(r3)
|
|
+err1; std r9,24(r3)
|
|
+ addi r3,r3,32
|
|
+ subi r7,r7,32
|
|
+
|
|
+ /* Up to 31B to go */
|
|
+8: bf cr7*4+3,9f
|
|
+err1; ld r0,0(r4)
|
|
+err1; ld r6,8(r4)
|
|
+ addi r4,r4,16
|
|
+err1; std r0,0(r3)
|
|
+err1; std r6,8(r3)
|
|
+ addi r3,r3,16
|
|
+ subi r7,r7,16
|
|
+
|
|
+9: clrldi r5,r5,(64-4)
|
|
+
|
|
+ /* Up to 15B to go */
|
|
+.Lshort_copy:
|
|
+ mtocrf 0x01,r5
|
|
+ bf cr7*4+0,12f
|
|
+err1; lwz r0,0(r4) /* Less chance of a reject with word ops */
|
|
+err1; lwz r6,4(r4)
|
|
+ addi r4,r4,8
|
|
+err1; stw r0,0(r3)
|
|
+err1; stw r6,4(r3)
|
|
+ addi r3,r3,8
|
|
+ subi r7,r7,8
|
|
+
|
|
+12: bf cr7*4+1,13f
|
|
+err1; lwz r0,0(r4)
|
|
+ addi r4,r4,4
|
|
+err1; stw r0,0(r3)
|
|
+ addi r3,r3,4
|
|
+ subi r7,r7,4
|
|
+
|
|
+13: bf cr7*4+2,14f
|
|
+err1; lhz r0,0(r4)
|
|
+ addi r4,r4,2
|
|
+err1; sth r0,0(r3)
|
|
+ addi r3,r3,2
|
|
+ subi r7,r7,2
|
|
+
|
|
+14: bf cr7*4+3,15f
|
|
+err1; lbz r0,0(r4)
|
|
+err1; stb r0,0(r3)
|
|
+
|
|
+15: li r3,0
|
|
+ blr
|
|
+
|
|
+EXPORT_SYMBOL_GPL(copy_mc_generic);
|