Merge branch 'x86/mm'

Depends on the simplifications from commit 1d7e707af4 ("Revert "x86/module: prepare module loading for ROX allocations of text"")

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
This commit is contained in:
Peter Zijlstra 2025-02-14 10:32:04 +01:00
commit 92d2da37fd
20 changed files with 426 additions and 271 deletions

View File

@ -440,25 +440,24 @@ void __init arch_cpu_finalize_init(void)
os_check_bugs();
}
void apply_seal_endbr(s32 *start, s32 *end, struct module *mod)
void apply_seal_endbr(s32 *start, s32 *end)
{
}
void apply_retpolines(s32 *start, s32 *end, struct module *mod)
void apply_retpolines(s32 *start, s32 *end)
{
}
void apply_returns(s32 *start, s32 *end, struct module *mod)
void apply_returns(s32 *start, s32 *end)
{
}
void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
s32 *start_cfi, s32 *end_cfi, struct module *mod)
s32 *start_cfi, s32 *end_cfi)
{
}
void apply_alternatives(struct alt_instr *start, struct alt_instr *end,
struct module *mod)
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
}

View File

@ -85,6 +85,7 @@ config X86
select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN
select ARCH_HAS_EARLY_DEBUG if KGDB
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_EXECMEM_ROX if X86_64
select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL

View File

@ -48,8 +48,7 @@ int __init init_vdso_image(const struct vdso_image *image)
apply_alternatives((struct alt_instr *)(image->data + image->alt),
(struct alt_instr *)(image->data + image->alt +
image->alt_len),
NULL);
image->alt_len));
return 0;
}

View File

@ -87,16 +87,16 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
* instructions were patched in already:
*/
extern int alternatives_patched;
struct module;
extern void alternative_instructions(void);
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end,
struct module *mod);
extern void apply_retpolines(s32 *start, s32 *end, struct module *mod);
extern void apply_returns(s32 *start, s32 *end, struct module *mod);
extern void apply_seal_endbr(s32 *start, s32 *end, struct module *mod);
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
extern void apply_retpolines(s32 *start, s32 *end);
extern void apply_returns(s32 *start, s32 *end);
extern void apply_seal_endbr(s32 *start, s32 *end);
extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine,
s32 *start_cfi, s32 *end_cfi, struct module *mod);
s32 *start_cfi, s32 *end_cfi);
struct module;
struct callthunk_sites {
s32 *call_start, *call_end;

View File

@ -33,6 +33,7 @@
#define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1
#define _PAGE_BIT_UFFD_WP _PAGE_BIT_SOFTW2 /* userfaultfd wrprotected */
#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
#define _PAGE_BIT_KERNEL_4K _PAGE_BIT_SOFTW3 /* page must not be converted to large */
#define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4
#ifdef CONFIG_X86_64
@ -64,6 +65,7 @@
#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST)
#define _PAGE_KERNEL_4K (_AT(pteval_t, 1) << _PAGE_BIT_KERNEL_4K)
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
#define _PAGE_PKEY_BIT0 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT0)
#define _PAGE_PKEY_BIT1 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT1)

View File

@ -242,7 +242,7 @@ void flush_tlb_multi(const struct cpumask *cpumask,
flush_tlb_mm_range((vma)->vm_mm, start, end, \
((vma)->vm_flags & VM_HUGETLB) \
? huge_page_shift(hstate_vma(vma)) \
: PAGE_SHIFT, false)
: PAGE_SHIFT, true)
extern void flush_tlb_all(void);
extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,

View File

@ -392,10 +392,8 @@ EXPORT_SYMBOL(BUG_func);
* Rewrite the "call BUG_func" replacement to point to the target of the
* indirect pv_ops call "call *disp(%ip)".
*/
static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a,
struct module *mod)
static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a)
{
u8 *wr_instr = module_writable_address(mod, instr);
void *target, *bug = &BUG_func;
s32 disp;
@ -405,14 +403,14 @@ static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a,
}
if (a->instrlen != 6 ||
wr_instr[0] != CALL_RIP_REL_OPCODE ||
wr_instr[1] != CALL_RIP_REL_MODRM) {
instr[0] != CALL_RIP_REL_OPCODE ||
instr[1] != CALL_RIP_REL_MODRM) {
pr_err("ALT_FLAG_DIRECT_CALL set for unrecognized indirect call\n");
BUG();
}
/* Skip CALL_RIP_REL_OPCODE and CALL_RIP_REL_MODRM */
disp = *(s32 *)(wr_instr + 2);
disp = *(s32 *)(instr + 2);
#ifdef CONFIG_X86_64
/* ff 15 00 00 00 00 call *0x0(%rip) */
/* target address is stored at "next instruction + disp". */
@ -450,8 +448,7 @@ static inline u8 * instr_va(struct alt_instr *i)
* to refetch changed I$ lines.
*/
void __init_or_module noinline apply_alternatives(struct alt_instr *start,
struct alt_instr *end,
struct module *mod)
struct alt_instr *end)
{
u8 insn_buff[MAX_PATCH_LEN];
u8 *instr, *replacement;
@ -480,7 +477,6 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
*/
for (a = start; a < end; a++) {
int insn_buff_sz = 0;
u8 *wr_instr, *wr_replacement;
/*
* In case of nested ALTERNATIVE()s the outer alternative might
@ -494,11 +490,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
}
instr = instr_va(a);
wr_instr = module_writable_address(mod, instr);
replacement = (u8 *)&a->repl_offset + a->repl_offset;
wr_replacement = module_writable_address(mod, replacement);
BUG_ON(a->instrlen > sizeof(insn_buff));
BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
@ -509,9 +501,9 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
* patch if feature is *NOT* present.
*/
if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) {
memcpy(insn_buff, wr_instr, a->instrlen);
memcpy(insn_buff, instr, a->instrlen);
optimize_nops(instr, insn_buff, a->instrlen);
text_poke_early(wr_instr, insn_buff, a->instrlen);
text_poke_early(instr, insn_buff, a->instrlen);
continue;
}
@ -521,12 +513,11 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
instr, instr, a->instrlen,
replacement, a->replacementlen, a->flags);
memcpy(insn_buff, wr_replacement, a->replacementlen);
memcpy(insn_buff, replacement, a->replacementlen);
insn_buff_sz = a->replacementlen;
if (a->flags & ALT_FLAG_DIRECT_CALL) {
insn_buff_sz = alt_replace_call(instr, insn_buff, a,
mod);
insn_buff_sz = alt_replace_call(instr, insn_buff, a);
if (insn_buff_sz < 0)
continue;
}
@ -536,11 +527,11 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
apply_relocation(insn_buff, instr, a->instrlen, replacement, a->replacementlen);
DUMP_BYTES(ALT, wr_instr, a->instrlen, "%px: old_insn: ", instr);
DUMP_BYTES(ALT, instr, a->instrlen, "%px: old_insn: ", instr);
DUMP_BYTES(ALT, replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
DUMP_BYTES(ALT, insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
text_poke_early(wr_instr, insn_buff, insn_buff_sz);
text_poke_early(instr, insn_buff, insn_buff_sz);
}
kasan_enable_current();
@ -731,20 +722,18 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
/*
* Generated by 'objtool --retpoline'.
*/
void __init_or_module noinline apply_retpolines(s32 *start, s32 *end,
struct module *mod)
void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
void *wr_addr = module_writable_address(mod, addr);
struct insn insn;
int len, ret;
u8 bytes[16];
u8 op1, op2;
ret = insn_decode_kernel(&insn, wr_addr);
ret = insn_decode_kernel(&insn, addr);
if (WARN_ON_ONCE(ret < 0))
continue;
@ -772,9 +761,9 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end,
len = patch_retpoline(addr, &insn, bytes);
if (len == insn.length) {
optimize_nops(addr, bytes, len);
DUMP_BYTES(RETPOLINE, ((u8*)wr_addr), len, "%px: orig: ", addr);
DUMP_BYTES(RETPOLINE, ((u8*)addr), len, "%px: orig: ", addr);
DUMP_BYTES(RETPOLINE, ((u8*)bytes), len, "%px: repl: ", addr);
text_poke_early(wr_addr, bytes, len);
text_poke_early(addr, bytes, len);
}
}
}
@ -810,8 +799,7 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes)
return i;
}
void __init_or_module noinline apply_returns(s32 *start, s32 *end,
struct module *mod)
void __init_or_module noinline apply_returns(s32 *start, s32 *end)
{
s32 *s;
@ -820,13 +808,12 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end,
for (s = start; s < end; s++) {
void *dest = NULL, *addr = (void *)s + *s;
void *wr_addr = module_writable_address(mod, addr);
struct insn insn;
int len, ret;
u8 bytes[16];
u8 op;
ret = insn_decode_kernel(&insn, wr_addr);
ret = insn_decode_kernel(&insn, addr);
if (WARN_ON_ONCE(ret < 0))
continue;
@ -846,35 +833,32 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end,
len = patch_return(addr, &insn, bytes);
if (len == insn.length) {
DUMP_BYTES(RET, ((u8*)wr_addr), len, "%px: orig: ", addr);
DUMP_BYTES(RET, ((u8*)addr), len, "%px: orig: ", addr);
DUMP_BYTES(RET, ((u8*)bytes), len, "%px: repl: ", addr);
text_poke_early(wr_addr, bytes, len);
text_poke_early(addr, bytes, len);
}
}
}
#else
void __init_or_module noinline apply_returns(s32 *start, s32 *end,
struct module *mod) { }
void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
#endif /* CONFIG_MITIGATION_RETHUNK */
#else /* !CONFIG_MITIGATION_RETPOLINE || !CONFIG_OBJTOOL */
void __init_or_module noinline apply_retpolines(s32 *start, s32 *end,
struct module *mod) { }
void __init_or_module noinline apply_returns(s32 *start, s32 *end,
struct module *mod) { }
void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
#endif /* CONFIG_MITIGATION_RETPOLINE && CONFIG_OBJTOOL */
#ifdef CONFIG_X86_KERNEL_IBT
static void poison_cfi(void *addr, void *wr_addr);
static void poison_cfi(void *addr);
static void __init_or_module poison_endbr(void *addr, void *wr_addr, bool warn)
static void __init_or_module poison_endbr(void *addr, bool warn)
{
u32 endbr, poison = gen_endbr_poison();
if (WARN_ON_ONCE(get_kernel_nofault(endbr, wr_addr)))
if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr)))
return;
if (!is_endbr(endbr)) {
@ -889,7 +873,7 @@ static void __init_or_module poison_endbr(void *addr, void *wr_addr, bool warn)
*/
DUMP_BYTES(ENDBR, ((u8*)addr), 4, "%px: orig: ", addr);
DUMP_BYTES(ENDBR, ((u8*)&poison), 4, "%px: repl: ", addr);
text_poke_early(wr_addr, &poison, 4);
text_poke_early(addr, &poison, 4);
}
/*
@ -898,23 +882,22 @@ static void __init_or_module poison_endbr(void *addr, void *wr_addr, bool warn)
* Seal the functions for indirect calls by clobbering the ENDBR instructions
* and the kCFI hash value.
*/
void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end, struct module *mod)
void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
void *wr_addr = module_writable_address(mod, addr);
poison_endbr(addr, wr_addr, true);
poison_endbr(addr, true);
if (IS_ENABLED(CONFIG_FINEIBT))
poison_cfi(addr - 16, wr_addr - 16);
poison_cfi(addr - 16);
}
}
#else
void __init_or_module apply_seal_endbr(s32 *start, s32 *end, struct module *mod) { }
void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { }
#endif /* CONFIG_X86_KERNEL_IBT */
@ -1136,7 +1119,7 @@ static u32 decode_caller_hash(void *addr)
}
/* .retpoline_sites */
static int cfi_disable_callers(s32 *start, s32 *end, struct module *mod)
static int cfi_disable_callers(s32 *start, s32 *end)
{
/*
* Disable kCFI by patching in a JMP.d8, this leaves the hash immediate
@ -1148,23 +1131,20 @@ static int cfi_disable_callers(s32 *start, s32 *end, struct module *mod)
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
void *wr_addr;
u32 hash;
addr -= fineibt_caller_size;
wr_addr = module_writable_address(mod, addr);
hash = decode_caller_hash(wr_addr);
hash = decode_caller_hash(addr);
if (!hash) /* nocfi callers */
continue;
text_poke_early(wr_addr, jmp, 2);
text_poke_early(addr, jmp, 2);
}
return 0;
}
static int cfi_enable_callers(s32 *start, s32 *end, struct module *mod)
static int cfi_enable_callers(s32 *start, s32 *end)
{
/*
* Re-enable kCFI, undo what cfi_disable_callers() did.
@ -1174,115 +1154,106 @@ static int cfi_enable_callers(s32 *start, s32 *end, struct module *mod)
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
void *wr_addr;
u32 hash;
addr -= fineibt_caller_size;
wr_addr = module_writable_address(mod, addr);
hash = decode_caller_hash(wr_addr);
hash = decode_caller_hash(addr);
if (!hash) /* nocfi callers */
continue;
text_poke_early(wr_addr, mov, 2);
text_poke_early(addr, mov, 2);
}
return 0;
}
/* .cfi_sites */
static int cfi_rand_preamble(s32 *start, s32 *end, struct module *mod)
static int cfi_rand_preamble(s32 *start, s32 *end)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
void *wr_addr = module_writable_address(mod, addr);
u32 hash;
hash = decode_preamble_hash(wr_addr);
hash = decode_preamble_hash(addr);
if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n",
addr, addr, 5, addr))
return -EINVAL;
hash = cfi_rehash(hash);
text_poke_early(wr_addr + 1, &hash, 4);
text_poke_early(addr + 1, &hash, 4);
}
return 0;
}
static int cfi_rewrite_preamble(s32 *start, s32 *end, struct module *mod)
static int cfi_rewrite_preamble(s32 *start, s32 *end)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
void *wr_addr = module_writable_address(mod, addr);
u32 hash;
hash = decode_preamble_hash(wr_addr);
hash = decode_preamble_hash(addr);
if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n",
addr, addr, 5, addr))
return -EINVAL;
text_poke_early(wr_addr, fineibt_preamble_start, fineibt_preamble_size);
WARN_ON(*(u32 *)(wr_addr + fineibt_preamble_hash) != 0x12345678);
text_poke_early(wr_addr + fineibt_preamble_hash, &hash, 4);
text_poke_early(addr, fineibt_preamble_start, fineibt_preamble_size);
WARN_ON(*(u32 *)(addr + fineibt_preamble_hash) != 0x12345678);
text_poke_early(addr + fineibt_preamble_hash, &hash, 4);
}
return 0;
}
static void cfi_rewrite_endbr(s32 *start, s32 *end, struct module *mod)
static void cfi_rewrite_endbr(s32 *start, s32 *end)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
void *wr_addr = module_writable_address(mod, addr);
poison_endbr(addr + 16, wr_addr + 16, false);
poison_endbr(addr+16, false);
}
}
/* .retpoline_sites */
static int cfi_rand_callers(s32 *start, s32 *end, struct module *mod)
static int cfi_rand_callers(s32 *start, s32 *end)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
void *wr_addr;
u32 hash;
addr -= fineibt_caller_size;
wr_addr = module_writable_address(mod, addr);
hash = decode_caller_hash(wr_addr);
hash = decode_caller_hash(addr);
if (hash) {
hash = -cfi_rehash(hash);
text_poke_early(wr_addr + 2, &hash, 4);
text_poke_early(addr + 2, &hash, 4);
}
}
return 0;
}
static int cfi_rewrite_callers(s32 *start, s32 *end, struct module *mod)
static int cfi_rewrite_callers(s32 *start, s32 *end)
{
s32 *s;
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
void *wr_addr;
u32 hash;
addr -= fineibt_caller_size;
wr_addr = module_writable_address(mod, addr);
hash = decode_caller_hash(wr_addr);
hash = decode_caller_hash(addr);
if (hash) {
text_poke_early(wr_addr, fineibt_caller_start, fineibt_caller_size);
WARN_ON(*(u32 *)(wr_addr + fineibt_caller_hash) != 0x12345678);
text_poke_early(wr_addr + fineibt_caller_hash, &hash, 4);
text_poke_early(addr, fineibt_caller_start, fineibt_caller_size);
WARN_ON(*(u32 *)(addr + fineibt_caller_hash) != 0x12345678);
text_poke_early(addr + fineibt_caller_hash, &hash, 4);
}
/* rely on apply_retpolines() */
}
@ -1291,9 +1262,8 @@ static int cfi_rewrite_callers(s32 *start, s32 *end, struct module *mod)
}
static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
s32 *start_cfi, s32 *end_cfi, struct module *mod)
s32 *start_cfi, s32 *end_cfi, bool builtin)
{
bool builtin = mod ? false : true;
int ret;
if (WARN_ONCE(fineibt_preamble_size != 16,
@ -1311,7 +1281,7 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
* rewrite them. This disables all CFI. If this succeeds but any of the
* later stages fails, we're without CFI.
*/
ret = cfi_disable_callers(start_retpoline, end_retpoline, mod);
ret = cfi_disable_callers(start_retpoline, end_retpoline);
if (ret)
goto err;
@ -1322,11 +1292,11 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
cfi_bpf_subprog_hash = cfi_rehash(cfi_bpf_subprog_hash);
}
ret = cfi_rand_preamble(start_cfi, end_cfi, mod);
ret = cfi_rand_preamble(start_cfi, end_cfi);
if (ret)
goto err;
ret = cfi_rand_callers(start_retpoline, end_retpoline, mod);
ret = cfi_rand_callers(start_retpoline, end_retpoline);
if (ret)
goto err;
}
@ -1338,7 +1308,7 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
return;
case CFI_KCFI:
ret = cfi_enable_callers(start_retpoline, end_retpoline, mod);
ret = cfi_enable_callers(start_retpoline, end_retpoline);
if (ret)
goto err;
@ -1348,17 +1318,17 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
case CFI_FINEIBT:
/* place the FineIBT preamble at func()-16 */
ret = cfi_rewrite_preamble(start_cfi, end_cfi, mod);
ret = cfi_rewrite_preamble(start_cfi, end_cfi);
if (ret)
goto err;
/* rewrite the callers to target func()-16 */
ret = cfi_rewrite_callers(start_retpoline, end_retpoline, mod);
ret = cfi_rewrite_callers(start_retpoline, end_retpoline);
if (ret)
goto err;
/* now that nobody targets func()+0, remove ENDBR there */
cfi_rewrite_endbr(start_cfi, end_cfi, mod);
cfi_rewrite_endbr(start_cfi, end_cfi);
if (builtin)
pr_info("Using FineIBT CFI\n");
@ -1377,7 +1347,7 @@ static inline void poison_hash(void *addr)
*(u32 *)addr = 0;
}
static void poison_cfi(void *addr, void *wr_addr)
static void poison_cfi(void *addr)
{
switch (cfi_mode) {
case CFI_FINEIBT:
@ -1389,8 +1359,8 @@ static void poison_cfi(void *addr, void *wr_addr)
* ud2
* 1: nop
*/
poison_endbr(addr, wr_addr, false);
poison_hash(wr_addr + fineibt_preamble_hash);
poison_endbr(addr, false);
poison_hash(addr + fineibt_preamble_hash);
break;
case CFI_KCFI:
@ -1399,7 +1369,7 @@ static void poison_cfi(void *addr, void *wr_addr)
* movl $0, %eax
* .skip 11, 0x90
*/
poison_hash(wr_addr + 1);
poison_hash(addr + 1);
break;
default:
@ -1410,21 +1380,22 @@ static void poison_cfi(void *addr, void *wr_addr)
#else
static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
s32 *start_cfi, s32 *end_cfi, struct module *mod)
s32 *start_cfi, s32 *end_cfi, bool builtin)
{
}
#ifdef CONFIG_X86_KERNEL_IBT
static void poison_cfi(void *addr, void *wr_addr) { }
static void poison_cfi(void *addr) { }
#endif
#endif
void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
s32 *start_cfi, s32 *end_cfi, struct module *mod)
s32 *start_cfi, s32 *end_cfi)
{
return __apply_fineibt(start_retpoline, end_retpoline,
start_cfi, end_cfi, mod);
start_cfi, end_cfi,
/* .builtin = */ false);
}
#ifdef CONFIG_SMP
@ -1721,16 +1692,16 @@ void __init alternative_instructions(void)
paravirt_set_cap();
__apply_fineibt(__retpoline_sites, __retpoline_sites_end,
__cfi_sites, __cfi_sites_end, NULL);
__cfi_sites, __cfi_sites_end, true);
/*
* Rewrite the retpolines, must be done before alternatives since
* those can rewrite the retpoline thunks.
*/
apply_retpolines(__retpoline_sites, __retpoline_sites_end, NULL);
apply_returns(__return_sites, __return_sites_end, NULL);
apply_retpolines(__retpoline_sites, __retpoline_sites_end);
apply_returns(__return_sites, __return_sites_end);
apply_alternatives(__alt_instructions, __alt_instructions_end, NULL);
apply_alternatives(__alt_instructions, __alt_instructions_end);
/*
* Now all calls are established. Apply the call thunks if
@ -1741,7 +1712,7 @@ void __init alternative_instructions(void)
/*
* Seal all functions that do not have their address taken.
*/
apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end, NULL);
apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end);
#ifdef CONFIG_SMP
/* Patch to UP if other cpus not imminent. */

View File

@ -118,13 +118,10 @@ ftrace_modify_code_direct(unsigned long ip, const char *old_code,
return ret;
/* replace the text with the new text */
if (ftrace_poke_late) {
if (ftrace_poke_late)
text_poke_queue((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL);
} else {
mutex_lock(&text_mutex);
text_poke((void *)ip, new_code, MCOUNT_INSN_SIZE);
mutex_unlock(&text_mutex);
}
else
text_poke_early((void *)ip, new_code, MCOUNT_INSN_SIZE);
return 0;
}
@ -321,7 +318,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 };
unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE };
union ftrace_op_code_union op_ptr;
void *ret;
int ret;
if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
start_offset = (unsigned long)ftrace_regs_caller;
@ -352,15 +349,15 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE);
/* Copy ftrace_caller onto the trampoline memory */
ret = text_poke_copy(trampoline, (void *)start_offset, size);
if (WARN_ON(!ret))
ret = copy_from_kernel_nofault(trampoline, (void *)start_offset, size);
if (WARN_ON(ret < 0))
goto fail;
ip = trampoline + size;
if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
__text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE);
else
text_poke_copy(ip, retq, sizeof(retq));
memcpy(ip, retq, sizeof(retq));
/* No need to test direct calls on created trampolines */
if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
@ -368,7 +365,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
ip = trampoline + (jmp_offset - start_offset);
if (WARN_ON(*(char *)ip != 0x75))
goto fail;
if (!text_poke_copy(ip, x86_nops[2], 2))
ret = copy_from_kernel_nofault(ip, x86_nops[2], 2);
if (ret < 0)
goto fail;
}
@ -381,7 +379,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
*/
ptr = (unsigned long *)(trampoline + size + RET_SIZE);
text_poke_copy(ptr, &ops, sizeof(unsigned long));
*ptr = (unsigned long)ops;
op_offset -= start_offset;
memcpy(&op_ptr, trampoline + op_offset, OP_REF_SIZE);
@ -397,7 +395,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
op_ptr.offset = offset;
/* put in the new offset to the ftrace_ops */
text_poke_copy(trampoline + op_offset, &op_ptr, OP_REF_SIZE);
memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE);
/* put in the call to the function */
mutex_lock(&text_mutex);
@ -407,9 +405,9 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
* the depth accounting before the call already.
*/
dest = ftrace_ops_get_func(ops);
text_poke_copy_locked(trampoline + call_offset,
text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest),
CALL_INSN_SIZE, false);
memcpy(trampoline + call_offset,
text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest),
CALL_INSN_SIZE);
mutex_unlock(&text_mutex);
/* ALLOC_TRAMP flags lets us know we created it */

View File

@ -146,21 +146,18 @@ static int __write_relocate_add(Elf64_Shdr *sechdrs,
}
if (apply) {
void *wr_loc = module_writable_address(me, loc);
if (memcmp(wr_loc, &zero, size)) {
if (memcmp(loc, &zero, size)) {
pr_err("x86/modules: Invalid relocation target, existing value is nonzero for type %d, loc %p, val %Lx\n",
(int)ELF64_R_TYPE(rel[i].r_info), loc, val);
return -ENOEXEC;
}
write(wr_loc, &val, size);
write(loc, &val, size);
} else {
if (memcmp(loc, &val, size)) {
pr_warn("x86/modules: Invalid relocation target, existing value does not match expected value for type %d, loc %p, val %Lx\n",
(int)ELF64_R_TYPE(rel[i].r_info), loc, val);
return -ENOEXEC;
}
/* FIXME: needs care for ROX module allocations */
write(loc, &zero, size);
}
}
@ -227,7 +224,7 @@ int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
{
const Elf_Shdr *s, *alt = NULL,
const Elf_Shdr *s, *alt = NULL, *locks = NULL,
*orc = NULL, *orc_ip = NULL,
*retpolines = NULL, *returns = NULL, *ibt_endbr = NULL,
*calls = NULL, *cfi = NULL;
@ -236,6 +233,8 @@ int module_finalize(const Elf_Ehdr *hdr,
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
if (!strcmp(".altinstructions", secstrings + s->sh_name))
alt = s;
if (!strcmp(".smp_locks", secstrings + s->sh_name))
locks = s;
if (!strcmp(".orc_unwind", secstrings + s->sh_name))
orc = s;
if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name))
@ -266,20 +265,20 @@ int module_finalize(const Elf_Ehdr *hdr,
csize = cfi->sh_size;
}
apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize, me);
apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize);
}
if (retpolines) {
void *rseg = (void *)retpolines->sh_addr;
apply_retpolines(rseg, rseg + retpolines->sh_size, me);
apply_retpolines(rseg, rseg + retpolines->sh_size);
}
if (returns) {
void *rseg = (void *)returns->sh_addr;
apply_returns(rseg, rseg + returns->sh_size, me);
apply_returns(rseg, rseg + returns->sh_size);
}
if (alt) {
/* patch .altinstructions */
void *aseg = (void *)alt->sh_addr;
apply_alternatives(aseg, aseg + alt->sh_size, me);
apply_alternatives(aseg, aseg + alt->sh_size);
}
if (calls || alt) {
struct callthunk_sites cs = {};
@ -298,28 +297,8 @@ int module_finalize(const Elf_Ehdr *hdr,
}
if (ibt_endbr) {
void *iseg = (void *)ibt_endbr->sh_addr;
apply_seal_endbr(iseg, iseg + ibt_endbr->sh_size, me);
apply_seal_endbr(iseg, iseg + ibt_endbr->sh_size);
}
if (orc && orc_ip)
unwind_module_init(me, (void *)orc_ip->sh_addr, orc_ip->sh_size,
(void *)orc->sh_addr, orc->sh_size);
return 0;
}
int module_post_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
{
const Elf_Shdr *s, *locks = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
if (!strcmp(".smp_locks", secstrings + s->sh_name))
locks = s;
}
if (locks) {
void *lseg = (void *)locks->sh_addr;
void *text = me->mem[MOD_TEXT].base;
@ -329,6 +308,10 @@ int module_post_finalize(const Elf_Ehdr *hdr,
text, text_end);
}
if (orc && orc_ip)
unwind_module_init(me, (void *)orc_ip->sh_addr, orc_ip->sh_size,
(void *)orc->sh_addr, orc->sh_size);
return 0;
}

View File

@ -183,7 +183,7 @@ static int pageattr_test(void)
break;
case 1:
err = change_page_attr_set(addrs, len[1], PAGE_CPA_TEST, 1);
err = change_page_attr_set(addrs, len[i], PAGE_CPA_TEST, 1);
break;
case 2:

View File

@ -73,6 +73,7 @@ static DEFINE_SPINLOCK(cpa_lock);
#define CPA_ARRAY 2
#define CPA_PAGES_ARRAY 4
#define CPA_NO_CHECK_ALIAS 8 /* Do not search for aliases */
#define CPA_COLLAPSE 16 /* try to collapse large pages */
static inline pgprot_t cachemode2pgprot(enum page_cache_mode pcm)
{
@ -105,6 +106,18 @@ static void split_page_count(int level)
direct_pages_count[level - 1] += PTRS_PER_PTE;
}
/*
 * Account for one successful collapse into a mapping of size @level:
 * one more mapping is counted at @level and PTRS_PER_PTE fewer at the
 * level below it. Counterpart of split_page_count().
 */
static void collapse_page_count(int level)
{
	direct_pages_count[level] += 1;
	direct_pages_count[level - 1] -= PTRS_PER_PTE;

	/* VM events are only recorded once the system is fully running. */
	if (system_state != SYSTEM_RUNNING)
		return;

	if (level == PG_LEVEL_2M) {
		count_vm_event(DIRECT_MAP_LEVEL2_COLLAPSE);
		return;
	}

	if (level == PG_LEVEL_1G)
		count_vm_event(DIRECT_MAP_LEVEL3_COLLAPSE);
}
void arch_report_meminfo(struct seq_file *m)
{
seq_printf(m, "DirectMap4k: %8lu kB\n",
@ -122,6 +135,7 @@ void arch_report_meminfo(struct seq_file *m)
}
#else
static inline void split_page_count(int level) { }
static inline void collapse_page_count(int level) { }
#endif
#ifdef CONFIG_X86_CPA_STATISTICS
@ -394,16 +408,49 @@ static void __cpa_flush_tlb(void *data)
flush_tlb_one_kernel(fix_addr(__cpa_addr(cpa, i)));
}
static void cpa_flush(struct cpa_data *data, int cache)
static int collapse_large_pages(unsigned long addr, struct list_head *pgtables);
/*
 * Try to collapse the mappings covering the addresses described by @cpa
 * back into large pages.
 *
 * For array-style requests every entry is tried individually; otherwise
 * the contiguous range is walked in PMD_SIZE steps starting from the
 * PMD-aligned base of the first address.
 *
 * Page tables made redundant by a collapse are collected on a local list
 * and are freed only after flush_tlb_all() has run.
 */
static void cpa_collapse_large_pages(struct cpa_data *cpa)
{
	unsigned long first, base, limit, cur;
	struct ptdesc *pt, *next;
	LIST_HEAD(pgtables);
	int nr_collapsed = 0;
	int idx;

	if (!(cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))) {
		first = __cpa_addr(cpa, 0);
		base = first & PMD_MASK;
		limit = first + PAGE_SIZE * cpa->numpages;

		for (cur = base; within(cur, base, limit); cur += PMD_SIZE)
			nr_collapsed += collapse_large_pages(cur, &pgtables);
	} else {
		for (idx = 0; idx < cpa->numpages; idx++)
			nr_collapsed += collapse_large_pages(__cpa_addr(cpa, idx),
							     &pgtables);
	}

	/* Nothing was merged: no flush needed and nothing queued to free. */
	if (nr_collapsed == 0)
		return;

	flush_tlb_all();

	/* The flush is done, release the page tables that were replaced. */
	list_for_each_entry_safe(pt, next, &pgtables, pt_list) {
		list_del(&pt->pt_list);
		__free_page(ptdesc_page(pt));
	}
}
static void cpa_flush(struct cpa_data *cpa, int cache)
{
struct cpa_data *cpa = data;
unsigned int i;
BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
if (cache && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
cpa_flush_all(cache);
return;
goto collapse_large_pages;
}
if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling)
@ -412,7 +459,7 @@ static void cpa_flush(struct cpa_data *data, int cache)
on_each_cpu(__cpa_flush_tlb, cpa, 1);
if (!cache)
return;
goto collapse_large_pages;
mb();
for (i = 0; i < cpa->numpages; i++) {
@ -428,6 +475,10 @@ static void cpa_flush(struct cpa_data *data, int cache)
clflush_cache_range_opt((void *)fix_addr(addr), PAGE_SIZE);
}
mb();
collapse_large_pages:
if (cpa->flags & CPA_COLLAPSE)
cpa_collapse_large_pages(cpa);
}
static bool overlaps(unsigned long r1_start, unsigned long r1_end,
@ -1197,6 +1248,161 @@ static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
return 0;
}
/*
 * Try to replace the PTE table referenced by @pmd with a single large
 * mapping.
 *
 * @pmd:      PMD entry that currently points to a PTE table
 * @addr:     any address covered by @pmd (aligned down to PMD_SIZE here)
 * @pgtables: list on which the replaced PTE table is queued; the caller
 *            frees it after flushing the TLB
 *
 * The collapse only happens when the first PTE maps a PMD-aligned physical
 * address, is not marked _PAGE_KERNEL_4K, and every following PTE is
 * present, carries identical flags and maps the next consecutive pfn.
 *
 * Returns 1 if the PMD was collapsed, 0 otherwise.
 */
static int collapse_pmd_page(pmd_t *pmd, unsigned long addr,
			     struct list_head *pgtables)
{
	pmd_t _pmd, old_pmd;
	pte_t *pte, first;
	unsigned long pfn;
	pgprot_t pgprot;
	int i;

	/*
	 * The large mapping built below sets _PAGE_PSE, so never create one
	 * when the PSE feature is not enabled.
	 */
	if (!cpu_feature_enabled(X86_FEATURE_PSE))
		return 0;

	addr &= PMD_MASK;
	pte = pte_offset_kernel(pmd, addr);
	first = *pte;
	pfn = pte_pfn(first);

	/* Make sure alignment is suitable */
	if (PFN_PHYS(pfn) & ~PMD_MASK)
		return 0;

	/* The page is 4k intentionally */
	if (pte_flags(first) & _PAGE_KERNEL_4K)
		return 0;

	/* Check that the rest of PTEs are compatible with the first one */
	for (i = 1, pte++; i < PTRS_PER_PTE; i++, pte++) {
		pte_t entry = *pte;

		if (!pte_present(entry))
			return 0;
		if (pte_flags(entry) != pte_flags(first))
			return 0;
		if (pte_pfn(entry) != pte_pfn(first) + i)
			return 0;
	}

	/* Remember the old entry: it still references the PTE table. */
	old_pmd = *pmd;

	/* Success: set up a large page */
	pgprot = pgprot_4k_2_large(pte_pgprot(first));
	pgprot_val(pgprot) |= _PAGE_PSE;
	_pmd = pfn_pmd(pfn, pgprot);
	set_pmd(pmd, _pmd);

	/* Queue the page table to be freed after TLB flush */
	list_add(&page_ptdesc(pmd_page(old_pmd))->pt_list, pgtables);

	if (IS_ENABLED(CONFIG_X86_32) && !SHARED_KERNEL_PMD) {
		struct page *page;

		/* Update all PGD tables to use the same large page */
		list_for_each_entry(page, &pgd_list, lru) {
			pgd_t *pgd = (pgd_t *)page_address(page) + pgd_index(addr);
			p4d_t *p4d = p4d_offset(pgd, addr);
			pud_t *pud = pud_offset(p4d, addr);
			/* Named distinctly so it does not shadow the @pmd parameter. */
			pmd_t *alias_pmd = pmd_offset(pud, addr);

			/* Something is wrong if the entries don't match */
			if (WARN_ON(pmd_val(old_pmd) != pmd_val(*alias_pmd)))
				continue;
			set_pmd(alias_pmd, _pmd);
		}
	}

	if (virt_addr_valid(addr) && pfn_range_is_mapped(pfn, pfn + 1))
		collapse_page_count(PG_LEVEL_2M);

	return 1;
}
/*
 * Try to replace the PMD table referenced by @pud with a single PUD-level
 * mapping.
 *
 * @pud:      PUD entry that currently points to a PMD table
 * @addr:     any address covered by @pud (aligned down to PUD_SIZE here)
 * @pgtables: list on which the replaced PMD table is queued; the caller
 *            frees it after flushing the TLB
 *
 * Nothing is done unless direct_gbpages is set. The collapse requires the
 * first PMD to be a leaf mapping a PUD-aligned physical address, and every
 * following PMD to be a present leaf with identical flags whose pfn
 * continues the sequence in PTRS_PER_PTE steps.
 *
 * Returns 1 if the PUD was collapsed, 0 otherwise.
 */
static int collapse_pud_page(pud_t *pud, unsigned long addr,
			     struct list_head *pgtables)
{
	unsigned long base_pfn;
	pmd_t *pmdp, head;
	int idx;

	if (!direct_gbpages)
		return 0;

	addr &= PUD_MASK;
	pmdp = pmd_offset(pud, addr);
	head = *pmdp;
	base_pfn = pmd_pfn(head);

	/* The first entry must itself be a large page ... */
	if (!pmd_leaf(head))
		return 0;

	/* ... and must start on a PUD-sized physical boundary. */
	if (PFN_PHYS(base_pfn) & ~PUD_MASK)
		return 0;

	/* All remaining entries must be compatible with the first one. */
	for (idx = 1; idx < PTRS_PER_PMD; idx++) {
		pmd_t cur = pmdp[idx];

		if (!pmd_present(cur) || !pmd_leaf(cur) ||
		    pmd_flags(cur) != pmd_flags(head) ||
		    pmd_pfn(cur) != pmd_pfn(head) + idx * PTRS_PER_PTE)
			return 0;
	}

	/*
	 * Queue the PMD table while *pud still points at it, then install
	 * the large mapping. The table is freed after the TLB flush.
	 */
	list_add(&page_ptdesc(pud_page(*pud))->pt_list, pgtables);
	set_pud(pud, pfn_pud(base_pfn, pmd_pgprot(head)));

	if (virt_addr_valid(addr) && pfn_range_is_mapped(base_pfn, base_pfn + 1))
		collapse_page_count(PG_LEVEL_1G);

	return 1;
}
/*
 * Attempt to merge the kernel page tables covering @addr back into large
 * pages: first the PTE table into a PMD page and, if that worked, the PMD
 * table into a PUD page.
 *
 * @addr:     address to collapse around; rounded down to a PMD boundary
 * @pgtables: list that receives the page tables made redundant
 *
 * The walk and the updates are done with pgd_lock held.
 *
 * The caller has to flush the TLB and free the page tables queued on
 * @pgtables before touching the new entries: the CPU must never see TLB
 * entries of different size with different attributes.
 *
 * Returns the sum of the collapse_pmd_page() and collapse_pud_page()
 * results, 0 if nothing was collapsed.
 */
static int collapse_large_pages(unsigned long addr, struct list_head *pgtables)
{
	int nr_collapsed = 0;
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;

	addr &= PMD_MASK;

	spin_lock(&pgd_lock);

	pgdp = pgd_offset_k(addr);
	if (pgd_none(*pgdp))
		goto unlock;

	p4dp = p4d_offset(pgdp, addr);
	if (p4d_none(*p4dp))
		goto unlock;

	/* Need a PUD entry that points to a PMD table, not a 1G leaf */
	pudp = pud_offset(p4dp, addr);
	if (pud_leaf(*pudp) || !pud_present(*pudp))
		goto unlock;

	/* Likewise, the PMD entry has to point to a PTE table */
	pmdp = pmd_offset(pudp, addr);
	if (pmd_leaf(*pmdp) || !pmd_present(*pmdp))
		goto unlock;

	nr_collapsed = collapse_pmd_page(pmdp, addr, pgtables);

	/* Only try the next level up when the PMD collapse succeeded */
	if (nr_collapsed != 0)
		nr_collapsed += collapse_pud_page(pudp, addr, pgtables);

unlock:
	spin_unlock(&pgd_lock);
	return nr_collapsed;
}
static bool try_to_free_pte_page(pte_t *pte)
{
int i;
@ -2120,7 +2326,8 @@ int set_memory_rox(unsigned long addr, int numpages)
if (__supported_pte_mask & _PAGE_NX)
clr.pgprot |= _PAGE_NX;
return change_page_attr_clear(&addr, numpages, clr, 0);
return change_page_attr_set_clr(&addr, numpages, __pgprot(0), clr, 0,
CPA_COLLAPSE, NULL);
}
int set_memory_rw(unsigned long addr, int numpages)
@ -2147,7 +2354,8 @@ int set_memory_p(unsigned long addr, int numpages)
int set_memory_4k(unsigned long addr, int numpages)
{
return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
return change_page_attr_set_clr(&addr, numpages,
__pgprot(_PAGE_KERNEL_4K),
__pgprot(0), 1, 0, NULL);
}

View File

@ -1325,7 +1325,7 @@ bool nmi_uaccess_okay(void)
if (loaded_mm != current_mm)
return false;
VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa()));
VM_WARN_ON_ONCE(__pa(current_mm->pgd) != read_cr3_pa());
return true;
}

View File

@ -65,6 +65,37 @@ enum execmem_range_flags {
* Architectures that use EXECMEM_ROX_CACHE must implement this.
*/
void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable);
/**
* execmem_make_temp_rw - temporarily remap region with read-write
* permissions
* @ptr: address of the region to remap
* @size: size of the region to remap
*
* Remaps a part of the cached large page in the ROX cache in the range
* [@ptr, @ptr + @size) as writable and not executable. The caller must
* have exclusive ownership of this range and ensure nothing will try to
* execute code in this range.
*
* Return: 0 on success or negative error code on failure.
*/
int execmem_make_temp_rw(void *ptr, size_t size);
/**
* execmem_restore_rox - restore read-only-execute permissions
* @ptr: address of the region to remap
* @size: size of the region to remap
*
* Restores read-only-execute permissions on a range [@ptr, @ptr + @size)
* after it was temporarily remapped as writable. Relies on architecture
* implementation of set_memory_rox() to restore mapping using large pages.
*
* Return: 0 on success or negative error code on failure.
*/
int execmem_restore_rox(void *ptr, size_t size);
#else
static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; }
static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; }
#endif
/**

View File

@ -370,7 +370,6 @@ enum mod_mem_type {
struct module_memory {
void *base;
void *rw_copy;
bool is_rox;
unsigned int size;
@ -772,16 +771,6 @@ static inline bool is_livepatch_module(struct module *mod)
void set_module_sig_enforced(void);
void *__module_writable_address(struct module *mod, void *loc);
static inline void *module_writable_address(struct module *mod, void *loc)
{
if (!IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) || !mod ||
mod->state != MODULE_STATE_UNFORMED)
return loc;
return __module_writable_address(mod, loc);
}
#else /* !CONFIG_MODULES... */
static inline struct module *__module_address(unsigned long addr)
@ -889,11 +878,6 @@ static inline bool module_is_coming(struct module *mod)
{
return false;
}
static inline void *module_writable_address(struct module *mod, void *loc)
{
return loc;
}
#endif /* CONFIG_MODULES */
#ifdef CONFIG_SYSFS

View File

@ -108,10 +108,6 @@ int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *mod);
int module_post_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *mod);
#ifdef CONFIG_MODULES
void flush_module_init_free_work(void);
#else

View File

@ -151,6 +151,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
#ifdef CONFIG_X86
DIRECT_MAP_LEVEL2_SPLIT,
DIRECT_MAP_LEVEL3_SPLIT,
DIRECT_MAP_LEVEL2_COLLAPSE,
DIRECT_MAP_LEVEL3_COLLAPSE,
#endif
#ifdef CONFIG_PER_VMA_LOCK_STATS
VMA_LOCK_SUCCESS,

View File

@ -1221,18 +1221,6 @@ void __weak module_arch_freeing_init(struct module *mod)
{
}
void *__module_writable_address(struct module *mod, void *loc)
{
for_class_mod_mem_type(type, text) {
struct module_memory *mem = &mod->mem[type];
if (loc >= mem->base && loc < mem->base + mem->size)
return loc + (mem->rw_copy - mem->base);
}
return loc;
}
static int module_memory_alloc(struct module *mod, enum mod_mem_type type)
{
unsigned int size = PAGE_ALIGN(mod->mem[type].size);
@ -1250,21 +1238,15 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type)
if (!ptr)
return -ENOMEM;
mod->mem[type].base = ptr;
if (execmem_is_rox(execmem_type)) {
ptr = vzalloc(size);
int err = execmem_make_temp_rw(ptr, size);
if (!ptr) {
execmem_free(mod->mem[type].base);
if (err) {
execmem_free(ptr);
return -ENOMEM;
}
mod->mem[type].rw_copy = ptr;
mod->mem[type].is_rox = true;
} else {
mod->mem[type].rw_copy = mod->mem[type].base;
memset(mod->mem[type].base, 0, size);
}
/*
@ -1278,18 +1260,29 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type)
* *do* eventually get freed, but let's just keep things simple
* and avoid *any* false positives.
*/
kmemleak_not_leak(ptr);
if (!mod->mem[type].is_rox)
kmemleak_not_leak(ptr);
memset(ptr, 0, size);
mod->mem[type].base = ptr;
return 0;
}
/*
 * Walk the text memory types of @mod and call execmem_restore_rox() on
 * every region that is flagged is_rox. The return value of
 * execmem_restore_rox() is not checked.
 */
static void module_memory_restore_rox(struct module *mod)
{
	for_class_mod_mem_type(type, text) {
		/* Regions that are not flagged is_rox are left untouched */
		if (!mod->mem[type].is_rox)
			continue;

		execmem_restore_rox(mod->mem[type].base, mod->mem[type].size);
	}
}
static void module_memory_free(struct module *mod, enum mod_mem_type type)
{
struct module_memory *mem = &mod->mem[type];
if (mem->is_rox)
vfree(mem->rw_copy);
execmem_free(mem->base);
}
@ -2642,7 +2635,6 @@ static int move_module(struct module *mod, struct load_info *info)
for_each_mod_mem_type(type) {
if (!mod->mem[type].size) {
mod->mem[type].base = NULL;
mod->mem[type].rw_copy = NULL;
continue;
}
@ -2659,7 +2651,6 @@ static int move_module(struct module *mod, struct load_info *info)
void *dest;
Elf_Shdr *shdr = &info->sechdrs[i];
const char *sname;
unsigned long addr;
if (!(shdr->sh_flags & SHF_ALLOC))
continue;
@ -2680,14 +2671,12 @@ static int move_module(struct module *mod, struct load_info *info)
ret = PTR_ERR(dest);
goto out_err;
}
addr = (unsigned long)dest;
codetag_section_found = true;
} else {
enum mod_mem_type type = shdr->sh_entsize >> SH_ENTSIZE_TYPE_SHIFT;
unsigned long offset = shdr->sh_entsize & SH_ENTSIZE_OFFSET_MASK;
addr = (unsigned long)mod->mem[type].base + offset;
dest = mod->mem[type].rw_copy + offset;
dest = mod->mem[type].base + offset;
}
if (shdr->sh_type != SHT_NOBITS) {
@ -2710,13 +2699,14 @@ static int move_module(struct module *mod, struct load_info *info)
* users of info can keep taking advantage and using the newly
* minted official memory area.
*/
shdr->sh_addr = addr;
shdr->sh_addr = (unsigned long)dest;
pr_debug("\t0x%lx 0x%.8lx %s\n", (long)shdr->sh_addr,
(long)shdr->sh_size, info->secstrings + shdr->sh_name);
}
return 0;
out_err:
module_memory_restore_rox(mod);
for (t--; t >= 0; t--)
module_memory_free(mod, t);
if (codetag_section_found)
@ -2863,17 +2853,8 @@ int __weak module_finalize(const Elf_Ehdr *hdr,
return 0;
}
int __weak module_post_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
{
return 0;
}
static int post_relocation(struct module *mod, const struct load_info *info)
{
int ret;
/* Sort exception table now relocations are done. */
sort_extable(mod->extable, mod->extable + mod->num_exentries);
@ -2885,24 +2866,7 @@ static int post_relocation(struct module *mod, const struct load_info *info)
add_kallsyms(mod, info);
/* Arch-specific module finalizing. */
ret = module_finalize(info->hdr, info->sechdrs, mod);
if (ret)
return ret;
for_each_mod_mem_type(type) {
struct module_memory *mem = &mod->mem[type];
if (mem->is_rox) {
if (!execmem_update_copy(mem->base, mem->rw_copy,
mem->size))
return -ENOMEM;
vfree(mem->rw_copy);
mem->rw_copy = NULL;
}
}
return module_post_finalize(info->hdr, info->sechdrs, mod);
return module_finalize(info->hdr, info->sechdrs, mod);
}
/* Call module constructors. */
@ -3499,6 +3463,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
mod->mem[type].size);
}
module_memory_restore_rox(mod);
module_deallocate(mod, info);
free_copy:
/*

View File

@ -9,6 +9,7 @@
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/set_memory.h>
#include <linux/execmem.h>
#include "internal.h"
static int module_set_memory(const struct module *mod, enum mod_mem_type type,
@ -32,12 +33,12 @@ static int module_set_memory(const struct module *mod, enum mod_mem_type type,
int module_enable_text_rox(const struct module *mod)
{
for_class_mod_mem_type(type, text) {
const struct module_memory *mem = &mod->mem[type];
int ret;
if (mod->mem[type].is_rox)
continue;
if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
if (mem->is_rox)
ret = execmem_restore_rox(mem->base, mem->size);
else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
ret = module_set_memory(mod, type, set_memory_rox);
else
ret = module_set_memory(mod, type, set_memory_x);

View File

@ -257,7 +257,6 @@ static void *__execmem_cache_alloc(struct execmem_range *range, size_t size)
static int execmem_cache_populate(struct execmem_range *range, size_t size)
{
unsigned long vm_flags = VM_ALLOW_HUGE_VMAP;
unsigned long start, end;
struct vm_struct *vm;
size_t alloc_size;
int err = -ENOMEM;
@ -275,26 +274,18 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size)
/* fill memory with instructions that will trap */
execmem_fill_trapping_insns(p, alloc_size, /* writable = */ true);
start = (unsigned long)p;
end = start + alloc_size;
vunmap_range(start, end);
err = execmem_set_direct_map_valid(vm, false);
if (err)
goto err_free_mem;
err = vmap_pages_range_noflush(start, end, range->pgprot, vm->pages,
PMD_SHIFT);
err = set_memory_rox((unsigned long)p, vm->nr_pages);
if (err)
goto err_free_mem;
err = execmem_cache_add(p, alloc_size);
if (err)
goto err_free_mem;
goto err_reset_direct_map;
return 0;
err_reset_direct_map:
execmem_set_direct_map_valid(vm, true);
err_free_mem:
vfree(p);
return err;
@ -344,6 +335,28 @@ static bool execmem_cache_free(void *ptr)
return true;
}
/*
 * Make the pages covering [@ptr, @ptr + @size) non-executable and then
 * writable. @size is rounded up to whole pages.
 *
 * Returns 0 on success, otherwise the error reported by set_memory_nx()
 * or set_memory_rw().
 */
int execmem_make_temp_rw(void *ptr, size_t size)
{
	unsigned long start = (unsigned long)ptr;
	unsigned int npages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	int err;

	/*
	 * NX is applied before RW; presumably so the range is never writable
	 * and executable at the same time - TODO confirm.
	 */
	err = set_memory_nx(start, npages);
	if (!err)
		err = set_memory_rw(start, npages);

	return err;
}
/*
 * Apply set_memory_rox() to the pages covering [@ptr, @ptr + @size).
 * @size is rounded up to whole pages.
 *
 * Returns whatever set_memory_rox() returns.
 */
int execmem_restore_rox(void *ptr, size_t size)
{
	unsigned int npages = PAGE_ALIGN(size) >> PAGE_SHIFT;

	return set_memory_rox((unsigned long)ptr, npages);
}
#else /* CONFIG_ARCH_HAS_EXECMEM_ROX */
static void *execmem_cache_alloc(struct execmem_range *range, size_t size)
{

View File

@ -1435,6 +1435,8 @@ const char * const vmstat_text[] = {
#ifdef CONFIG_X86
"direct_map_level2_splits",
"direct_map_level3_splits",
"direct_map_level2_collapses",
"direct_map_level3_collapses",
#endif
#ifdef CONFIG_PER_VMA_LOCK_STATS
"vma_lock_success",