diff --git a/Documentation/dontdiff b/Documentation/dontdiff index e1efc40..47f0daf 100644 --- a/Documentation/dontdiff +++ b/Documentation/dontdiff @@ -1,15 +1,19 @@ *.a *.aux *.bin +*.cis *.cpio *.csp +*.dbg *.dsp *.dvi *.elf *.eps *.fw +*.gcno *.gen.S *.gif +*.gmo *.grep *.grp *.gz @@ -38,8 +42,10 @@ *.tab.h *.tex *.ver +*.vim *.xml *_MODULES +*_reg_safe.h *_vga16.c *~ *.9 @@ -49,11 +55,16 @@ 53c700_d.h CVS ChangeSet +GPATH +GRTAGS +GSYMS +GTAGS Image Kerntypes Module.markers Module.symvers PENDING +PERF* SCCS System.map* TAGS @@ -76,7 +87,11 @@ btfixupprep build bvmlinux bzImage* +capability_names.h +capflags.c classlist.h* +clut_vga16.c +common-cmds.h comp*.log compile.h* conf @@ -84,6 +99,8 @@ config config-* config_data.h* config_data.gz* +config.c +config.tmp conmakehash consolemap_deftbl.c* cpustr.h @@ -97,19 +114,23 @@ elfconfig.h* fixdep fore200e_mkfirm fore200e_pca_fw.c* +gate.lds gconf gen-devlist gen_crc32table gen_init_cpio genksyms *_gray256.c +hash +hid-example ihex2fw ikconfig.h* initramfs_data.cpio +initramfs_data.cpio.bz2 initramfs_data.cpio.gz initramfs_list kallsyms -kconfig +kern_constants.h keywords.c ksym.c* ksym.h* @@ -127,13 +148,16 @@ machtypes.h map maui_boot.h mconf +mdp miboot* mk_elfconfig mkboot mkbugboot mkcpustr mkdep +mkpiggy mkprep +mkregtable mktables mktree modpost @@ -149,6 +173,7 @@ patches* pca200e.bin pca200e_ecd.bin2 piggy.gz +piggy.S piggyback pnmtologo ppc_defs.h* @@ -157,12 +182,15 @@ qconf raid6altivec*.c raid6int*.c raid6tables.c +regdb.c relocs +rlim_names.h series setup setup.bin setup.elf sImage +slabinfo sm_tbl* split-include syscalltab.h @@ -171,6 +199,7 @@ tftpboot.img timeconst.h times.h* trix_boot.h +user_constants.h utsrelease.h* vdso-syms.lds vdso.lds @@ -186,14 +215,20 @@ version.h* vmlinux vmlinux-* vmlinux.aout +vmlinux.bin.all +vmlinux.bin.bz2 vmlinux.lds +vmlinux.relocs +voffset.h vsyscall.lds vsyscall_32.lds wanxlfw.inc uImage unifdef +utsrelease.h wakeup.bin wakeup.elf wakeup.lds zImage* zconf.hash.c 
+zoffset.h diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index c840e7d..f4c451c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1837,6 +1837,13 @@ and is between 256 and 4096 characters. It is defined in the file the specified number of seconds. This is to be used if your oopses keep scrolling off the screen. + pax_nouderef [X86] disables UDEREF. Most likely needed under certain + virtualization environments that don't cope well with the + expand down segment used by UDEREF on X86-32 or the frequent + page table updates on X86-64. + + pax_softmode= 0/1 to disable/enable PaX softmode on boot already. + pcbit= [HW,ISDN] pcd. [PARIDE] diff --git a/Makefile b/Makefile index 64d4fc6..3b32f7f 100644 --- a/Makefile +++ b/Makefile @@ -221,8 +221,9 @@ CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ HOSTCC = gcc HOSTCXX = g++ -HOSTCFLAGS = -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -HOSTCXXFLAGS = -O2 +HOSTCFLAGS = -Wall -W -Wmissing-prototypes -Wstrict-prototypes -Wno-unused-parameter -Wno-missing-field-initializers -O2 -fomit-frame-pointer -fno-delete-null-pointer-checks +HOSTCFLAGS += $(call cc-option, -Wno-empty-body) +HOSTCXXFLAGS = -O2 -Wall -W -fno-delete-null-pointer-checks # Decide whether to build built-in, modular, or both. # Normally, just do built-in. @@ -376,8 +377,8 @@ export RCS_TAR_IGNORE := --exclude SCCS --exclude BitKeeper --exclude .svn --exc # Rules shared between *config targets and build targets # Basic helpers built in scripts/ -PHONY += scripts_basic -scripts_basic: +PHONY += scripts_basic gcc-plugins +scripts_basic: gcc-plugins $(Q)$(MAKE) $(build)=scripts/basic # To avoid any implicit rule to kick in, define an empty command. @@ -403,7 +404,7 @@ endif # of make so .config is not included in this case either (for *config). 
no-dot-config-targets := clean mrproper distclean \ - cscope TAGS tags help %docs check% \ + cscope gtags TAGS tags help %docs check% \ include/linux/version.h headers_% \ kernelrelease kernelversion @@ -526,6 +527,46 @@ else KBUILD_CFLAGS += -O2 endif +ifndef DISABLE_PAX_PLUGINS +ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-plugin.sh "$(HOSTCC)" "$(CC)"), y) +ifndef DISABLE_PAX_CONSTIFY_PLUGIN +CONSTIFY_PLUGIN := -fplugin=$(objtree)/tools/gcc/constify_plugin.so -DCONSTIFY_PLUGIN +endif +ifdef CONFIG_PAX_MEMORY_STACKLEAK +STACKLEAK_PLUGIN := -fplugin=$(objtree)/tools/gcc/stackleak_plugin.so -DSTACKLEAK_PLUGIN +STACKLEAK_PLUGIN += -fplugin-arg-stackleak_plugin-track-lowest-sp=100 +endif +ifdef CONFIG_KALLOCSTAT_PLUGIN +KALLOCSTAT_PLUGIN := -fplugin=$(objtree)/tools/gcc/kallocstat_plugin.so +endif +ifdef CONFIG_PAX_KERNEXEC_PLUGIN +KERNEXEC_PLUGIN := -fplugin=$(objtree)/tools/gcc/kernexec_plugin.so +KERNEXEC_PLUGIN += -fplugin-arg-kernexec_plugin-method=$(CONFIG_PAX_KERNEXEC_PLUGIN_METHOD) +endif +ifdef CONFIG_CHECKER_PLUGIN +ifeq ($(call cc-ifversion, -ge, 0406, y), y) +CHECKER_PLUGIN := -fplugin=$(objtree)/tools/gcc/checker_plugin.so -DCHECKER_PLUGIN +endif +endif +GCC_PLUGINS := $(CONSTIFY_PLUGIN) $(STACKLEAK_PLUGIN) $(KALLOCSTAT_PLUGIN) $(KERNEXEC_PLUGIN) $(CHECKER_PLUGIN) +export CONSTIFY_PLUGIN STACKLEAK_PLUGIN KERNEXEC_PLUGIN CHECKER_PLUGIN +ifeq ($(KBUILD_EXTMOD),) +gcc-plugins: + $(Q)$(MAKE) $(build)=tools/gcc +else +gcc-plugins: ; +endif +else +gcc-plugins: +ifeq ($(call cc-ifversion, -ge, 0405, y), y) + $(error Your gcc installation does not support plugins. If the necessary headers for plugin support are missing, they should be installed. On Debian, apt-get install gcc-<ver>-plugin-dev. 
If you choose to ignore this error and lessen the improvements provided by this patch, re-run make with the DISABLE_PAX_PLUGINS=y argument.)) +else + $(Q)echo "warning, your gcc version does not support plugins, you should upgrade it to gcc 4.5 at least" +endif + $(Q)echo "PAX_MEMORY_STACKLEAK and constification will be less secure" +endif +endif + include $(srctree)/arch/$(SRCARCH)/Makefile ifneq ($(CONFIG_FRAME_WARN),0) @@ -647,7 +688,7 @@ export mod_strip_cmd ifeq ($(KBUILD_EXTMOD),) -core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ +core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ grsecurity/ vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ @@ -868,6 +909,7 @@ vmlinux.o: $(modpost-init) $(vmlinux-main) FORCE # The actual objects are generated when descending, # make sure no implicit rule kicks in +$(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): KBUILD_CFLAGS += $(GCC_PLUGINS) $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ; # Handle descending into subdirectories listed in $(vmlinux-dirs) @@ -877,7 +919,7 @@ $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ; # Error messages still appears in the original language PHONY += $(vmlinux-dirs) -$(vmlinux-dirs): prepare scripts +$(vmlinux-dirs): gcc-plugins prepare scripts $(Q)$(MAKE) $(build)=$@ # Build the kernel release string @@ -986,6 +1028,7 @@ prepare0: archprepare FORCE $(Q)$(MAKE) $(build)=. missing-syscalls # All the preparing.. +prepare: KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS),$(KBUILD_CFLAGS)) prepare: prepare0 # The asm symlink changes when $(ARCH) changes. @@ -1127,6 +1170,7 @@ all: modules # using awk while concatenating to the final file. 
PHONY += modules +modules: KBUILD_CFLAGS += $(GCC_PLUGINS) modules: $(vmlinux-dirs) $(if $(KBUILD_BUILTIN),vmlinux) $(Q)$(AWK) '!x[$$0]++' $(vmlinux-dirs:%=$(objtree)/%/modules.order) > $(objtree)/modules.order @$(kecho) ' Building modules, stage 2.'; @@ -1136,7 +1180,7 @@ modules: $(vmlinux-dirs) $(if $(KBUILD_BUILTIN),vmlinux) # Target to prepare building external modules PHONY += modules_prepare -modules_prepare: prepare scripts +modules_prepare: gcc-plugins prepare scripts # Target to install modules PHONY += modules_install @@ -1201,7 +1245,7 @@ MRPROPER_FILES += .config .config.old include/asm .version .old_version \ include/linux/autoconf.h include/linux/version.h \ include/linux/utsrelease.h \ include/linux/bounds.h include/asm*/asm-offsets.h \ - Module.symvers Module.markers tags TAGS cscope* + Module.symvers Module.markers tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS # clean - Delete most, but leave enough to build external modules # @@ -1245,7 +1289,7 @@ distclean: mrproper @find $(srctree) $(RCS_FIND_IGNORE) \ \( -name '*.orig' -o -name '*.rej' -o -name '*~' \ -o -name '*.bak' -o -name '#*#' -o -name '.*.orig' \ - -o -name '.*.rej' -o -size 0 \ + -o -name '.*.rej' -o -name '*.so' -o -size 0 \ -o -name '*%' -o -name '.*.cmd' -o -name 'core' \) \ -type f -print | xargs rm -f @@ -1292,6 +1336,7 @@ help: @echo ' modules_prepare - Set up for building external modules' @echo ' tags/TAGS - Generate tags file for editors' @echo ' cscope - Generate cscope index' + @echo ' gtags - Generate GNU GLOBAL index' @echo ' kernelrelease - Output the release version string' @echo ' kernelversion - Output the version stored in Makefile' @echo ' headers_install - Install sanitised kernel headers to INSTALL_HDR_PATH'; \ @@ -1393,6 +1438,7 @@ PHONY += $(module-dirs) modules $(module-dirs): crmodverdir $(objtree)/Module.symvers $(Q)$(MAKE) $(build)=$(patsubst _module_%,%,$@) +modules: KBUILD_CFLAGS += $(GCC_PLUGINS) modules: $(module-dirs) @$(kecho) ' Building modules, stage 
2.'; $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost @@ -1448,7 +1494,7 @@ endif # KBUILD_EXTMOD quiet_cmd_tags = GEN $@ cmd_tags = $(CONFIG_SHELL) $(srctree)/scripts/tags.sh $@ -tags TAGS cscope: FORCE +tags TAGS cscope gtags: FORCE $(call cmd,tags) # Scripts to check various things for consistency @@ -1513,17 +1559,19 @@ else target-dir = $(if $(KBUILD_EXTMOD),$(dir $<),$(dir $@)) endif -%.s: %.c prepare scripts FORCE +%.s: KBUILD_CFLAGS += $(GCC_PLUGINS) +%.s: %.c gcc-plugins prepare scripts FORCE $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) %.i: %.c prepare scripts FORCE $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) -%.o: %.c prepare scripts FORCE +%.o: KBUILD_CFLAGS += $(GCC_PLUGINS) +%.o: %.c gcc-plugins prepare scripts FORCE $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) %.lst: %.c prepare scripts FORCE $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) -%.s: %.S prepare scripts FORCE +%.s: %.S gcc-plugins prepare scripts FORCE $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) -%.o: %.S prepare scripts FORCE +%.o: %.S gcc-plugins prepare scripts FORCE $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) %.symtypes: %.c prepare scripts FORCE $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) @@ -1533,11 +1581,13 @@ endif $(cmd_crmodverdir) $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \ $(build)=$(build-dir) -%/: prepare scripts FORCE +%/: KBUILD_CFLAGS += $(GCC_PLUGINS) +%/: gcc-plugins prepare scripts FORCE $(cmd_crmodverdir) $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \ $(build)=$(build-dir) -%.ko: prepare scripts FORCE +%.ko: KBUILD_CFLAGS += $(GCC_PLUGINS) +%.ko: gcc-plugins prepare scripts FORCE $(cmd_crmodverdir) $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \ $(build)=$(build-dir) $(@:.ko=.o) diff --git a/arch/alpha/include/asm/elf.h b/arch/alpha/include/asm/elf.h index 5c75c1b..c82f878 100644 --- a/arch/alpha/include/asm/elf.h +++ 
b/arch/alpha/include/asm/elf.h @@ -91,6 +91,13 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE (current->personality & ADDR_LIMIT_32BIT ? 0x10000 : 0x120000000UL) + +#define PAX_DELTA_MMAP_LEN (current->personality & ADDR_LIMIT_32BIT ? 14 : 28) +#define PAX_DELTA_STACK_LEN (current->personality & ADDR_LIMIT_32BIT ? 14 : 19) +#endif + /* $0 is set by ld.so to a pointer to a function which might be registered using atexit. This provides a mean for the dynamic linker to call DT_FINI functions for shared libraries that have diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 3f0c59f..cf1e100 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -101,6 +101,17 @@ struct vm_area_struct; #define PAGE_SHARED __pgprot(_PAGE_VALID | __ACCESS_BITS) #define PAGE_COPY __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW) #define PAGE_READONLY __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW) + +#ifdef CONFIG_PAX_PAGEEXEC +# define PAGE_SHARED_NOEXEC __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOE) +# define PAGE_COPY_NOEXEC __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW | _PAGE_FOE) +# define PAGE_READONLY_NOEXEC __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW | _PAGE_FOE) +#else +# define PAGE_SHARED_NOEXEC PAGE_SHARED +# define PAGE_COPY_NOEXEC PAGE_COPY +# define PAGE_READONLY_NOEXEC PAGE_READONLY +#endif + #define PAGE_KERNEL __pgprot(_PAGE_VALID | _PAGE_ASM | _PAGE_KRE | _PAGE_KWE) #define _PAGE_NORMAL(x) __pgprot(_PAGE_VALID | __ACCESS_BITS | (x)) diff --git a/arch/alpha/kernel/module.c b/arch/alpha/kernel/module.c index ebc3c89..20cfa63 100644 --- a/arch/alpha/kernel/module.c +++ b/arch/alpha/kernel/module.c @@ -182,7 +182,7 @@ apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab, /* The small sections were sorted to the end of the segment. The following should definitely cover them. 
*/ - gp = (u64)me->module_core + me->core_size - 0x8000; + gp = (u64)me->module_core_rw + me->core_size_rw - 0x8000; got = sechdrs[me->arch.gotsecindex].sh_addr; for (i = 0; i < n; i++) { diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index a94e49c..d71dd44 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1172,7 +1172,7 @@ arch_get_unmapped_area_1(unsigned long addr, unsigned long len, /* At this point: (!vma || addr < vma->vm_end). */ if (limit - len < addr) return -ENOMEM; - if (!vma || addr + len <= vma->vm_start) + if (check_heap_stack_gap(vma, addr, len)) return addr; addr = vma->vm_end; vma = vma->vm_next; @@ -1208,6 +1208,10 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, merely specific addresses, but regions of memory -- perhaps this feature should be incorporated into all ports? */ +#ifdef CONFIG_PAX_RANDMMAP + if (!(current->mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + if (addr) { addr = arch_get_unmapped_area_1 (PAGE_ALIGN(addr), len, limit); if (addr != (unsigned long) -ENOMEM) @@ -1215,8 +1219,8 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, } /* Next, try allocating at TASK_UNMAPPED_BASE. 
*/ - addr = arch_get_unmapped_area_1 (PAGE_ALIGN(TASK_UNMAPPED_BASE), - len, limit); + addr = arch_get_unmapped_area_1 (PAGE_ALIGN(current->mm->mmap_base), len, limit); + if (addr != (unsigned long) -ENOMEM) return addr; diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 00a31de..2ded0f2 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -54,6 +54,124 @@ __load_new_mm_context(struct mm_struct *next_mm) __reload_thread(pcb); } +#ifdef CONFIG_PAX_PAGEEXEC +/* + * PaX: decide what to do with offenders (regs->pc = fault address) + * + * returns 1 when task should be killed + * 2 when patched PLT trampoline was detected + * 3 when unpatched PLT trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_EMUPLT + int err; + + do { /* PaX: patched PLT emulation #1 */ + unsigned int ldah, ldq, jmp; + + err = get_user(ldah, (unsigned int *)regs->pc); + err |= get_user(ldq, (unsigned int *)(regs->pc+4)); + err |= get_user(jmp, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((ldah & 0xFFFF0000U) == 0x277B0000U && + (ldq & 0xFFFF0000U) == 0xA77B0000U && + jmp == 0x6BFB0000U) + { + unsigned long r27, addr; + unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16; + unsigned long addrl = ldq | 0xFFFFFFFFFFFF0000UL; + + addr = regs->r27 + ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL); + err = get_user(r27, (unsigned long *)addr); + if (err) + break; + + regs->r27 = r27; + regs->pc = r27; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #2 */ + unsigned int ldah, lda, br; + + err = get_user(ldah, (unsigned int *)regs->pc); + err |= get_user(lda, (unsigned int *)(regs->pc+4)); + err |= get_user(br, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((ldah & 0xFFFF0000U) == 0x277B0000U && + (lda & 0xFFFF0000U) == 0xA77B0000U && + (br & 0xFFE00000U) == 0xC3E00000U) + { + unsigned long addr = br | 0xFFFFFFFFFFE00000UL; + unsigned long 
addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16; + unsigned long addrl = lda | 0xFFFFFFFFFFFF0000UL; + + regs->r27 += ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL); + regs->pc += 12 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); + return 2; + } + } while (0); + + do { /* PaX: unpatched PLT emulation */ + unsigned int br; + + err = get_user(br, (unsigned int *)regs->pc); + + if (!err && (br & 0xFFE00000U) == 0xC3800000U) { + unsigned int br2, ldq, nop, jmp; + unsigned long addr = br | 0xFFFFFFFFFFE00000UL, resolver; + + addr = regs->pc + 4 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); + err = get_user(br2, (unsigned int *)addr); + err |= get_user(ldq, (unsigned int *)(addr+4)); + err |= get_user(nop, (unsigned int *)(addr+8)); + err |= get_user(jmp, (unsigned int *)(addr+12)); + err |= get_user(resolver, (unsigned long *)(addr+16)); + + if (err) + break; + + if (br2 == 0xC3600000U && + ldq == 0xA77B000CU && + nop == 0x47FF041FU && + jmp == 0x6B7B0000U) + { + regs->r28 = regs->pc+4; + regs->r27 = addr+16; + regs->pc = resolver; + return 3; + } + } + } while (0); +#endif + + return 1; +} + +void pax_report_insns(struct pt_regs *regs, void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int *)pc+i)) + printk(KERN_CONT "???????? "); + else + printk(KERN_CONT "%08x ", c); + } + printk("\n"); +} +#endif /* * This routine handles page faults. 
It determines the address, @@ -131,8 +249,29 @@ do_page_fault(unsigned long address, unsigned long mmcsr, good_area: si_code = SEGV_ACCERR; if (cause < 0) { - if (!(vma->vm_flags & VM_EXEC)) + if (!(vma->vm_flags & VM_EXEC)) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (!(mm->pax_flags & MF_PAX_PAGEEXEC) || address != regs->pc) + goto bad_area; + + up_read(&mm->mmap_sem); + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 2: + case 3: + return; +#endif + + } + pax_report_fault(regs, (void *)regs->pc, (void *)rdusp()); + do_group_exit(SIGKILL); +#else goto bad_area; +#endif + + } } else if (!cause) { /* Allow reads even for write-only mappings */ if (!(vma->vm_flags & (VM_READ | VM_WRITE))) diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index 6aac3f5..265536b 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -109,7 +109,14 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs); the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. */ -#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) +#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) + +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE 0x00008000UL + +#define PAX_DELTA_MMAP_LEN ((current->personality == PER_LINUX_32BIT) ? 16 : 10) +#define PAX_DELTA_STACK_LEN ((current->personality == PER_LINUX_32BIT) ? 16 : 10) +#endif /* When the program starts, a1 contains a pointer to a function to be registered with atexit, as per the SVR4 ABI. 
A value of 0 means we diff --git a/arch/arm/include/asm/kmap_types.h b/arch/arm/include/asm/kmap_types.h index c019949..388fdd1 100644 --- a/arch/arm/include/asm/kmap_types.h +++ b/arch/arm/include/asm/kmap_types.h @@ -19,6 +19,7 @@ enum km_type { KM_SOFTIRQ0, KM_SOFTIRQ1, KM_L2_CACHE, + KM_CLEARPAGE, KM_TYPE_NR }; diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 1d6bd40..fba0cb9 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -22,6 +22,8 @@ #define VERIFY_READ 0 #define VERIFY_WRITE 1 +extern void check_object_size(const void *ptr, unsigned long n, bool to); + /* * The exception table consists of pairs of addresses: the first is the * address of an instruction that is allowed to fault, and the second is @@ -387,8 +389,23 @@ do { \ #ifdef CONFIG_MMU -extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n); -extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n); +extern unsigned long __must_check ___copy_from_user(void *to, const void __user *from, unsigned long n); +extern unsigned long __must_check ___copy_to_user(void __user *to, const void *from, unsigned long n); + +static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n) +{ + if (!__builtin_constant_p(n)) + check_object_size(to, n, false); + return ___copy_from_user(to, from, n); +} + +static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) +{ + if (!__builtin_constant_p(n)) + check_object_size(from, n, true); + return ___copy_to_user(to, from, n); +} + extern unsigned long __must_check __copy_to_user_std(void __user *to, const void *from, unsigned long n); extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); extern unsigned long __must_check __clear_user_std(void __user *addr, unsigned long n); @@ -403,6 +420,9 @@ 
extern unsigned long __must_check __strnlen_user(const char __user *s, long n); static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { + if ((long)n < 0) + return n; + if (access_ok(VERIFY_READ, from, n)) n = __copy_from_user(to, from, n); else /* security hole - plug it */ @@ -412,6 +432,9 @@ static inline unsigned long __must_check copy_from_user(void *to, const void __u static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { + if ((long)n < 0) + return n; + if (access_ok(VERIFY_WRITE, to, n)) n = __copy_to_user(to, from, n); return n; diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 0e62770..e2c2cd6 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -118,8 +118,8 @@ EXPORT_SYMBOL(__strncpy_from_user); #ifdef CONFIG_MMU EXPORT_SYMBOL(copy_page); -EXPORT_SYMBOL(__copy_from_user); -EXPORT_SYMBOL(__copy_to_user); +EXPORT_SYMBOL(___copy_from_user); +EXPORT_SYMBOL(___copy_to_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__get_user_1); diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index ba8ccfe..2dc34dc 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -190,7 +190,7 @@ void kgdb_arch_exit(void) * and we handle the normal undef case within the do_undefinstr * handler. */ -struct kgdb_arch arch_kgdb_ops = { +const struct kgdb_arch arch_kgdb_ops = { #ifndef __ARMEB__ .gdb_bpt_instr = {0xfe, 0xde, 0xff, 0xe7} #else /* ! __ARMEB__ */ diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 3f361a7..6e806e1 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -247,6 +247,8 @@ static void __die(const char *str, int err, struct thread_info *thread, struct p DEFINE_SPINLOCK(die_lock); +extern void gr_handle_kernel_exploit(void); + /* * This function is protected against re-entrancy. 
*/ @@ -271,6 +273,8 @@ NORET_TYPE void die(const char *str, struct pt_regs *regs, int err) if (panic_on_oops) panic("Fatal exception"); + gr_handle_kernel_exploit(); + do_exit(SIGSEGV); } diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index e4fe124..0fc246b 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S @@ -16,7 +16,7 @@ /* * Prototype: * - * size_t __copy_from_user(void *to, const void *from, size_t n) + * size_t ___copy_from_user(void *to, const void *from, size_t n) * * Purpose: * @@ -84,11 +84,11 @@ .text -ENTRY(__copy_from_user) +ENTRY(___copy_from_user) #include "copy_template.S" -ENDPROC(__copy_from_user) +ENDPROC(___copy_from_user) .section .fixup,"ax" .align 0 diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S index 1a71e15..ac7b258 100644 --- a/arch/arm/lib/copy_to_user.S +++ b/arch/arm/lib/copy_to_user.S @@ -16,7 +16,7 @@ /* * Prototype: * - * size_t __copy_to_user(void *to, const void *from, size_t n) + * size_t ___copy_to_user(void *to, const void *from, size_t n) * * Purpose: * @@ -88,11 +88,11 @@ .text ENTRY(__copy_to_user_std) -WEAK(__copy_to_user) +WEAK(___copy_to_user) #include "copy_template.S" -ENDPROC(__copy_to_user) +ENDPROC(___copy_to_user) .section .fixup,"ax" .align 0 diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S index ffdd274..91017b6 100644 --- a/arch/arm/lib/uaccess.S +++ b/arch/arm/lib/uaccess.S @@ -19,7 +19,7 @@ #define PAGE_SHIFT 12 -/* Prototype: int __copy_to_user(void *to, const char *from, size_t n) +/* Prototype: int ___copy_to_user(void *to, const char *from, size_t n) * Purpose : copy a block to user memory from kernel memory * Params : to - user memory * : from - kernel memory @@ -39,7 +39,7 @@ USER( strgtbt r3, [r0], #1) @ May fault sub r2, r2, ip b .Lc2u_dest_aligned -ENTRY(__copy_to_user) +ENTRY(___copy_to_user) stmfd sp!, {r2, r4 - r7, lr} cmp r2, #4 blt .Lc2u_not_enough @@ -277,14 +277,14 @@ USER( strgebt r3, [r0], #1) @ May 
fault ldrgtb r3, [r1], #0 USER( strgtbt r3, [r0], #1) @ May fault b .Lc2u_finished -ENDPROC(__copy_to_user) +ENDPROC(___copy_to_user) .section .fixup,"ax" .align 0 9001: ldmfd sp!, {r0, r4 - r7, pc} .previous -/* Prototype: unsigned long __copy_from_user(void *to,const void *from,unsigned long n); +/* Prototype: unsigned long ___copy_from_user(void *to,const void *from,unsigned long n); * Purpose : copy a block from user memory to kernel memory * Params : to - kernel memory * : from - user memory @@ -303,7 +303,7 @@ USER( ldrgtbt r3, [r1], #1) @ May fault sub r2, r2, ip b .Lcfu_dest_aligned -ENTRY(__copy_from_user) +ENTRY(___copy_from_user) stmfd sp!, {r0, r2, r4 - r7, lr} cmp r2, #4 blt .Lcfu_not_enough @@ -543,7 +543,7 @@ USER( ldrgebt r3, [r1], #1) @ May fault USER( ldrgtbt r3, [r1], #1) @ May fault strgtb r3, [r0], #1 b .Lcfu_finished -ENDPROC(__copy_from_user) +ENDPROC(___copy_from_user) .section .fixup,"ax" .align 0 diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c index 6b967ff..67d5b2b 100644 --- a/arch/arm/lib/uaccess_with_memcpy.c +++ b/arch/arm/lib/uaccess_with_memcpy.c @@ -97,7 +97,7 @@ out: } unsigned long -__copy_to_user(void __user *to, const void *from, unsigned long n) +___copy_to_user(void __user *to, const void *from, unsigned long n) { /* * This test is stubbed out of the main function above to keep diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 4028724..beec230 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -348,7 +348,7 @@ static void at91_pm_end(void) } -static struct platform_suspend_ops at91_pm_ops ={ +static const struct platform_suspend_ops at91_pm_ops ={ .valid = at91_pm_valid_state, .begin = at91_pm_begin, .enter = at91_pm_enter, diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c index 5218943..0a34552 100644 --- a/arch/arm/mach-omap1/pm.c +++ b/arch/arm/mach-omap1/pm.c @@ -647,7 +647,7 @@ static struct irqaction omap_wakeup_irq = { -static 
struct platform_suspend_ops omap_pm_ops ={ +static const struct platform_suspend_ops omap_pm_ops ={ .prepare = omap_pm_prepare, .enter = omap_pm_enter, .finish = omap_pm_finish, diff --git a/arch/arm/mach-omap2/pm24xx.c b/arch/arm/mach-omap2/pm24xx.c index bff5c4e..d4c649b 100644 --- a/arch/arm/mach-omap2/pm24xx.c +++ b/arch/arm/mach-omap2/pm24xx.c @@ -326,7 +326,7 @@ static void omap2_pm_finish(void) enable_hlt(); } -static struct platform_suspend_ops omap_pm_ops = { +static const struct platform_suspend_ops omap_pm_ops = { .prepare = omap2_pm_prepare, .enter = omap2_pm_enter, .finish = omap2_pm_finish, diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 8946319..7d3e661 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -401,7 +401,7 @@ static void omap3_pm_end(void) return; } -static struct platform_suspend_ops omap_pm_ops = { +static const struct platform_suspend_ops omap_pm_ops = { .begin = omap3_pm_begin, .end = omap3_pm_end, .prepare = omap3_pm_prepare, diff --git a/arch/arm/mach-pnx4008/pm.c b/arch/arm/mach-pnx4008/pm.c index b3d8d53..6e68ebc 100644 --- a/arch/arm/mach-pnx4008/pm.c +++ b/arch/arm/mach-pnx4008/pm.c @@ -116,7 +116,7 @@ static int pnx4008_pm_valid(suspend_state_t state) (state == PM_SUSPEND_MEM); } -static struct platform_suspend_ops pnx4008_pm_ops = { +static const struct platform_suspend_ops pnx4008_pm_ops = { .enter = pnx4008_pm_enter, .valid = pnx4008_pm_valid, }; diff --git a/arch/arm/mach-pxa/pm.c b/arch/arm/mach-pxa/pm.c index 7693355..9beb00a 100644 --- a/arch/arm/mach-pxa/pm.c +++ b/arch/arm/mach-pxa/pm.c @@ -95,7 +95,7 @@ void pxa_pm_finish(void) pxa_cpu_pm_fns->finish(); } -static struct platform_suspend_ops pxa_pm_ops = { +static const struct platform_suspend_ops pxa_pm_ops = { .valid = pxa_pm_valid, .enter = pxa_pm_enter, .prepare = pxa_pm_prepare, diff --git a/arch/arm/mach-pxa/sharpsl_pm.c b/arch/arm/mach-pxa/sharpsl_pm.c index 629e05d..06be589 100644 --- 
a/arch/arm/mach-pxa/sharpsl_pm.c +++ b/arch/arm/mach-pxa/sharpsl_pm.c @@ -891,7 +891,7 @@ static void sharpsl_apm_get_power_status(struct apm_power_info *info) } #ifdef CONFIG_PM -static struct platform_suspend_ops sharpsl_pm_ops = { +static const struct platform_suspend_ops sharpsl_pm_ops = { .prepare = pxa_pm_prepare, .finish = pxa_pm_finish, .enter = corgi_pxa_pm_enter, diff --git a/arch/arm/mach-sa1100/pm.c b/arch/arm/mach-sa1100/pm.c index c83fdc8..ab9fc44 100644 --- a/arch/arm/mach-sa1100/pm.c +++ b/arch/arm/mach-sa1100/pm.c @@ -120,7 +120,7 @@ unsigned long sleep_phys_sp(void *sp) return virt_to_phys(sp); } -static struct platform_suspend_ops sa11x0_pm_ops = { +static const struct platform_suspend_ops sa11x0_pm_ops = { .enter = sa11x0_pm_enter, .valid = suspend_valid_only_mem, }; diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 3191cd6..c0739db 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -166,6 +166,13 @@ __do_user_fault(struct task_struct *tsk, unsigned long addr, } #endif +#ifdef CONFIG_PAX_PAGEEXEC + if (fsr & FSR_LNX_PF) { + pax_report_fault(regs, (void *)regs->ARM_pc, (void *)regs->ARM_sp); + do_group_exit(SIGKILL); + } +#endif + tsk->thread.address = addr; tsk->thread.error_code = fsr; tsk->thread.trap_no = 14; @@ -357,6 +364,33 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) } #endif /* CONFIG_MMU */ +#ifdef CONFIG_PAX_PAGEEXEC +void pax_report_insns(struct pt_regs *regs, void *pc, void *sp) +{ + long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 20; i++) { + unsigned char c; + if (get_user(c, (__force unsigned char __user *)pc+i)) + printk(KERN_CONT "?? "); + else + printk(KERN_CONT "%02x ", c); + } + printk("\n"); + + printk(KERN_ERR "PAX: bytes at SP-4: "); + for (i = -1; i < 20; i++) { + unsigned long c; + if (get_user(c, (__force unsigned long __user *)sp+i)) + printk(KERN_CONT "???????? 
"); + else + printk(KERN_CONT "%08lx ", c); + } + printk("\n"); +} +#endif + /* * First Level Translation Fault Handler * diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index f5abc51..7ec524c 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -63,6 +63,10 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, if (len > TASK_SIZE) return -ENOMEM; +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + if (addr) { if (do_align) addr = COLOUR_ALIGN(addr, pgoff); @@ -70,15 +74,14 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + if (TASK_SIZE - len >= addr && check_heap_stack_gap(vma, addr, len)) return addr; } if (len > mm->cached_hole_size) { - start_addr = addr = mm->free_area_cache; + start_addr = addr = mm->free_area_cache; } else { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; + start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; } full_search: @@ -94,14 +97,14 @@ full_search: * Start a new search - just in case we missed * some holes. 
*/ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; + if (start_addr != mm->mmap_base) { + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; goto full_search; } return -ENOMEM; } - if (!vma || addr + len <= vma->vm_start) { + if (check_heap_stack_gap(vma, addr, len)) { /* * Remember the place where we stopped the search: */ diff --git a/arch/arm/plat-s3c/pm.c b/arch/arm/plat-s3c/pm.c index 8d97db2..b66cfa5 100644 --- a/arch/arm/plat-s3c/pm.c +++ b/arch/arm/plat-s3c/pm.c @@ -355,7 +355,7 @@ static void s3c_pm_finish(void) s3c_pm_check_cleanup(); } -static struct platform_suspend_ops s3c_pm_ops = { +static const struct platform_suspend_ops s3c_pm_ops = { .enter = s3c_pm_enter, .prepare = s3c_pm_prepare, .finish = s3c_pm_finish, diff --git a/arch/avr32/include/asm/elf.h b/arch/avr32/include/asm/elf.h index d5d1d41..856e2ed 100644 --- a/arch/avr32/include/asm/elf.h +++ b/arch/avr32/include/asm/elf.h @@ -85,8 +85,14 @@ typedef struct user_fpu_struct elf_fpregset_t; the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. */ -#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) +#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE 0x00001000UL + +#define PAX_DELTA_MMAP_LEN 15 +#define PAX_DELTA_STACK_LEN 15 +#endif /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. 
This could be done in user space, diff --git a/arch/avr32/include/asm/kmap_types.h b/arch/avr32/include/asm/kmap_types.h index b7f5c68..556135c 100644 --- a/arch/avr32/include/asm/kmap_types.h +++ b/arch/avr32/include/asm/kmap_types.h @@ -22,7 +22,8 @@ D(10) KM_IRQ0, D(11) KM_IRQ1, D(12) KM_SOFTIRQ0, D(13) KM_SOFTIRQ1, -D(14) KM_TYPE_NR +D(14) KM_CLEARPAGE, +D(15) KM_TYPE_NR }; #undef D diff --git a/arch/avr32/mach-at32ap/pm.c b/arch/avr32/mach-at32ap/pm.c index f021edf..32d680e 100644 --- a/arch/avr32/mach-at32ap/pm.c +++ b/arch/avr32/mach-at32ap/pm.c @@ -176,7 +176,7 @@ out: return 0; } -static struct platform_suspend_ops avr32_pm_ops = { +static const struct platform_suspend_ops avr32_pm_ops = { .valid = avr32_pm_valid_state, .enter = avr32_pm_enter, }; diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c index b61d86d..e292c7f 100644 --- a/arch/avr32/mm/fault.c +++ b/arch/avr32/mm/fault.c @@ -41,6 +41,23 @@ static inline int notify_page_fault(struct pt_regs *regs, int trap) int exception_trace = 1; +#ifdef CONFIG_PAX_PAGEEXEC +void pax_report_insns(struct pt_regs *regs, void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 20; i++) { + unsigned char c; + if (get_user(c, (unsigned char *)pc+i)) + printk(KERN_CONT "?? "); + else + printk(KERN_CONT "%02x ", c); + } + printk("\n"); +} +#endif + /* * This routine handles page faults. It determines the address and the * problem, and then passes it off to one of the appropriate routines. 
@@ -157,6 +174,16 @@ bad_area: up_read(&mm->mmap_sem); if (user_mode(regs)) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (mm->pax_flags & MF_PAX_PAGEEXEC) { + if (ecr == ECR_PROTECTION_X || ecr == ECR_TLB_MISS_X) { + pax_report_fault(regs, (void *)regs->pc, (void *)regs->sp); + do_group_exit(SIGKILL); + } + } +#endif + if (exception_trace && printk_ratelimit()) printk("%s%s[%d]: segfault at %08lx pc %08lx " "sp %08lx ecr %lu\n", diff --git a/arch/blackfin/kernel/kgdb.c b/arch/blackfin/kernel/kgdb.c index cce79d0..c406c85 100644 --- a/arch/blackfin/kernel/kgdb.c +++ b/arch/blackfin/kernel/kgdb.c @@ -428,7 +428,7 @@ int kgdb_arch_handle_exception(int vector, int signo, return -1; /* this means that we do not want to exit from the handler */ } -struct kgdb_arch arch_kgdb_ops = { +const struct kgdb_arch arch_kgdb_ops = { .gdb_bpt_instr = {0xa1}, #ifdef CONFIG_SMP .flags = KGDB_HW_BREAKPOINT|KGDB_THR_PROC_SWAP, diff --git a/arch/blackfin/mach-common/pm.c b/arch/blackfin/mach-common/pm.c index 8837be4..b2fb413 100644 --- a/arch/blackfin/mach-common/pm.c +++ b/arch/blackfin/mach-common/pm.c @@ -255,7 +255,7 @@ static int bfin_pm_enter(suspend_state_t state) return 0; } -struct platform_suspend_ops bfin_pm_ops = { +const struct platform_suspend_ops bfin_pm_ops = { .enter = bfin_pm_enter, .valid = bfin_pm_valid, }; diff --git a/arch/frv/include/asm/kmap_types.h b/arch/frv/include/asm/kmap_types.h index f8e16b2..c73ff79 100644 --- a/arch/frv/include/asm/kmap_types.h +++ b/arch/frv/include/asm/kmap_types.h @@ -23,6 +23,7 @@ enum km_type { KM_IRQ1, KM_SOFTIRQ0, KM_SOFTIRQ1, + KM_CLEARPAGE, KM_TYPE_NR }; diff --git a/arch/frv/mm/elf-fdpic.c b/arch/frv/mm/elf-fdpic.c index 385fd30..6c3d97e 100644 --- a/arch/frv/mm/elf-fdpic.c +++ b/arch/frv/mm/elf-fdpic.c @@ -73,8 +73,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma(current->mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= 
vma->vm_start)) + if (TASK_SIZE - len >= addr && check_heap_stack_gap(vma, addr, len)) goto success; } @@ -89,7 +88,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi for (; vma; vma = vma->vm_next) { if (addr > limit) break; - if (addr + len <= vma->vm_start) + if (check_heap_stack_gap(vma, addr, len)) goto success; addr = vma->vm_end; } @@ -104,7 +103,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi for (; vma; vma = vma->vm_next) { if (addr > limit) break; - if (addr + len <= vma->vm_start) + if (check_heap_stack_gap(vma, addr, len)) goto success; addr = vma->vm_end; } diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c index e4a80d8..11a7ea1 100644 --- a/arch/ia64/hp/common/hwsw_iommu.c +++ b/arch/ia64/hp/common/hwsw_iommu.c @@ -17,7 +17,7 @@ #include #include -extern struct dma_map_ops sba_dma_ops, swiotlb_dma_ops; +extern const struct dma_map_ops sba_dma_ops, swiotlb_dma_ops; /* swiotlb declarations & definitions: */ extern int swiotlb_late_init_with_default_size (size_t size); @@ -33,7 +33,7 @@ static inline int use_swiotlb(struct device *dev) !sba_dma_ops.dma_supported(dev, *dev->dma_mask); } -struct dma_map_ops *hwsw_dma_get_ops(struct device *dev) +const struct dma_map_ops *hwsw_dma_get_ops(struct device *dev) { if (use_swiotlb(dev)) return &swiotlb_dma_ops; diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 01ae69b..35752fd 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -2097,7 +2097,7 @@ static struct acpi_driver acpi_sba_ioc_driver = { }, }; -extern struct dma_map_ops swiotlb_dma_ops; +extern const struct dma_map_ops swiotlb_dma_ops; static int __init sba_init(void) @@ -2211,7 +2211,7 @@ sba_page_override(char *str) __setup("sbapagesize=",sba_page_override); -struct dma_map_ops sba_dma_ops = { +const struct dma_map_ops sba_dma_ops = { .alloc_coherent = sba_alloc_coherent, 
.free_coherent = sba_free_coherent, .map_page = sba_map_page, diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c index c69552b..c7122f4 100644 --- a/arch/ia64/ia32/binfmt_elf32.c +++ b/arch/ia64/ia32/binfmt_elf32.c @@ -45,6 +45,13 @@ randomize_stack_top(unsigned long stack_top); #define elf_read_implies_exec(ex, have_pt_gnu_stack) (!(have_pt_gnu_stack)) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE (current->personality == PER_LINUX32 ? 0x08048000UL : 0x4000000000000000UL) + +#define PAX_DELTA_MMAP_LEN (current->personality == PER_LINUX32 ? 16 : 3*PAGE_SHIFT - 13) +#define PAX_DELTA_STACK_LEN (current->personality == PER_LINUX32 ? 16 : 3*PAGE_SHIFT - 13) +#endif + /* Ugly but avoids duplication */ #include "../../../fs/binfmt_elf.c" diff --git a/arch/ia64/ia32/ia32priv.h b/arch/ia64/ia32/ia32priv.h index 0f15349..26b3429 100644 --- a/arch/ia64/ia32/ia32priv.h +++ b/arch/ia64/ia32/ia32priv.h @@ -296,7 +296,14 @@ typedef struct compat_siginfo { #define ELF_DATA ELFDATA2LSB #define ELF_ARCH EM_386 -#define IA32_STACK_TOP IA32_PAGE_OFFSET +#ifdef CONFIG_PAX_RANDUSTACK +#define __IA32_DELTA_STACK (current->mm->delta_stack) +#else +#define __IA32_DELTA_STACK 0UL +#endif + +#define IA32_STACK_TOP (IA32_PAGE_OFFSET - __IA32_DELTA_STACK) + #define IA32_GATE_OFFSET IA32_PAGE_OFFSET #define IA32_GATE_END IA32_PAGE_OFFSET + PAGE_SIZE diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index 8d3c79c..71b3af6 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -12,7 +12,7 @@ #define ARCH_HAS_DMA_GET_REQUIRED_MASK -extern struct dma_map_ops *dma_ops; +extern const struct dma_map_ops *dma_ops; extern struct ia64_machine_vector ia64_mv; extern void set_iommu_machvec(void); @@ -24,7 +24,7 @@ extern void machvec_dma_sync_sg(struct device *, struct scatterlist *, int, static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *daddr, gfp_t gfp) { - struct 
dma_map_ops *ops = platform_dma_get_ops(dev); + const struct dma_map_ops *ops = platform_dma_get_ops(dev); void *caddr; caddr = ops->alloc_coherent(dev, size, daddr, gfp); @@ -35,7 +35,7 @@ static inline void *dma_alloc_coherent(struct device *dev, size_t size, static inline void dma_free_coherent(struct device *dev, size_t size, void *caddr, dma_addr_t daddr) { - struct dma_map_ops *ops = platform_dma_get_ops(dev); + const struct dma_map_ops *ops = platform_dma_get_ops(dev); debug_dma_free_coherent(dev, size, caddr, daddr); ops->free_coherent(dev, size, caddr, daddr); } @@ -49,13 +49,13 @@ static inline void dma_free_coherent(struct device *dev, size_t size, static inline int dma_mapping_error(struct device *dev, dma_addr_t daddr) { - struct dma_map_ops *ops = platform_dma_get_ops(dev); + const struct dma_map_ops *ops = platform_dma_get_ops(dev); return ops->mapping_error(dev, daddr); } static inline int dma_supported(struct device *dev, u64 mask) { - struct dma_map_ops *ops = platform_dma_get_ops(dev); + const struct dma_map_ops *ops = platform_dma_get_ops(dev); return ops->dma_supported(dev, mask); } diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h index 86eddee..b116bb4 100644 --- a/arch/ia64/include/asm/elf.h +++ b/arch/ia64/include/asm/elf.h @@ -43,6 +43,13 @@ */ #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x800000000UL) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE (current->personality == PER_LINUX32 ? 0x08048000UL : 0x4000000000000000UL) + +#define PAX_DELTA_MMAP_LEN (current->personality == PER_LINUX32 ? 16 : 3*PAGE_SHIFT - 13) +#define PAX_DELTA_STACK_LEN (current->personality == PER_LINUX32 ? 
16 : 3*PAGE_SHIFT - 13) +#endif + #define PT_IA_64_UNWIND 0x70000001 /* IA-64 relocations: */ diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h index 367d299..9ad4279 100644 --- a/arch/ia64/include/asm/machvec.h +++ b/arch/ia64/include/asm/machvec.h @@ -45,7 +45,7 @@ typedef void ia64_mv_kernel_launch_event_t(void); /* DMA-mapping interface: */ typedef void ia64_mv_dma_init (void); typedef u64 ia64_mv_dma_get_required_mask (struct device *); -typedef struct dma_map_ops *ia64_mv_dma_get_ops(struct device *); +typedef const struct dma_map_ops *ia64_mv_dma_get_ops(struct device *); /* * WARNING: The legacy I/O space is _architected_. Platforms are @@ -251,7 +251,7 @@ extern void machvec_init_from_cmdline(const char *cmdline); # endif /* CONFIG_IA64_GENERIC */ extern void swiotlb_dma_init(void); -extern struct dma_map_ops *dma_get_ops(struct device *); +extern const struct dma_map_ops *dma_get_ops(struct device *); /* * Define default versions so we can extend machvec for new platforms without having diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index 8840a69..cdb63d9 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -12,7 +12,7 @@ * David Mosberger-Tang */ - +#include #include #include #include @@ -143,6 +143,17 @@ #define PAGE_READONLY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) #define PAGE_COPY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) #define PAGE_COPY_EXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX) + +#ifdef CONFIG_PAX_PAGEEXEC +# define PAGE_SHARED_NOEXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RW) +# define PAGE_READONLY_NOEXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) +# define PAGE_COPY_NOEXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) +#else +# define PAGE_SHARED_NOEXEC PAGE_SHARED +# define PAGE_READONLY_NOEXEC PAGE_READONLY +# define PAGE_COPY_NOEXEC PAGE_COPY +#endif + #define PAGE_GATE __pgprot(__ACCESS_BITS | 
_PAGE_PL_0 | _PAGE_AR_X_RX) #define PAGE_KERNEL __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX) #define PAGE_KERNELRX __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX) diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h index 239ecdc..f94170e 100644 --- a/arch/ia64/include/asm/spinlock.h +++ b/arch/ia64/include/asm/spinlock.h @@ -72,7 +72,7 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) unsigned short *p = (unsigned short *)&lock->lock + 1, tmp; asm volatile ("ld2.bias %0=[%1]" : "=r"(tmp) : "r"(p)); - ACCESS_ONCE(*p) = (tmp + 2) & ~1; + ACCESS_ONCE_RW(*p) = (tmp + 2) & ~1; } static __always_inline void __ticket_spin_unlock_wait(raw_spinlock_t *lock) diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h index 449c8c0..432a3d2 100644 --- a/arch/ia64/include/asm/uaccess.h +++ b/arch/ia64/include/asm/uaccess.h @@ -257,7 +257,7 @@ __copy_from_user (void *to, const void __user *from, unsigned long count) const void *__cu_from = (from); \ long __cu_len = (n); \ \ - if (__access_ok(__cu_to, __cu_len, get_fs())) \ + if (__cu_len > 0 && __cu_len <= INT_MAX && __access_ok(__cu_to, __cu_len, get_fs())) \ __cu_len = __copy_user(__cu_to, (__force void __user *) __cu_from, __cu_len); \ __cu_len; \ }) @@ -269,7 +269,7 @@ __copy_from_user (void *to, const void __user *from, unsigned long count) long __cu_len = (n); \ \ __chk_user_ptr(__cu_from); \ - if (__access_ok(__cu_from, __cu_len, get_fs())) \ + if (__cu_len > 0 && __cu_len <= INT_MAX && __access_ok(__cu_from, __cu_len, get_fs())) \ __cu_len = __copy_user((__force void __user *) __cu_to, __cu_from, __cu_len); \ __cu_len; \ }) diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c index f2c1600..969398a 100644 --- a/arch/ia64/kernel/dma-mapping.c +++ b/arch/ia64/kernel/dma-mapping.c @@ -3,7 +3,7 @@ /* Set this to 1 if there is a HW IOMMU in the system */ int iommu_detected __read_mostly; -struct dma_map_ops *dma_ops; 
+const struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) @@ -16,7 +16,7 @@ static int __init dma_init(void) } fs_initcall(dma_init); -struct dma_map_ops *dma_get_ops(struct device *dev) +const struct dma_map_ops *dma_get_ops(struct device *dev) { return dma_ops; } diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index 1481b0a..e7d38ff 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -315,8 +315,7 @@ module_alloc (unsigned long size) void module_free (struct module *mod, void *module_region) { - if (mod && mod->arch.init_unw_table && - module_region == mod->module_init) { + if (mod && mod->arch.init_unw_table && module_region == mod->module_init_rx) { unw_remove_unwind_table(mod->arch.init_unw_table); mod->arch.init_unw_table = NULL; } @@ -502,15 +501,39 @@ module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, } static inline int +in_init_rx (const struct module *mod, uint64_t addr) +{ + return addr - (uint64_t) mod->module_init_rx < mod->init_size_rx; +} + +static inline int +in_init_rw (const struct module *mod, uint64_t addr) +{ + return addr - (uint64_t) mod->module_init_rw < mod->init_size_rw; +} + +static inline int in_init (const struct module *mod, uint64_t addr) { - return addr - (uint64_t) mod->module_init < mod->init_size; + return in_init_rx(mod, addr) || in_init_rw(mod, addr); +} + +static inline int +in_core_rx (const struct module *mod, uint64_t addr) +{ + return addr - (uint64_t) mod->module_core_rx < mod->core_size_rx; +} + +static inline int +in_core_rw (const struct module *mod, uint64_t addr) +{ + return addr - (uint64_t) mod->module_core_rw < mod->core_size_rw; } static inline int in_core (const struct module *mod, uint64_t addr) { - return addr - (uint64_t) mod->module_core < mod->core_size; + return in_core_rx(mod, addr) || in_core_rw(mod, addr); } static inline int @@ -693,7 +716,14 @@ do_reloc (struct module *mod, uint8_t 
r_type, Elf64_Sym *sym, uint64_t addend, break; case RV_BDREL: - val -= (uint64_t) (in_init(mod, val) ? mod->module_init : mod->module_core); + if (in_init_rx(mod, val)) + val -= (uint64_t) mod->module_init_rx; + else if (in_init_rw(mod, val)) + val -= (uint64_t) mod->module_init_rw; + else if (in_core_rx(mod, val)) + val -= (uint64_t) mod->module_core_rx; + else if (in_core_rw(mod, val)) + val -= (uint64_t) mod->module_core_rw; break; case RV_LTV: @@ -828,15 +858,15 @@ apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symind * addresses have been selected... */ uint64_t gp; - if (mod->core_size > MAX_LTOFF) + if (mod->core_size_rx + mod->core_size_rw > MAX_LTOFF) /* * This takes advantage of fact that SHF_ARCH_SMALL gets allocated * at the end of the module. */ - gp = mod->core_size - MAX_LTOFF / 2; + gp = mod->core_size_rx + mod->core_size_rw - MAX_LTOFF / 2; else - gp = mod->core_size / 2; - gp = (uint64_t) mod->module_core + ((gp + 7) & -8); + gp = (mod->core_size_rx + mod->core_size_rw) / 2; + gp = (uint64_t) mod->module_core_rx + ((gp + 7) & -8); mod->arch.gp = gp; DEBUGP("%s: placing gp at 0x%lx\n", __func__, gp); } diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c index f6b1ff0..de773fb 100644 --- a/arch/ia64/kernel/pci-dma.c +++ b/arch/ia64/kernel/pci-dma.c @@ -43,7 +43,7 @@ struct device fallback_dev = { .dma_mask = &fallback_dev.coherent_dma_mask, }; -extern struct dma_map_ops intel_dma_ops; +extern const struct dma_map_ops intel_dma_ops; static int __init pci_iommu_init(void) { @@ -96,15 +96,34 @@ int iommu_dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(iommu_dma_supported); +extern void *intel_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags); +extern void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle); +extern int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, 
struct dma_attrs *attrs); +extern void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, struct dma_attrs *attrs); +extern dma_addr_t intel_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs); +extern void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs); +extern int intel_mapping_error(struct device *dev, dma_addr_t dma_addr); + +static const struct dma_map_ops intel_iommu_dma_ops = { + /* from drivers/pci/intel-iommu.c:intel_dma_ops */ + .alloc_coherent = intel_alloc_coherent, + .free_coherent = intel_free_coherent, + .map_sg = intel_map_sg, + .unmap_sg = intel_unmap_sg, + .map_page = intel_map_page, + .unmap_page = intel_unmap_page, + .mapping_error = intel_mapping_error, + + .sync_single_for_cpu = machvec_dma_sync_single, + .sync_sg_for_cpu = machvec_dma_sync_sg, + .sync_single_for_device = machvec_dma_sync_single, + .sync_sg_for_device = machvec_dma_sync_sg, + .dma_supported = iommu_dma_supported, +}; + void __init pci_iommu_alloc(void) { - dma_ops = &intel_dma_ops; - - dma_ops->sync_single_for_cpu = machvec_dma_sync_single; - dma_ops->sync_sg_for_cpu = machvec_dma_sync_sg; - dma_ops->sync_single_for_device = machvec_dma_sync_single; - dma_ops->sync_sg_for_device = machvec_dma_sync_sg; - dma_ops->dma_supported = iommu_dma_supported; + dma_ops = &intel_iommu_dma_ops; /* * The order of these functions is important for diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c index 285aae8..61dbab6 100644 --- a/arch/ia64/kernel/pci-swiotlb.c +++ b/arch/ia64/kernel/pci-swiotlb.c @@ -21,7 +21,7 @@ static void *ia64_swiotlb_alloc_coherent(struct device *dev, size_t size, return swiotlb_alloc_coherent(dev, size, dma_handle, gfp); } -struct dma_map_ops swiotlb_dma_ops = { +const struct dma_map_ops swiotlb_dma_ops = { .alloc_coherent = 
ia64_swiotlb_alloc_coherent, .free_coherent = swiotlb_free_coherent, .map_page = swiotlb_map_page, diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index 609d500..7dde2a8 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -43,6 +43,13 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len if (REGION_NUMBER(addr) == RGN_HPAGE) addr = 0; #endif + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + addr = mm->free_area_cache; + else +#endif + if (!addr) addr = mm->free_area_cache; @@ -61,14 +68,14 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { /* At this point: (!vma || addr < vma->vm_end). */ if (TASK_SIZE - len < addr || RGN_MAP_LIMIT - len < REGION_OFFSET(addr)) { - if (start_addr != TASK_UNMAPPED_BASE) { + if (start_addr != mm->mmap_base) { /* Start a new search --- just in case we missed some holes. */ - addr = TASK_UNMAPPED_BASE; + addr = mm->mmap_base; goto full_search; } return -ENOMEM; } - if (!vma || addr + len <= vma->vm_start) { + if (check_heap_stack_gap(vma, addr, len)) { /* Remember the address where we stopped this search: */ mm->free_area_cache = addr + len; return addr; diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 8f06035..b3a5818 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -282,7 +282,7 @@ static ssize_t cache_show(struct kobject * kobj, struct attribute * attr, char * return ret; } -static struct sysfs_ops cache_sysfs_ops = { +static const struct sysfs_ops cache_sysfs_ops = { .show = cache_show }; diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 0a0c77b..8e55a81 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -190,7 +190,7 @@ SECTIONS /* Per-cpu data: */ . 
= ALIGN(PERCPU_PAGE_SIZE); PERCPU_VADDR(PERCPU_ADDR, :percpu) - __phys_per_cpu_start = __per_cpu_load; + __phys_per_cpu_start = per_cpu_load; . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits * into percpu page size */ diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 19261a9..1611b7a 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -72,6 +72,23 @@ mapped_kernel_page_is_present (unsigned long address) return pte_present(pte); } +#ifdef CONFIG_PAX_PAGEEXEC +void pax_report_insns(struct pt_regs *regs, void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 8; i++) { + unsigned int c; + if (get_user(c, (unsigned int *)pc+i)) + printk(KERN_CONT "???????? "); + else + printk(KERN_CONT "%08x ", c); + } + printk("\n"); +} +#endif + void __kprobes ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs) { @@ -145,9 +162,23 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re mask = ( (((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT) | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT)); - if ((vma->vm_flags & mask) != mask) + if ((vma->vm_flags & mask) != mask) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (!(vma->vm_flags & VM_EXEC) && (mask & VM_EXEC)) { + if (!(mm->pax_flags & MF_PAX_PAGEEXEC) || address != regs->cr_iip) + goto bad_area; + + up_read(&mm->mmap_sem); + pax_report_fault(regs, (void *)regs->cr_iip, (void *)regs->r12); + do_group_exit(SIGKILL); + } +#endif + goto bad_area; + } + survive: /* * If for any reason at all we couldn't handle the fault, make diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index b0f6157..a082bbc 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -172,7 +172,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, u /* At this point: (!vmm || addr < vmm->vm_end). 
*/ if (REGION_OFFSET(addr) + len > RGN_MAP_LIMIT) return -ENOMEM; - if (!vmm || (addr + len) <= vmm->vm_start) + if (check_heap_stack_gap(vmm, addr, len)) return addr; addr = ALIGN(vmm->vm_end, HPAGE_SIZE); } diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 1857766..05cc6a3 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -122,6 +122,19 @@ ia64_init_addr_space (void) vma->vm_start = current->thread.rbs_bot & PAGE_MASK; vma->vm_end = vma->vm_start + PAGE_SIZE; vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT; + +#ifdef CONFIG_PAX_PAGEEXEC + if (current->mm->pax_flags & MF_PAX_PAGEEXEC) { + vma->vm_flags &= ~VM_EXEC; + +#ifdef CONFIG_PAX_MPROTECT + if (current->mm->pax_flags & MF_PAX_MPROTECT) + vma->vm_flags &= ~VM_MAYEXEC; +#endif + + } +#endif + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); down_write(&current->mm->mmap_sem); if (insert_vm_struct(current->mm, vma)) { diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index 98b6849..8046766 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -464,7 +464,7 @@ int sn_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size) return ret; } -static struct dma_map_ops sn_dma_ops = { +static const struct dma_map_ops sn_dma_ops = { .alloc_coherent = sn_dma_alloc_coherent, .free_coherent = sn_dma_free_coherent, .map_page = sn_dma_map_page, diff --git a/arch/m32r/lib/usercopy.c b/arch/m32r/lib/usercopy.c index 82abd15..d95ae5d 100644 --- a/arch/m32r/lib/usercopy.c +++ b/arch/m32r/lib/usercopy.c @@ -14,6 +14,9 @@ unsigned long __generic_copy_to_user(void __user *to, const void *from, unsigned long n) { + if ((long)n < 0) + return n; + prefetch(from); if (access_ok(VERIFY_WRITE, to, n)) __copy_user(to,from,n); @@ -23,6 +26,9 @@ __generic_copy_to_user(void __user *to, const void *from, unsigned long n) unsigned long __generic_copy_from_user(void *to, const void __user *from, unsigned long n) { + if ((long)n < 0) + return n; + prefetchw(to); 
if (access_ok(VERIFY_READ, from, n)) __copy_user_zeroing(to,from,n); diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 77f5021..2b1db8a 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -51,6 +51,8 @@ endif cflags-y := -ffunction-sections cflags-y += $(call cc-option, -mno-check-zero-division) +cflags-y += -Wno-sign-compare -Wno-extra + ifdef CONFIG_32BIT ld-emul = $(32bit-emul) vmlinux-32 = vmlinux diff --git a/arch/mips/alchemy/devboards/pm.c b/arch/mips/alchemy/devboards/pm.c index 632f986..fd0378d 100644 --- a/arch/mips/alchemy/devboards/pm.c +++ b/arch/mips/alchemy/devboards/pm.c @@ -78,7 +78,7 @@ static void db1x_pm_end(void) } -static struct platform_suspend_ops db1x_pm_ops = { +static const struct platform_suspend_ops db1x_pm_ops = { .valid = suspend_valid_only_mem, .begin = db1x_pm_begin, .enter = db1x_pm_enter, diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h index 7990694..4e93acf 100644 --- a/arch/mips/include/asm/elf.h +++ b/arch/mips/include/asm/elf.h @@ -368,4 +368,11 @@ extern int dump_task_fpu(struct task_struct *, elf_fpregset_t *); #define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) #endif +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE (test_thread_flag(TIF_32BIT_ADDR) ? 0x00400000UL : 0x00400000UL) + +#define PAX_DELTA_MMAP_LEN (test_thread_flag(TIF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#define PAX_DELTA_STACK_LEN (test_thread_flag(TIF_32BIT_ADDR) ? 
27-PAGE_SHIFT : 36-PAGE_SHIFT) +#endif + #endif /* _ASM_ELF_H */ diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index f266295..627cfff 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -93,7 +93,7 @@ extern void copy_user_highpage(struct page *to, struct page *from, #ifdef CONFIG_CPU_MIPS32 typedef struct { unsigned long pte_low, pte_high; } pte_t; #define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32)) - #define __pte(x) ({ pte_t __pte = {(x), ((unsigned long long)(x)) >> 32}; __pte; }) + #define __pte(x) ({ pte_t __pte = {(x), (x) >> 32}; __pte; }) #else typedef struct { unsigned long long pte; } pte_t; #define pte_val(x) ((x).pte) diff --git a/arch/mips/include/asm/reboot.h b/arch/mips/include/asm/reboot.h index e48c0bf..f3acf65 100644 --- a/arch/mips/include/asm/reboot.h +++ b/arch/mips/include/asm/reboot.h @@ -9,7 +9,7 @@ #ifndef _ASM_REBOOT_H #define _ASM_REBOOT_H -extern void (*_machine_restart)(char *command); -extern void (*_machine_halt)(void); +extern void (*__noreturn _machine_restart)(char *command); +extern void (*__noreturn _machine_halt)(void); #endif /* _ASM_REBOOT_H */ diff --git a/arch/mips/include/asm/system.h b/arch/mips/include/asm/system.h index 83b5509..9fa24a23 100644 --- a/arch/mips/include/asm/system.h +++ b/arch/mips/include/asm/system.h @@ -230,6 +230,6 @@ extern void per_cpu_trap_init(void); */ #define __ARCH_WANT_UNLOCKED_CTXSW -extern unsigned long arch_align_stack(unsigned long sp); +#define arch_align_stack(x) ((x) & ~0xfUL) #endif /* _ASM_SYSTEM_H */ diff --git a/arch/mips/kernel/binfmt_elfn32.c b/arch/mips/kernel/binfmt_elfn32.c index 9fdd8bc..fcf9d68 100644 --- a/arch/mips/kernel/binfmt_elfn32.c +++ b/arch/mips/kernel/binfmt_elfn32.c @@ -50,6 +50,13 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; #undef ELF_ET_DYN_BASE #define ELF_ET_DYN_BASE (TASK32_SIZE / 3 * 2) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE (test_thread_flag(TIF_32BIT_ADDR) ? 
0x00400000UL : 0x00400000UL) + +#define PAX_DELTA_MMAP_LEN (test_thread_flag(TIF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#define PAX_DELTA_STACK_LEN (test_thread_flag(TIF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#endif + #include #include #include diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c index ff44823..cf0b48a 100644 --- a/arch/mips/kernel/binfmt_elfo32.c +++ b/arch/mips/kernel/binfmt_elfo32.c @@ -52,6 +52,13 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; #undef ELF_ET_DYN_BASE #define ELF_ET_DYN_BASE (TASK32_SIZE / 3 * 2) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE (test_thread_flag(TIF_32BIT_ADDR) ? 0x00400000UL : 0x00400000UL) + +#define PAX_DELTA_MMAP_LEN (test_thread_flag(TIF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#define PAX_DELTA_STACK_LEN (test_thread_flag(TIF_32BIT_ADDR) ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#endif + #include /* diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c index 50c9bb8..efdd5f8 100644 --- a/arch/mips/kernel/kgdb.c +++ b/arch/mips/kernel/kgdb.c @@ -245,6 +245,7 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, return -1; } +/* cannot be const */ struct kgdb_arch arch_kgdb_ops; /* diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index f3d73e1..bb3f57a 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -470,15 +470,3 @@ unsigned long get_wchan(struct task_struct *task) out: return pc; } - -/* - * Don't forget that the stack pointer must be aligned on a 8 bytes - * boundary for 32-bits ABI and 16 bytes for 64-bits ABI. 
- */ -unsigned long arch_align_stack(unsigned long sp) -{ - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() & ~PAGE_MASK; - - return sp & ALMASK; -} diff --git a/arch/mips/kernel/reset.c b/arch/mips/kernel/reset.c index 060563a..7fbf310 100644 --- a/arch/mips/kernel/reset.c +++ b/arch/mips/kernel/reset.c @@ -19,8 +19,8 @@ * So handle all using function pointers to machine specific * functions. */ -void (*_machine_restart)(char *command); -void (*_machine_halt)(void); +void (*__noreturn _machine_restart)(char *command); +void (*__noreturn _machine_halt)(void); void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); @@ -29,16 +29,19 @@ void machine_restart(char *command) { if (_machine_restart) _machine_restart(command); + BUG(); } void machine_halt(void) { if (_machine_halt) _machine_halt(); + BUG(); } void machine_power_off(void) { if (pm_power_off) pm_power_off(); + BUG(); } diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 3f7f466..3abe0b5 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -102,17 +102,21 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, do_color_align = 0; if (filp || (flags & MAP_SHARED)) do_color_align = 1; + +#ifdef CONFIG_PAX_RANDMMAP + if (!(current->mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + if (addr) { if (do_color_align) addr = COLOUR_ALIGN(addr, pgoff); else addr = PAGE_ALIGN(addr); vmm = find_vma(current->mm, addr); - if (task_size - len >= addr && - (!vmm || addr + len <= vmm->vm_start)) + if (task_size - len >= addr && check_heap_stack_gap(vmm, addr, len)) return addr; } - addr = TASK_UNMAPPED_BASE; + addr = current->mm->mmap_base; if (do_color_align) addr = COLOUR_ALIGN(addr, pgoff); else @@ -122,7 +126,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, /* At this point: (!vmm || addr < vmm->vm_end). 
*/ if (task_size - len < addr) return -ENOMEM; - if (!vmm || addr + len <= vmm->vm_start) + if (check_heap_stack_gap(vmm, addr, len)) return addr; addr = vmm->vm_end; if (do_color_align) diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index e97a7a2..f18f5b0 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -26,6 +26,23 @@ #include #include /* For VMALLOC_END */ +#ifdef CONFIG_PAX_PAGEEXEC +void pax_report_insns(struct pt_regs *regs, void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int *)pc+i)) + printk(KERN_CONT "???????? "); + else + printk(KERN_CONT "%08x ", c); + } + printk("\n"); +} +#endif + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h index 9c802eb..0592e41 100644 --- a/arch/parisc/include/asm/elf.h +++ b/arch/parisc/include/asm/elf.h @@ -343,6 +343,13 @@ struct pt_regs; /* forward declaration... */ #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x01000000) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE 0x10000UL + +#define PAX_DELTA_MMAP_LEN 16 +#define PAX_DELTA_STACK_LEN 16 +#endif + /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. This could be done in user space, but it's not easy, and we've already done it here. 
*/ diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index a27d2e2..18fd845 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -207,6 +207,17 @@ #define PAGE_EXECREAD __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_EXEC |_PAGE_ACCESSED) #define PAGE_COPY PAGE_EXECREAD #define PAGE_RWX __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_EXEC |_PAGE_ACCESSED) + +#ifdef CONFIG_PAX_PAGEEXEC +# define PAGE_SHARED_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_ACCESSED) +# define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_ACCESSED) +# define PAGE_READONLY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_ACCESSED) +#else +# define PAGE_SHARED_NOEXEC PAGE_SHARED +# define PAGE_COPY_NOEXEC PAGE_COPY +# define PAGE_READONLY_NOEXEC PAGE_READONLY +#endif + #define PAGE_KERNEL __pgprot(_PAGE_KERNEL) #define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE) #define PAGE_KERNEL_UNC __pgprot(_PAGE_KERNEL | _PAGE_NO_CACHE) diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 2120746..8d70a5e 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -95,16 +95,38 @@ /* three functions to determine where in the module core * or init pieces the location is */ +static inline int in_init_rx(struct module *me, void *loc) +{ + return (loc >= me->module_init_rx && + loc < (me->module_init_rx + me->init_size_rx)); +} + +static inline int in_init_rw(struct module *me, void *loc) +{ + return (loc >= me->module_init_rw && + loc < (me->module_init_rw + me->init_size_rw)); +} + static inline int in_init(struct module *me, void *loc) { - return (loc >= me->module_init && - loc <= (me->module_init + me->init_size)); + return in_init_rx(me, loc) || in_init_rw(me, loc); +} + +static inline int in_core_rx(struct module *me, void *loc) +{ + return (loc >= me->module_core_rx && + 
loc < (me->module_core_rx + me->core_size_rx)); +} + +static inline int in_core_rw(struct module *me, void *loc) +{ + return (loc >= me->module_core_rw && + loc < (me->module_core_rw + me->core_size_rw)); } static inline int in_core(struct module *me, void *loc) { - return (loc >= me->module_core && - loc <= (me->module_core + me->core_size)); + return in_core_rx(me, loc) || in_core_rw(me, loc); } static inline int in_local(struct module *me, void *loc) @@ -364,13 +386,13 @@ int module_frob_arch_sections(CONST Elf_Ehdr *hdr, } /* align things a bit */ - me->core_size = ALIGN(me->core_size, 16); - me->arch.got_offset = me->core_size; - me->core_size += gots * sizeof(struct got_entry); + me->core_size_rw = ALIGN(me->core_size_rw, 16); + me->arch.got_offset = me->core_size_rw; + me->core_size_rw += gots * sizeof(struct got_entry); - me->core_size = ALIGN(me->core_size, 16); - me->arch.fdesc_offset = me->core_size; - me->core_size += fdescs * sizeof(Elf_Fdesc); + me->core_size_rw = ALIGN(me->core_size_rw, 16); + me->arch.fdesc_offset = me->core_size_rw; + me->core_size_rw += fdescs * sizeof(Elf_Fdesc); me->arch.got_max = gots; me->arch.fdesc_max = fdescs; @@ -388,7 +410,7 @@ static Elf64_Word get_got(struct module *me, unsigned long value, long addend) BUG_ON(value == 0); - got = me->module_core + me->arch.got_offset; + got = me->module_core_rw + me->arch.got_offset; for (i = 0; got[i].addr; i++) if (got[i].addr == value) goto out; @@ -406,7 +428,7 @@ static Elf64_Word get_got(struct module *me, unsigned long value, long addend) #ifdef CONFIG_64BIT static Elf_Addr get_fdesc(struct module *me, unsigned long value) { - Elf_Fdesc *fdesc = me->module_core + me->arch.fdesc_offset; + Elf_Fdesc *fdesc = me->module_core_rw + me->arch.fdesc_offset; if (!value) { printk(KERN_ERR "%s: zero OPD requested!\n", me->name); @@ -424,7 +446,7 @@ static Elf_Addr get_fdesc(struct module *me, unsigned long value) /* Create new one */ fdesc->addr = value; - fdesc->gp = 
(Elf_Addr)me->module_core + me->arch.got_offset; + fdesc->gp = (Elf_Addr)me->module_core_rw + me->arch.got_offset; return (Elf_Addr)fdesc; } #endif /* CONFIG_64BIT */ @@ -848,7 +870,7 @@ register_unwind_table(struct module *me, table = (unsigned char *)sechdrs[me->arch.unwind_section].sh_addr; end = table + sechdrs[me->arch.unwind_section].sh_size; - gp = (Elf_Addr)me->module_core + me->arch.got_offset; + gp = (Elf_Addr)me->module_core_rw + me->arch.got_offset; DEBUGP("register_unwind_table(), sect = %d at 0x%p - 0x%p (gp=0x%lx)\n", me->arch.unwind_section, table, end, gp); diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 9147391..f3d949a 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -43,7 +43,7 @@ static unsigned long get_unshared_area(unsigned long addr, unsigned long len) /* At this point: (!vma || addr < vma->vm_end). */ if (TASK_SIZE - len < addr) return -ENOMEM; - if (!vma || addr + len <= vma->vm_start) + if (check_heap_stack_gap(vma, addr, len)) return addr; addr = vma->vm_end; } @@ -79,7 +79,7 @@ static unsigned long get_shared_area(struct address_space *mapping, /* At this point: (!vma || addr < vma->vm_end). 
*/ if (TASK_SIZE - len < addr) return -ENOMEM; - if (!vma || addr + len <= vma->vm_start) + if (check_heap_stack_gap(vma, addr, len)) return addr; addr = DCACHE_ALIGN(vma->vm_end - offset) + offset; if (addr < vma->vm_end) /* handle wraparound */ @@ -98,7 +98,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, if (flags & MAP_FIXED) return addr; if (!addr) - addr = TASK_UNMAPPED_BASE; + addr = current->mm->mmap_base; if (filp) { addr = get_shared_area(filp->f_mapping, addr, len, pgoff); diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 8b58bf0..7afff03 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -733,9 +733,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) down_read(&current->mm->mmap_sem); vma = find_vma(current->mm,regs->iaoq[0]); - if (vma && (regs->iaoq[0] >= vma->vm_start) - && (vma->vm_flags & VM_EXEC)) { - + if (vma && (regs->iaoq[0] >= vma->vm_start)) { fault_address = regs->iaoq[0]; fault_space = regs->iasq[0]; diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index c6afbfc..c5839f6 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -52,7 +53,7 @@ DEFINE_PER_CPU(struct exception_data, exception_data); static unsigned long parisc_acctyp(unsigned long code, unsigned int inst) { - if (code == 6 || code == 16) + if (code == 6 || code == 7 || code == 16) return VM_EXEC; switch (inst & 0xf0000000) { @@ -138,6 +139,116 @@ parisc_acctyp(unsigned long code, unsigned int inst) } #endif +#ifdef CONFIG_PAX_PAGEEXEC +/* + * PaX: decide what to do with offenders (instruction_pointer(regs) = fault address) + * + * returns 1 when task should be killed + * 2 when rt_sigreturn trampoline was detected + * 3 when unpatched PLT trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_EMUPLT + int err; + + do { /* PaX: unpatched
PLT emulation */ + unsigned int bl, depwi; + + err = get_user(bl, (unsigned int *)instruction_pointer(regs)); + err |= get_user(depwi, (unsigned int *)(instruction_pointer(regs)+4)); + + if (err) + break; + + if (bl == 0xEA9F1FDDU && depwi == 0xD6801C1EU) { + unsigned int ldw, bv, ldw2, addr = instruction_pointer(regs)-12; + + err = get_user(ldw, (unsigned int *)addr); + err |= get_user(bv, (unsigned int *)(addr+4)); + err |= get_user(ldw2, (unsigned int *)(addr+8)); + + if (err) + break; + + if (ldw == 0x0E801096U && + bv == 0xEAC0C000U && + ldw2 == 0x0E881095U) + { + unsigned int resolver, map; + + err = get_user(resolver, (unsigned int *)(instruction_pointer(regs)+8)); + err |= get_user(map, (unsigned int *)(instruction_pointer(regs)+12)); + if (err) + break; + + regs->gr[20] = instruction_pointer(regs)+8; + regs->gr[21] = map; + regs->gr[22] = resolver; + regs->iaoq[0] = resolver | 3UL; + regs->iaoq[1] = regs->iaoq[0] + 4; + return 3; + } + } + } while (0); +#endif + +#ifdef CONFIG_PAX_EMUTRAMP + +#ifndef CONFIG_PAX_EMUSIGRT + if (!(current->mm->pax_flags & MF_PAX_EMUTRAMP)) + return 1; +#endif + + do { /* PaX: rt_sigreturn emulation */ + unsigned int ldi1, ldi2, bel, nop; + + err = get_user(ldi1, (unsigned int *)instruction_pointer(regs)); + err |= get_user(ldi2, (unsigned int *)(instruction_pointer(regs)+4)); + err |= get_user(bel, (unsigned int *)(instruction_pointer(regs)+8)); + err |= get_user(nop, (unsigned int *)(instruction_pointer(regs)+12)); + + if (err) + break; + + if ((ldi1 == 0x34190000U || ldi1 == 0x34190002U) && + ldi2 == 0x3414015AU && + bel == 0xE4008200U && + nop == 0x08000240U) + { + regs->gr[25] = (ldi1 & 2) >> 1; + regs->gr[20] = __NR_rt_sigreturn; + regs->gr[31] = regs->iaoq[1] + 16; + regs->sr[0] = regs->iasq[1]; + regs->iaoq[0] = 0x100UL; + regs->iaoq[1] = regs->iaoq[0] + 4; + regs->iasq[0] = regs->sr[2]; + regs->iasq[1] = regs->sr[2]; + return 2; + } + } while (0); +#endif + + return 1; +} + +void pax_report_insns(struct pt_regs *regs, 
void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int *)pc+i)) + printk(KERN_CONT "???????? "); + else + printk(KERN_CONT "%08x ", c); + } + printk("\n"); +} +#endif + int fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *fix; @@ -192,8 +303,33 @@ good_area: acc_type = parisc_acctyp(code,regs->iir); - if ((vma->vm_flags & acc_type) != acc_type) + if ((vma->vm_flags & acc_type) != acc_type) { + +#ifdef CONFIG_PAX_PAGEEXEC + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && (acc_type & VM_EXEC) && + (address & ~3UL) == instruction_pointer(regs)) + { + up_read(&mm->mmap_sem); + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 3: + return; +#endif + +#ifdef CONFIG_PAX_EMUTRAMP + case 2: + return; +#endif + + } + pax_report_fault(regs, (void *)instruction_pointer(regs), (void *)regs->gr[30]); + do_group_exit(SIGKILL); + } +#endif + goto bad_area; + } /* * If for any reason at all we couldn't handle the fault, make diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index c107b74..409dc0f 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -74,6 +74,8 @@ KBUILD_AFLAGS += -Iarch/$(ARCH) KBUILD_CFLAGS += -msoft-float -pipe -Iarch/$(ARCH) $(CFLAGS-y) CPP = $(CC) -E $(KBUILD_CFLAGS) +cflags-y += -Wno-sign-compare -Wno-extra + CHECKFLAGS += -m$(CONFIG_WORD_SIZE) -D__powerpc__ -D__powerpc$(CONFIG_WORD_SIZE)__ ifeq ($(CONFIG_PPC64),y) diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 6d94d27..50d4cad 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -14,7 +14,7 @@ struct dev_archdata { struct device_node *of_node; /* DMA operations on that device */ - struct dma_map_ops *dma_ops; + const struct dma_map_ops *dma_ops; /* * When an iommu is in use, dma_data is used as a ptr to the base of the diff --git 
a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index e281dae..2b8a784 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -69,9 +69,9 @@ static inline unsigned long device_to_mask(struct device *dev) #ifdef CONFIG_PPC64 extern struct dma_map_ops dma_iommu_ops; #endif -extern struct dma_map_ops dma_direct_ops; +extern const struct dma_map_ops dma_direct_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { /* We don't handle the NULL dev case for ISA for now. We could * do it via an out of line call but it is not needed for now. The @@ -84,7 +84,7 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) return dev->archdata.dma_ops; } -static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) +static inline void set_dma_ops(struct device *dev, const struct dma_map_ops *ops) { dev->archdata.dma_ops = ops; } @@ -118,7 +118,7 @@ static inline void set_dma_offset(struct device *dev, dma_addr_t off) static inline int dma_supported(struct device *dev, u64 mask) { - struct dma_map_ops *dma_ops = get_dma_ops(dev); + const struct dma_map_ops *dma_ops = get_dma_ops(dev); if (unlikely(dma_ops == NULL)) return 0; @@ -132,7 +132,7 @@ static inline int dma_supported(struct device *dev, u64 mask) static inline int dma_set_mask(struct device *dev, u64 dma_mask) { - struct dma_map_ops *dma_ops = get_dma_ops(dev); + const struct dma_map_ops *dma_ops = get_dma_ops(dev); if (unlikely(dma_ops == NULL)) return -EIO; @@ -147,7 +147,7 @@ static inline int dma_set_mask(struct device *dev, u64 dma_mask) static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) { - struct dma_map_ops *dma_ops = get_dma_ops(dev); + const struct dma_map_ops *dma_ops = get_dma_ops(dev); void *cpu_addr; BUG_ON(!dma_ops); @@ -162,7 +162,7 @@ static inline void 
*dma_alloc_coherent(struct device *dev, size_t size, static inline void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_handle) { - struct dma_map_ops *dma_ops = get_dma_ops(dev); + const struct dma_map_ops *dma_ops = get_dma_ops(dev); BUG_ON(!dma_ops); @@ -173,7 +173,7 @@ static inline void dma_free_coherent(struct device *dev, size_t size, static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { - struct dma_map_ops *dma_ops = get_dma_ops(dev); + const struct dma_map_ops *dma_ops = get_dma_ops(dev); if (dma_ops->mapping_error) return dma_ops->mapping_error(dev, dma_addr); diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 5698502..5db093c 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -179,8 +179,19 @@ typedef elf_fpreg_t elf_vsrreghalf_t32[ELF_NVSRHALFREG]; the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. */ -extern unsigned long randomize_et_dyn(unsigned long base); -#define ELF_ET_DYN_BASE (randomize_et_dyn(0x20000000)) +#define ELF_ET_DYN_BASE (0x20000000) + +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE (0x10000000UL) + +#ifdef __powerpc64__ +#define PAX_DELTA_MMAP_LEN (test_thread_flag(TIF_32BIT) ? 16 : 28) +#define PAX_DELTA_STACK_LEN (test_thread_flag(TIF_32BIT) ? 
16 : 28) +#else +#define PAX_DELTA_MMAP_LEN 15 +#define PAX_DELTA_STACK_LEN 15 +#endif +#endif /* * Our registers are always unsigned longs, whether we're a 32 bit @@ -275,9 +286,6 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, (0x7ff >> (PAGE_SHIFT - 12)) : \ (0x3ffff >> (PAGE_SHIFT - 12))) -extern unsigned long arch_randomize_brk(struct mm_struct *mm); -#define arch_randomize_brk arch_randomize_brk - #endif /* __KERNEL__ */ /* diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index edfc980..1766f59 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -116,6 +116,9 @@ extern void iommu_init_early_iSeries(void); extern void iommu_init_early_dart(void); extern void iommu_init_early_pasemi(void); +/* dma-iommu.c */ +extern int dma_iommu_dma_supported(struct device *dev, u64 mask); + #ifdef CONFIG_PCI extern void pci_iommu_init(void); extern void pci_direct_iommu_init(void); diff --git a/arch/powerpc/include/asm/kmap_types.h b/arch/powerpc/include/asm/kmap_types.h index 9163695..5a00112 100644 --- a/arch/powerpc/include/asm/kmap_types.h +++ b/arch/powerpc/include/asm/kmap_types.h @@ -26,6 +26,7 @@ enum km_type { KM_SOFTIRQ1, KM_PPC_SYNC_PAGE, KM_PPC_SYNC_ICACHE, + KM_CLEARPAGE, KM_TYPE_NR }; diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index ff24254..fe45b21 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -116,8 +116,9 @@ extern phys_addr_t kernstart_addr; * and needs to be executable. This means the whole heap ends * up being executable. */ -#define VM_DATA_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS32 \ + (((current->personality & READ_IMPLIES_EXEC) ? 
VM_EXEC : 0) | \ + VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) @@ -145,6 +146,9 @@ extern phys_addr_t kernstart_addr; #define is_kernel_addr(x) ((x) >= PAGE_OFFSET) #endif +#define ktla_ktva(addr) (addr) +#define ktva_ktla(addr) (addr) + #ifndef __ASSEMBLY__ #undef STRICT_MM_TYPECHECKS diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index 3f17b83..1f9e766 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -180,15 +180,18 @@ do { \ * stack by default, so in the absense of a PT_GNU_STACK program header * we turn execute permission off. */ -#define VM_STACK_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_STACK_DEFAULT_FLAGS32 \ + (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ + VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #define VM_STACK_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#ifndef CONFIG_PAX_PAGEEXEC #define VM_STACK_DEFAULT_FLAGS \ (test_thread_flag(TIF_32BIT) ? 
\ VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64) +#endif #include diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index b5ea626..4030822 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -65,8 +65,8 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) } #ifdef CONFIG_PCI -extern void set_pci_dma_ops(struct dma_map_ops *dma_ops); -extern struct dma_map_ops *get_pci_dma_ops(void); +extern void set_pci_dma_ops(const struct dma_map_ops *dma_ops); +extern const struct dma_map_ops *get_pci_dma_ops(void); #else /* CONFIG_PCI */ #define set_pci_dma_ops(d) #define get_pci_dma_ops() NULL diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 2a5da06..d65bea2 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -2,6 +2,7 @@ #define _ASM_POWERPC_PGTABLE_H #ifdef __KERNEL__ +#include #ifndef __ASSEMBLY__ #include /* For TASK_SIZE */ #include diff --git a/arch/powerpc/include/asm/pte-hash32.h b/arch/powerpc/include/asm/pte-hash32.h index 4aad413..85d86bf 100644 --- a/arch/powerpc/include/asm/pte-hash32.h +++ b/arch/powerpc/include/asm/pte-hash32.h @@ -21,6 +21,7 @@ #define _PAGE_FILE 0x004 /* when !present: nonlinear file mapping */ #define _PAGE_USER 0x004 /* usermode access allowed */ #define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */ +#define _PAGE_EXEC _PAGE_GUARDED #define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */ #define _PAGE_NO_CACHE 0x020 /* I: cache inhibit */ #define _PAGE_WRITETHRU 0x040 /* W: cache write-through */ diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 8c34149..78f425a 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -103,7 +103,7 @@ extern unsigned long profile_pc(struct pt_regs *regs); } while(0) struct task_struct; -extern unsigned long ptrace_get_reg(struct 
task_struct *task, int regno); +extern unsigned long ptrace_get_reg(struct task_struct *task, unsigned int regno); extern int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 32a7c30..be3a8bb 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -191,6 +191,7 @@ #define SPRN_DBCR 0x136 /* e300 Data Breakpoint Control Reg */ #define SPRN_DSISR 0x012 /* Data Storage Interrupt Status Register */ #define DSISR_NOHPTE 0x40000000 /* no translation found */ +#define DSISR_GUARDED 0x10000000 /* fetch from guarded storage */ #define DSISR_PROTFAULT 0x08000000 /* protection fault */ #define DSISR_ISSTORE 0x02000000 /* access was a store */ #define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */ diff --git a/arch/powerpc/include/asm/swiotlb.h b/arch/powerpc/include/asm/swiotlb.h index 8979d4c..d2fd0d3 100644 --- a/arch/powerpc/include/asm/swiotlb.h +++ b/arch/powerpc/include/asm/swiotlb.h @@ -13,7 +13,7 @@ #include -extern struct dma_map_ops swiotlb_dma_ops; +extern const struct dma_map_ops swiotlb_dma_ops; static inline void dma_mark_clean(void *addr, size_t size) {} diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h index 094a12a..877a60a 100644 --- a/arch/powerpc/include/asm/system.h +++ b/arch/powerpc/include/asm/system.h @@ -531,7 +531,7 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) #endif -extern unsigned long arch_align_stack(unsigned long sp); +#define arch_align_stack(x) ((x) & ~0xfUL) /* Used in very early kernel initialization. 
*/ extern unsigned long reloc_offset(void); diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index bd0fb84..a42a14b 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -13,6 +13,8 @@ #define VERIFY_READ 0 #define VERIFY_WRITE 1 +extern void check_object_size(const void *ptr, unsigned long n, bool to); + /* * The fs value determines whether argument validity checking should be * performed or not. If get_fs() == USER_DS, checking is performed, with @@ -327,52 +329,6 @@ do { \ extern unsigned long __copy_tofrom_user(void __user *to, const void __user *from, unsigned long size); -#ifndef __powerpc64__ - -static inline unsigned long copy_from_user(void *to, - const void __user *from, unsigned long n) -{ - unsigned long over; - - if (access_ok(VERIFY_READ, from, n)) - return __copy_tofrom_user((__force void __user *)to, from, n); - if ((unsigned long)from < TASK_SIZE) { - over = (unsigned long)from + n - TASK_SIZE; - return __copy_tofrom_user((__force void __user *)to, from, - n - over) + over; - } - return n; -} - -static inline unsigned long copy_to_user(void __user *to, - const void *from, unsigned long n) -{ - unsigned long over; - - if (access_ok(VERIFY_WRITE, to, n)) - return __copy_tofrom_user(to, (__force void __user *)from, n); - if ((unsigned long)to < TASK_SIZE) { - over = (unsigned long)to + n - TASK_SIZE; - return __copy_tofrom_user(to, (__force void __user *)from, - n - over) + over; - } - return n; -} - -#else /* __powerpc64__ */ - -#define __copy_in_user(to, from, size) \ - __copy_tofrom_user((to), (from), (size)) - -extern unsigned long copy_from_user(void *to, const void __user *from, - unsigned long n); -extern unsigned long copy_to_user(void __user *to, const void *from, - unsigned long n); -extern unsigned long copy_in_user(void __user *to, const void __user *from, - unsigned long n); - -#endif /* __powerpc64__ */ - static inline unsigned long __copy_from_user_inatomic(void 
*to, const void __user *from, unsigned long n) { @@ -396,6 +352,10 @@ static inline unsigned long __copy_from_user_inatomic(void *to, if (ret == 0) return 0; } + + if (!__builtin_constant_p(n)) + check_object_size(to, n, false); + return __copy_tofrom_user((__force void __user *)to, from, n); } @@ -422,6 +382,10 @@ static inline unsigned long __copy_to_user_inatomic(void __user *to, if (ret == 0) return 0; } + + if (!__builtin_constant_p(n)) + check_object_size(from, n, true); + return __copy_tofrom_user(to, (__force const void __user *)from, n); } @@ -439,6 +403,92 @@ static inline unsigned long __copy_to_user(void __user *to, return __copy_to_user_inatomic(to, from, size); } +#ifndef __powerpc64__ + +static inline unsigned long __must_check copy_from_user(void *to, + const void __user *from, unsigned long n) +{ + unsigned long over; + + if ((long)n < 0) + return n; + + if (access_ok(VERIFY_READ, from, n)) { + if (!__builtin_constant_p(n)) + check_object_size(to, n, false); + return __copy_tofrom_user((__force void __user *)to, from, n); + } + if ((unsigned long)from < TASK_SIZE) { + over = (unsigned long)from + n - TASK_SIZE; + if (!__builtin_constant_p(n - over)) + check_object_size(to, n - over, false); + return __copy_tofrom_user((__force void __user *)to, from, + n - over) + over; + } + return n; +} + +static inline unsigned long __must_check copy_to_user(void __user *to, + const void *from, unsigned long n) +{ + unsigned long over; + + if ((long)n < 0) + return n; + + if (access_ok(VERIFY_WRITE, to, n)) { + if (!__builtin_constant_p(n)) + check_object_size(from, n, true); + return __copy_tofrom_user(to, (__force void __user *)from, n); + } + if ((unsigned long)to < TASK_SIZE) { + over = (unsigned long)to + n - TASK_SIZE; + if (!__builtin_constant_p(n)) + check_object_size(from, n - over, true); + return __copy_tofrom_user(to, (__force void __user *)from, + n - over) + over; + } + return n; +} + +#else /* __powerpc64__ */ + +#define __copy_in_user(to, from, 
size) \ + __copy_tofrom_user((to), (from), (size)) + +static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) +{ + if ((long)n < 0 || n > INT_MAX) + return n; + + if (!__builtin_constant_p(n)) + check_object_size(to, n, false); + + if (likely(access_ok(VERIFY_READ, from, n))) + n = __copy_from_user(to, from, n); + else + memset(to, 0, n); + return n; +} + +static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) +{ + if ((long)n < 0 || n > INT_MAX) + return n; + + if (likely(access_ok(VERIFY_WRITE, to, n))) { + if (!__builtin_constant_p(n)) + check_object_size(from, n, true); + n = __copy_to_user(to, from, n); + } + return n; +} + +extern unsigned long copy_in_user(void __user *to, const void __user *from, + unsigned long n); + +#endif /* __powerpc64__ */ + extern unsigned long __clear_user(void __user *addr, unsigned long size); static inline unsigned long clear_user(void __user *addr, unsigned long size) diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index bb37b1d..01fe9ce 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -642,7 +642,7 @@ static struct kobj_attribute *cache_index_opt_attrs[] = { &cache_assoc_attr, }; -static struct sysfs_ops cache_index_ops = { +static const struct sysfs_ops cache_index_ops = { .show = cache_index_show, }; diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 37771a5..648530c 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -70,7 +70,7 @@ static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist, } /* We support DMA to/from any memory page via the iommu */ -static int dma_iommu_dma_supported(struct device *dev, u64 mask) +int dma_iommu_dma_supported(struct device *dev, u64 mask) { struct iommu_table *tbl = get_iommu_table_base(dev); diff --git 
a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index e96cbbd..bdd6d41 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -31,7 +31,7 @@ unsigned int ppc_swiotlb_enable; * map_page, and unmap_page on highmem, use normal dma_ops * for everything else. */ -struct dma_map_ops swiotlb_dma_ops = { +const struct dma_map_ops swiotlb_dma_ops = { .alloc_coherent = dma_direct_alloc_coherent, .free_coherent = dma_direct_free_coherent, .map_sg = swiotlb_map_sg_attrs, diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 6215062..ebea59c 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -134,7 +134,7 @@ static inline void dma_direct_sync_single_range(struct device *dev, } #endif -struct dma_map_ops dma_direct_ops = { +const struct dma_map_ops dma_direct_ops = { .alloc_coherent = dma_direct_alloc_coherent, .free_coherent = dma_direct_free_coherent, .map_sg = dma_direct_map_sg, diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 24dcc0e..a300455 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -455,6 +455,7 @@ storage_fault_common: std r14,_DAR(r1) std r15,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD + bl .save_nvgprs mr r4,r14 mr r5,r15 ld r14,PACA_EXGEN+EX_R14(r13) @@ -464,8 +465,7 @@ storage_fault_common: cmpdi r3,0 bne- 1f b .ret_from_except_lite -1: bl .save_nvgprs - mr r5,r3 +1: mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD ld r4,_DAR(r1) bl .bad_page_fault diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 1808876..9fd206a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -818,10 +818,10 @@ handle_page_fault: 11: ld r4,_DAR(r1) ld r5,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD + bl .save_nvgprs bl .do_page_fault cmpdi r3,0 beq+ 13f - bl .save_nvgprs mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD lwz 
r4,_DAR(r1) diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index a4c8b38..1b09ad9 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -127,7 +127,7 @@ static int ibmebus_dma_supported(struct device *dev, u64 mask) return 1; } -static struct dma_map_ops ibmebus_dma_ops = { +static const struct dma_map_ops ibmebus_dma_ops = { .alloc_coherent = ibmebus_alloc_coherent, .free_coherent = ibmebus_free_coherent, .map_sg = ibmebus_map_sg, diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 641c74b..8339ad7 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -126,7 +126,7 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs) if (kgdb_handle_exception(0, SIGTRAP, 0, regs) != 0) return 0; - if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr)) + if (*(u32 *) (regs->nip) == *(const u32 *) (&arch_kgdb_ops.gdb_bpt_instr)) regs->nip += 4; return 1; @@ -353,7 +353,7 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, /* * Global data */ -struct kgdb_arch arch_kgdb_ops = { +const struct kgdb_arch arch_kgdb_ops = { .gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08}, }; diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 477c663..4f50234 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -31,11 +31,24 @@ LIST_HEAD(module_bug_list); +#ifdef CONFIG_PAX_KERNEXEC void *module_alloc(unsigned long size) { if (size == 0) return NULL; + return vmalloc(size); +} + +void *module_alloc_exec(unsigned long size) +#else +void *module_alloc(unsigned long size) +#endif + +{ + if (size == 0) + return NULL; + return vmalloc_exec(size); } @@ -45,6 +58,13 @@ void module_free(struct module *mod, void *module_region) vfree(module_region); } +#ifdef CONFIG_PAX_KERNEXEC +void module_free_exec(struct module *mod, void *module_region) +{ + module_free(mod, module_region); +} +#endif + static const Elf_Shdr 
*find_section(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, const char *name) diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index f832773..0507238 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -162,7 +162,7 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr, me->arch.core_plt_section = i; } if (!me->arch.core_plt_section || !me->arch.init_plt_section) { - printk("Module doesn't contain .plt or .init.plt sections.\n"); + printk("Module %s doesn't contain .plt or .init.plt sections.\n", me->name); return -ENOEXEC; } @@ -203,11 +203,16 @@ static uint32_t do_plt_call(void *location, DEBUGP("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); /* Init, or core PLT? */ - if (location >= mod->module_core - && location < mod->module_core + mod->core_size) + if ((location >= mod->module_core_rx && location < mod->module_core_rx + mod->core_size_rx) || + (location >= mod->module_core_rw && location < mod->module_core_rw + mod->core_size_rw)) entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr; - else + else if ((location >= mod->module_init_rx && location < mod->module_init_rx + mod->init_size_rx) || + (location >= mod->module_init_rw && location < mod->module_init_rw + mod->init_size_rw)) entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; + else { + printk(KERN_ERR "%s: invalid R_PPC_REL24 entry found\n", mod->name); + return ~0UL; + } /* Find this entry, or if that fails, the next avail. 
entry */ while (entry->jump[0]) { diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index cadbed6..b9bbb00 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -50,14 +50,14 @@ resource_size_t isa_mem_base; unsigned int ppc_pci_flags = 0; -static struct dma_map_ops *pci_dma_ops = &dma_direct_ops; +static const struct dma_map_ops *pci_dma_ops = &dma_direct_ops; -void set_pci_dma_ops(struct dma_map_ops *dma_ops) +void set_pci_dma_ops(const struct dma_map_ops *dma_ops) { pci_dma_ops = dma_ops; } -struct dma_map_ops *get_pci_dma_ops(void) +const struct dma_map_ops *get_pci_dma_ops(void) { return pci_dma_ops; } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 7b816da..8d5c277 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -539,8 +539,8 @@ void show_regs(struct pt_regs * regs) * Lookup NIP late so we have the best change of getting the * above info out without failing */ - printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); - printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); + printk("NIP ["REG"] %pA\n", regs->nip, (void *)regs->nip); + printk("LR ["REG"] %pA\n", regs->link, (void *)regs->link); #endif show_stack(current, (unsigned long *) regs->gpr[1]); if (!user_mode(regs)) @@ -1034,10 +1034,10 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) newsp = stack[0]; ip = stack[STACK_FRAME_LR_SAVE]; if (!firstframe || ip != lr) { - printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip); + printk("["REG"] ["REG"] %pA", sp, ip, (void *)ip); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if ((ip == rth || ip == mrth) && curr_frame >= 0) { - printk(" (%pS)", + printk(" (%pA)", (void *)current->ret_stack[curr_frame].ret); curr_frame--; } @@ -1057,7 +1057,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) struct pt_regs *regs = (struct pt_regs *) (sp + STACK_FRAME_OVERHEAD); lr = regs->link; - printk("--- Exception: 
%lx at %pS\n LR = %pS\n", + printk("--- Exception: %lx at %pA\n LR = %pA\n", regs->trap, (void *)regs->nip, (void *)lr); firstframe = 1; } @@ -1134,58 +1134,3 @@ void thread_info_cache_init(void) } #endif /* THREAD_SHIFT < PAGE_SHIFT */ - -unsigned long arch_align_stack(unsigned long sp) -{ - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() & ~PAGE_MASK; - return sp & ~0xf; -} - -static inline unsigned long brk_rnd(void) -{ - unsigned long rnd = 0; - - /* 8MB for 32bit, 1GB for 64bit */ - if (is_32bit_task()) - rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT))); - else - rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT))); - - return rnd << PAGE_SHIFT; -} - -unsigned long arch_randomize_brk(struct mm_struct *mm) -{ - unsigned long base = mm->brk; - unsigned long ret; - -#ifdef CONFIG_PPC_STD_MMU_64 - /* - * If we are using 1TB segments and we are allowed to randomise - * the heap, we can put it above 1TB so it is backed by a 1TB - * segment. Otherwise the heap will be in the bottom 1TB - * which always uses 256MB segments and this may result in a - * performance penalty. - */ - if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T)) - base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T); -#endif - - ret = PAGE_ALIGN(base + brk_rnd()); - - if (ret < mm->brk) - return mm->brk; - - return ret; -} - -unsigned long randomize_et_dyn(unsigned long base) -{ - unsigned long ret = PAGE_ALIGN(base + brk_rnd()); - - if (ret < base) - return base; - - return ret; -} diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index ef14988..856c4bc 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -86,7 +86,7 @@ static int set_user_trap(struct task_struct *task, unsigned long trap) /* * Get contents of register REGNO in task TASK. 
*/ -unsigned long ptrace_get_reg(struct task_struct *task, int regno) +unsigned long ptrace_get_reg(struct task_struct *task, unsigned int regno) { if (task->thread.regs == NULL) return -EIO; @@ -894,7 +894,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) CHECK_FULL_REGS(child->thread.regs); if (index < PT_FPR0) { - tmp = ptrace_get_reg(child, (int) index); + tmp = ptrace_get_reg(child, index); } else { flush_fp_to_thread(child); tmp = ((unsigned long *)child->thread.fpr) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index d670429..2bc59b2 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -857,7 +857,7 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, /* Save user registers on the stack */ frame = &rt_sf->uc.uc_mcontext; addr = frame; - if (vdso32_rt_sigtramp && current->mm->context.vdso_base) { + if (vdso32_rt_sigtramp && current->mm->context.vdso_base != ~0UL) { if (save_user_regs(regs, frame, 0, 1)) goto badframe; regs->link = current->mm->context.vdso_base + vdso32_rt_sigtramp; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 2fe6fc6..ada0d96 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -429,7 +429,7 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, current->thread.fpscr.val = 0; /* Set up to return from userspace. 
*/ - if (vdso64_rt_sigtramp && current->mm->context.vdso_base) { + if (vdso64_rt_sigtramp && current->mm->context.vdso_base != ~0UL) { regs->link = current->mm->context.vdso_base + vdso64_rt_sigtramp; } else { err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index b97c2d6..dd01a6a 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -563,10 +563,10 @@ asmlinkage long compat_sys_sysctl(struct __sysctl_args32 __user *args) if (oldlenp) { if (!error) { if (get_user(oldlen, oldlenp) || - put_user(oldlen, (compat_size_t __user *)compat_ptr(tmp.oldlenp))) + put_user(oldlen, (compat_size_t __user *)compat_ptr(tmp.oldlenp)) || + copy_to_user(args->__unused, tmp.__unused, sizeof(tmp.__unused))) error = -EFAULT; } - copy_to_user(args->__unused, tmp.__unused, sizeof(tmp.__unused)); } return error; } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 6f0ae1a..e4b6a56 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -99,6 +99,8 @@ static void pmac_backlight_unblank(void) static inline void pmac_backlight_unblank(void) { } #endif +extern void gr_handle_kernel_exploit(void); + int die(const char *str, struct pt_regs *regs, long err) { static struct { @@ -168,6 +170,8 @@ int die(const char *str, struct pt_regs *regs, long err) if (panic_on_oops) panic("Fatal exception"); + gr_handle_kernel_exploit(); + oops_exit(); do_exit(err); diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 137dc22..fe57a79 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "setup.h" @@ -220,7 +221,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) vdso_base = VDSO32_MBASE; #endif - current->mm->context.vdso_base = 0; + current->mm->context.vdso_base = ~0UL; /* vDSO has a problem and was 
disabled, just don't "enable" it for the * process @@ -240,7 +241,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) vdso_base = get_unmapped_area(NULL, vdso_base, (vdso_pages << PAGE_SHIFT) + ((VDSO_ALIGNMENT - 1) & PAGE_MASK), - 0, 0); + 0, MAP_PRIVATE | MAP_EXECUTABLE); if (IS_ERR_VALUE(vdso_base)) { rc = vdso_base; goto fail_mmapsem; diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 77f6421..829564a 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -601,11 +601,12 @@ static void vio_dma_iommu_unmap_sg(struct device *dev, vio_cmo_dealloc(viodev, alloc_size); } -struct dma_map_ops vio_dma_mapping_ops = { +static const struct dma_map_ops vio_dma_mapping_ops = { .alloc_coherent = vio_dma_iommu_alloc_coherent, .free_coherent = vio_dma_iommu_free_coherent, .map_sg = vio_dma_iommu_map_sg, .unmap_sg = vio_dma_iommu_unmap_sg, + .dma_supported = dma_iommu_dma_supported, .map_page = vio_dma_iommu_map_page, .unmap_page = vio_dma_iommu_unmap_page, @@ -857,7 +858,6 @@ static void vio_cmo_bus_remove(struct vio_dev *viodev) static void vio_cmo_set_dma_ops(struct vio_dev *viodev) { - vio_dma_mapping_ops.dma_supported = dma_iommu_ops.dma_supported; viodev->dev.archdata.dma_ops = &vio_dma_mapping_ops; } diff --git a/arch/powerpc/lib/usercopy_64.c b/arch/powerpc/lib/usercopy_64.c index 5eea6f3..5d10396 100644 --- a/arch/powerpc/lib/usercopy_64.c +++ b/arch/powerpc/lib/usercopy_64.c @@ -9,22 +9,6 @@ #include #include -unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) -{ - if (likely(access_ok(VERIFY_READ, from, n))) - n = __copy_from_user(to, from, n); - else - memset(to, 0, n); - return n; -} - -unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) -{ - if (likely(access_ok(VERIFY_WRITE, to, n))) - n = __copy_to_user(to, from, n); - return n; -} - unsigned long copy_in_user(void __user *to, const void __user *from, unsigned long n) { @@ -35,7 +19,5 
@@ unsigned long copy_in_user(void __user *to, const void __user *from, return n; } -EXPORT_SYMBOL(copy_from_user); -EXPORT_SYMBOL(copy_to_user); EXPORT_SYMBOL(copy_in_user); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index e7dae82..877ce0d 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -30,6 +30,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -40,6 +44,7 @@ #include #include #include +#include #ifdef CONFIG_KPROBES @@ -64,6 +69,33 @@ static inline int notify_page_fault(struct pt_regs *regs) } #endif +#ifdef CONFIG_PAX_PAGEEXEC +/* + * PaX: decide what to do with offenders (regs->nip = fault address) + * + * returns 1 when task should be killed + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + return 1; +} + +void pax_report_insns(struct pt_regs *regs, void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int __user *)pc+i)) + printk(KERN_CONT "???????? "); + else + printk(KERN_CONT "%08x ", c); + } + printk("\n"); +} +#endif + /* * Check whether the instruction at regs->nip is a store using * an update addressing form which will update r1. @@ -134,7 +166,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, * indicate errors in DSISR but can validly be set in SRR1. */ if (trap == 0x400) - error_code &= 0x48200000; + error_code &= 0x58200000; else is_write = error_code & DSISR_ISSTORE; #else @@ -250,7 +282,7 @@ good_area: * "undefined". Of those that can be set, this is the only * one which seems bad. */ - if (error_code & 0x10000000) + if (error_code & DSISR_GUARDED) /* Guarded storage error. */ goto bad_area; #endif /* CONFIG_8xx */ @@ -265,7 +297,7 @@ good_area: * processors use the same I/D cache coherency mechanism * as embedded. 
*/ - if (error_code & DSISR_PROTFAULT) + if (error_code & (DSISR_PROTFAULT | DSISR_GUARDED)) goto bad_area; #endif /* CONFIG_PPC_STD_MMU */ @@ -335,6 +367,23 @@ bad_area: bad_area_nosemaphore: /* User mode accesses cause a SIGSEGV */ if (user_mode(regs)) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (mm->pax_flags & MF_PAX_PAGEEXEC) { +#ifdef CONFIG_PPC_STD_MMU + if (is_exec && (error_code & (DSISR_PROTFAULT | DSISR_GUARDED))) { +#else + if (is_exec && regs->nip == address) { +#endif + switch (pax_handle_fetch_fault(regs)) { + } + + pax_report_fault(regs, (void *)regs->nip, (void *)regs->gpr[PT_R1]); + do_group_exit(SIGKILL); + } + } +#endif + _exception(SIGSEGV, regs, code, address); return 0; } diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 5973631..ad617af 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -250,7 +250,7 @@ static int __init mark_nonram_nosave(void) { unsigned long lmb_next_region_start_pfn, lmb_region_max_pfn; - int i; + unsigned int i; for (i = 0; i < lmb.memory.cnt - 1; i++) { lmb_region_max_pfn = diff --git a/arch/powerpc/mm/mmap_64.c b/arch/powerpc/mm/mmap_64.c index 0d957a4..26d968f 100644 --- a/arch/powerpc/mm/mmap_64.c +++ b/arch/powerpc/mm/mmap_64.c @@ -99,10 +99,22 @@ void arch_pick_mmap_layout(struct mm_struct *mm) */ if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + mm->get_unmapped_area = arch_get_unmapped_area; mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(); + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base -= mm->delta_mmap + mm->delta_stack; +#endif + mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->unmap_area = arch_unmap_area_topdown; } diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index ba51948..23009d9 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -98,7 +98,7 @@ static int 
slice_area_is_free(struct mm_struct *mm, unsigned long addr, if ((mm->task_size - len) < addr) return 0; vma = find_vma(mm, addr); - return (!vma || (addr + len) <= vma->vm_start); + return check_heap_stack_gap(vma, addr, len); } static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice) @@ -256,7 +256,7 @@ full_search: addr = _ALIGN_UP(addr + 1, 1ul << SLICE_HIGH_SHIFT); continue; } - if (!vma || addr + len <= vma->vm_start) { + if (check_heap_stack_gap(vma, addr, len)) { /* * Remember the place where we stopped the search: */ @@ -313,10 +313,14 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, } } - addr = mm->mmap_base; - while (addr > len) { + if (mm->mmap_base < len) + addr = -ENOMEM; + else + addr = mm->mmap_base - len; + + while (!IS_ERR_VALUE(addr)) { /* Go down by chunk size */ - addr = _ALIGN_DOWN(addr - len, 1ul << pshift); + addr = _ALIGN_DOWN(addr, 1ul << pshift); /* Check for hit with different page size */ mask = slice_range_to_mask(addr, len); @@ -336,7 +340,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, * return with success: */ vma = find_vma(mm, addr); - if (!vma || (addr + len) <= vma->vm_start) { + if (check_heap_stack_gap(vma, addr, len)) { /* remember the address as a hint for next time */ if (use_cache) mm->free_area_cache = addr; @@ -348,7 +352,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, mm->cached_hole_size = vma->vm_start - addr; /* try just below the current vma->vm_start */ - addr = vma->vm_start; + addr = skip_heap_stack_gap(vma, len); } /* @@ -426,6 +430,11 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, if (fixed && addr > (mm->task_size - len)) return -EINVAL; +#ifdef CONFIG_PAX_RANDMMAP + if (!fixed && (mm->pax_flags & MF_PAX_RANDMMAP)) + addr = 0; +#endif + /* If hint, make sure it matches our alignment restrictions */ if (!fixed && addr) { addr = _ALIGN_UP(addr, 1ul << pshift); diff --git 
a/arch/powerpc/platforms/52xx/lite5200_pm.c b/arch/powerpc/platforms/52xx/lite5200_pm.c index b5c753d..8f01abe 100644 --- a/arch/powerpc/platforms/52xx/lite5200_pm.c +++ b/arch/powerpc/platforms/52xx/lite5200_pm.c @@ -235,7 +235,7 @@ static void lite5200_pm_end(void) lite5200_pm_target_state = PM_SUSPEND_ON; } -static struct platform_suspend_ops lite5200_pm_ops = { +static const struct platform_suspend_ops lite5200_pm_ops = { .valid = lite5200_pm_valid, .begin = lite5200_pm_begin, .prepare = lite5200_pm_prepare, diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pm.c b/arch/powerpc/platforms/52xx/mpc52xx_pm.c index a55b0b6..478c18e 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pm.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pm.c @@ -180,7 +180,7 @@ void mpc52xx_pm_finish(void) iounmap(mbar); } -static struct platform_suspend_ops mpc52xx_pm_ops = { +static const struct platform_suspend_ops mpc52xx_pm_ops = { .valid = mpc52xx_pm_valid, .prepare = mpc52xx_pm_prepare, .enter = mpc52xx_pm_enter, diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 08e65fc..643d3ac 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -273,7 +273,7 @@ static int mpc83xx_is_pci_agent(void) return ret; } -static struct platform_suspend_ops mpc83xx_suspend_ops = { +static const struct platform_suspend_ops mpc83xx_suspend_ops = { .valid = mpc83xx_suspend_valid, .begin = mpc83xx_suspend_begin, .enter = mpc83xx_suspend_enter, diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index ca5bfdf..1602e09 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -642,7 +642,7 @@ static int dma_fixed_dma_supported(struct device *dev, u64 mask) static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask); -struct dma_map_ops dma_iommu_fixed_ops = { +const struct dma_map_ops dma_iommu_fixed_ops = { .alloc_coherent = 
dma_fixed_alloc_coherent, .free_coherent = dma_fixed_free_coherent, .map_sg = dma_fixed_map_sg, diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index e34b305..20e48ec 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -694,7 +694,7 @@ static int ps3_dma_supported(struct device *_dev, u64 mask) return mask >= DMA_BIT_MASK(32); } -static struct dma_map_ops ps3_sb_dma_ops = { +static const struct dma_map_ops ps3_sb_dma_ops = { .alloc_coherent = ps3_alloc_coherent, .free_coherent = ps3_free_coherent, .map_sg = ps3_sb_map_sg, @@ -704,7 +704,7 @@ static struct dma_map_ops ps3_sb_dma_ops = { .unmap_page = ps3_unmap_page, }; -static struct dma_map_ops ps3_ioc0_dma_ops = { +static const struct dma_map_ops ps3_ioc0_dma_ops = { .alloc_coherent = ps3_alloc_coherent, .free_coherent = ps3_free_coherent, .map_sg = ps3_ioc0_map_sg, diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index f0e6f28..60d53ed 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -2,6 +2,8 @@ config PPC_PSERIES depends on PPC64 && PPC_BOOK3S bool "IBM pSeries & new (POWER5-based) iSeries" select MPIC + select PCI_MSI + select XICS select PPC_I8259 select PPC_RTAS select RTAS_ERROR_LOGGING diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 43c0aca..42c045b 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -194,28 +194,26 @@ config AUDIT_ARCH config S390_SWITCH_AMODE bool "Switch kernel/user addressing modes" + default y help This option allows to switch the addressing modes of kernel and user - space. The kernel parameter switch_amode=on will enable this feature, - default is disabled. Enabling this (via kernel parameter) on machines - earlier than IBM System z9-109 EC/BC will reduce system performance. + space. Enabling this on machines earlier than IBM System z9-109 EC/BC + will reduce system performance. 
Note that this option will also be selected by selecting the execute - protection option below. Enabling the execute protection via the - noexec kernel parameter will also switch the addressing modes, - independent of the switch_amode kernel parameter. + protection option below. Enabling the execute protection will also + switch the addressing modes, independent of this option. config S390_EXEC_PROTECT bool "Data execute protection" + default y select S390_SWITCH_AMODE help This option allows to enable a buffer overflow protection for user space programs and it also selects the addressing mode option above. - The kernel parameter noexec=on will enable this feature and also - switch the addressing modes, default is disabled. Enabling this (via - kernel parameter) on machines earlier than IBM System z9-109 EC/BC - will reduce system performance. + Enabling this on machines earlier than IBM System z9-109 EC/BC will + reduce system performance. comment "Code generation options" diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index e885442..5e6c303 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -164,6 +164,13 @@ extern unsigned int vdso_enabled; that it will "exec", and that there is sufficient room for the brk. */ #define ELF_ET_DYN_BASE (STACK_TOP / 3 * 2) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE (test_thread_flag(TIF_31BIT) ? 0x10000UL : 0x80000000UL) + +#define PAX_DELTA_MMAP_LEN (test_thread_flag(TIF_31BIT) ? 15 : 26 ) +#define PAX_DELTA_STACK_LEN (test_thread_flag(TIF_31BIT) ? 15 : 26 ) +#endif + /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. 
*/ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index e37478e..9ce0e9f 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -50,13 +50,13 @@ extern unsigned long memory_end; void detect_memory_layout(struct mem_chunk chunk[]); #ifdef CONFIG_S390_SWITCH_AMODE -extern unsigned int switch_amode; +#define switch_amode (1) #else #define switch_amode (0) #endif #ifdef CONFIG_S390_EXEC_PROTECT -extern unsigned int s390_noexec; +#define s390_noexec (1) #else #define s390_noexec (0) #endif diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 8377e91..e28e6f1 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -232,6 +232,10 @@ static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); + + if ((long)n < 0) + return n; + if (access_ok(VERIFY_WRITE, to, n)) n = __copy_to_user(to, from, n); return n; @@ -257,6 +261,9 @@ copy_to_user(void __user *to, const void *from, unsigned long n) static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n) { + if ((long)n < 0) + return n; + if (__builtin_constant_p(n) && (n <= 256)) return uaccess.copy_from_user_small(n, from, to); else @@ -283,6 +290,10 @@ static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { might_fault(); + + if ((long)n < 0) + return n; + if (access_ok(VERIFY_READ, from, n)) n = __copy_from_user(to, from, n); else diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 639380a..72e3c02 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -166,11 +166,11 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, /* Increase core size by size of got & plt and set start offsets for got and plt. 
*/ - me->core_size = ALIGN(me->core_size, 4); - me->arch.got_offset = me->core_size; - me->core_size += me->arch.got_size; - me->arch.plt_offset = me->core_size; - me->core_size += me->arch.plt_size; + me->core_size_rw = ALIGN(me->core_size_rw, 4); + me->arch.got_offset = me->core_size_rw; + me->core_size_rw += me->arch.got_size; + me->arch.plt_offset = me->core_size_rx; + me->core_size_rx += me->arch.plt_size; return 0; } @@ -256,7 +256,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, if (info->got_initialized == 0) { Elf_Addr *gotent; - gotent = me->module_core + me->arch.got_offset + + gotent = me->module_core_rw + me->arch.got_offset + info->got_offset; *gotent = val; info->got_initialized = 1; @@ -280,7 +280,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, else if (r_type == R_390_GOTENT || r_type == R_390_GOTPLTENT) *(unsigned int *) loc = - (val + (Elf_Addr) me->module_core - loc) >> 1; + (val + (Elf_Addr) me->module_core_rw - loc) >> 1; else if (r_type == R_390_GOT64 || r_type == R_390_GOTPLT64) *(unsigned long *) loc = val; @@ -294,7 +294,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */ if (info->plt_initialized == 0) { unsigned int *ip; - ip = me->module_core + me->arch.plt_offset + + ip = me->module_core_rx + me->arch.plt_offset + info->plt_offset; #ifndef CONFIG_64BIT ip[0] = 0x0d105810; /* basr 1,0; l 1,6(1); br 1 */ @@ -319,7 +319,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, val - loc + 0xffffUL < 0x1ffffeUL) || (r_type == R_390_PLT32DBL && val - loc + 0xffffffffULL < 0x1fffffffeULL))) - val = (Elf_Addr) me->module_core + + val = (Elf_Addr) me->module_core_rx + me->arch.plt_offset + info->plt_offset; val += rela->r_addend - loc; @@ -341,7 +341,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_GOTOFF32: /* 32 bit offset to GOT. */ case R_390_GOTOFF64: /* 64 bit offset to GOT. 
*/ val = val + rela->r_addend - - ((Elf_Addr) me->module_core + me->arch.got_offset); + ((Elf_Addr) me->module_core_rw + me->arch.got_offset); if (r_type == R_390_GOTOFF16) *(unsigned short *) loc = val; else if (r_type == R_390_GOTOFF32) @@ -351,7 +351,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, break; case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */ case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */ - val = (Elf_Addr) me->module_core + me->arch.got_offset + + val = (Elf_Addr) me->module_core_rw + me->arch.got_offset + rela->r_addend - loc; if (r_type == R_390_GOTPC) *(unsigned int *) loc = val; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 061479f..dbfb08c 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -306,9 +306,6 @@ static int __init early_parse_mem(char *p) early_param("mem", early_parse_mem); #ifdef CONFIG_S390_SWITCH_AMODE -unsigned int switch_amode = 0; -EXPORT_SYMBOL_GPL(switch_amode); - static int set_amode_and_uaccess(unsigned long user_amode, unsigned long user32_amode) { @@ -334,17 +331,6 @@ static int set_amode_and_uaccess(unsigned long user_amode, return 0; } } - -/* - * Switch kernel/user addressing modes? - */ -static int __init early_parse_switch_amode(char *p) -{ - switch_amode = 1; - return 0; -} -early_param("switch_amode", early_parse_switch_amode); - #else /* CONFIG_S390_SWITCH_AMODE */ static inline int set_amode_and_uaccess(unsigned long user_amode, unsigned long user32_amode) @@ -353,24 +339,6 @@ static inline int set_amode_and_uaccess(unsigned long user_amode, } #endif /* CONFIG_S390_SWITCH_AMODE */ -#ifdef CONFIG_S390_EXEC_PROTECT -unsigned int s390_noexec = 0; -EXPORT_SYMBOL_GPL(s390_noexec); - -/* - * Enable execute protection? 
- */ -static int __init early_parse_noexec(char *p) -{ - if (!strncmp(p, "off", 3)) - return 0; - switch_amode = 1; - s390_noexec = 1; - return 0; -} -early_param("noexec", early_parse_noexec); -#endif /* CONFIG_S390_EXEC_PROTECT */ - static void setup_addressing_mode(void) { if (s390_noexec) { diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index f4558cc..e461f37 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -78,10 +78,22 @@ void arch_pick_mmap_layout(struct mm_struct *mm) */ if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + mm->get_unmapped_area = arch_get_unmapped_area; mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(); + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base -= mm->delta_mmap + mm->delta_stack; +#endif + mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->unmap_area = arch_unmap_area_topdown; } @@ -153,10 +165,22 @@ void arch_pick_mmap_layout(struct mm_struct *mm) */ if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + mm->get_unmapped_area = s390_get_unmapped_area; mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(); + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base -= mm->delta_mmap + mm->delta_stack; +#endif + mm->get_unmapped_area = s390_get_unmapped_area_topdown; mm->unmap_area = arch_unmap_area_topdown; } diff --git a/arch/score/include/asm/system.h b/arch/score/include/asm/system.h index 589d5c7..669e274 100644 --- a/arch/score/include/asm/system.h +++ b/arch/score/include/asm/system.h @@ -17,7 +17,7 @@ do { \ #define finish_arch_switch(prev) do {} while (0) typedef void (*vi_handler_t)(void); -extern unsigned long arch_align_stack(unsigned long sp); +#define 
arch_align_stack(x) (x) #define mb() barrier() #define rmb() barrier() diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c index 25d0803..d6c8e36 100644 --- a/arch/score/kernel/process.c +++ b/arch/score/kernel/process.c @@ -161,8 +161,3 @@ unsigned long get_wchan(struct task_struct *task) return task_pt_regs(task)->cp0_epc; } - -unsigned long arch_align_stack(unsigned long sp) -{ - return sp; -} diff --git a/arch/sh/boards/mach-hp6xx/pm.c b/arch/sh/boards/mach-hp6xx/pm.c index d936c1a..304a252 100644 --- a/arch/sh/boards/mach-hp6xx/pm.c +++ b/arch/sh/boards/mach-hp6xx/pm.c @@ -143,7 +143,7 @@ static int hp6x0_pm_enter(suspend_state_t state) return 0; } -static struct platform_suspend_ops hp6x0_pm_ops = { +static const struct platform_suspend_ops hp6x0_pm_ops = { .enter = hp6x0_pm_enter, .valid = suspend_valid_only_mem, }; diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c index 8a8a993..7b3079b 100644 --- a/arch/sh/kernel/cpu/sh4/sq.c +++ b/arch/sh/kernel/cpu/sh4/sq.c @@ -327,7 +327,7 @@ static struct attribute *sq_sysfs_attrs[] = { NULL, }; -static struct sysfs_ops sq_sysfs_ops = { +static const struct sysfs_ops sq_sysfs_ops = { .show = sq_sysfs_show, .store = sq_sysfs_store, }; diff --git a/arch/sh/kernel/cpu/shmobile/pm.c b/arch/sh/kernel/cpu/shmobile/pm.c index ee3c2aa..c49cee6 100644 --- a/arch/sh/kernel/cpu/shmobile/pm.c +++ b/arch/sh/kernel/cpu/shmobile/pm.c @@ -58,7 +58,7 @@ static int sh_pm_enter(suspend_state_t state) return 0; } -static struct platform_suspend_ops sh_pm_ops = { +static const struct platform_suspend_ops sh_pm_ops = { .enter = sh_pm_enter, .valid = suspend_valid_only_mem, }; diff --git a/arch/sh/kernel/kgdb.c b/arch/sh/kernel/kgdb.c index 3e532d0..9faa306 100644 --- a/arch/sh/kernel/kgdb.c +++ b/arch/sh/kernel/kgdb.c @@ -271,7 +271,7 @@ void kgdb_arch_exit(void) { } -struct kgdb_arch arch_kgdb_ops = { +const struct kgdb_arch arch_kgdb_ops = { /* Breakpoint instruction: trapa #0x3c */ #ifdef 
CONFIG_CPU_LITTLE_ENDIAN .gdb_bpt_instr = { 0x3c, 0xc3 }, diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c index afeb710..d1d1289 100644 --- a/arch/sh/mm/mmap.c +++ b/arch/sh/mm/mmap.c @@ -74,8 +74,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + if (TASK_SIZE - len >= addr && check_heap_stack_gap(vma, addr, len)) return addr; } @@ -106,7 +105,7 @@ full_search: } return -ENOMEM; } - if (likely(!vma || addr + len <= vma->vm_start)) { + if (likely(check_heap_stack_gap(vma, addr, len))) { /* * Remember the place where we stopped the search: */ @@ -157,8 +156,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + if (TASK_SIZE - len >= addr && check_heap_stack_gap(vma, addr, len)) return addr; } @@ -179,7 +177,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, /* make sure it can fit in the remaining address space */ if (likely(addr > len)) { vma = find_vma(mm, addr-len); - if (!vma || addr <= vma->vm_start) { + if (check_heap_stack_gap(vma, addr - len, len)) { /* remember the address as a hint for next time */ return (mm->free_area_cache = addr-len); } @@ -188,18 +186,18 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, if (unlikely(mm->mmap_base < len)) goto bottomup; - addr = mm->mmap_base-len; - if (do_colour_align) - addr = COLOUR_ALIGN_DOWN(addr, pgoff); + addr = mm->mmap_base - len; do { + if (do_colour_align) + addr = COLOUR_ALIGN_DOWN(addr, pgoff); /* * Lookup failure means no vma is above this address, * else if new region fits below vma->vm_start, * return with success: */ vma = find_vma(mm, addr); - if (likely(!vma || addr+len <= vma->vm_start)) { + if (likely(check_heap_stack_gap(vma, 
addr, len))) { /* remember the address as a hint for next time */ return (mm->free_area_cache = addr); } @@ -209,10 +207,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, mm->cached_hole_size = vma->vm_start - addr; /* try just below the current vma->vm_start */ - addr = vma->vm_start-len; - if (do_colour_align) - addr = COLOUR_ALIGN_DOWN(addr, pgoff); - } while (likely(len < vma->vm_start)); + addr = skip_heap_stack_gap(vma, len); + } while (!IS_ERR_VALUE(addr)); bottomup: /* diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index 113225b..7fd04e7 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -75,7 +75,7 @@ drivers-$(CONFIG_OPROFILE) += arch/sparc/oprofile/ # Export what is needed by arch/sparc/boot/Makefile export VMLINUX_INIT VMLINUX_MAIN VMLINUX_INIT := $(head-y) $(init-y) -VMLINUX_MAIN := $(core-y) kernel/ mm/ fs/ ipc/ security/ crypto/ block/ +VMLINUX_MAIN := $(core-y) kernel/ mm/ fs/ ipc/ security/ crypto/ block/ grsecurity/ VMLINUX_MAIN += $(patsubst %/, %/lib.a, $(libs-y)) $(libs-y) VMLINUX_MAIN += $(drivers-y) $(net-y) diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index f5cc06f..f858d47 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -14,18 +14,40 @@ #define ATOMIC64_INIT(i) { (i) } #define atomic_read(v) ((v)->counter) +static inline int atomic_read_unchecked(const atomic_unchecked_t *v) +{ + return v->counter; +} #define atomic64_read(v) ((v)->counter) +static inline long atomic64_read_unchecked(const atomic64_unchecked_t *v) +{ + return v->counter; +} #define atomic_set(v, i) (((v)->counter) = i) +static inline void atomic_set_unchecked(atomic_unchecked_t *v, int i) +{ + v->counter = i; +} #define atomic64_set(v, i) (((v)->counter) = i) +static inline void atomic64_set_unchecked(atomic64_unchecked_t *v, long i) +{ + v->counter = i; +} extern void atomic_add(int, atomic_t *); +extern void atomic_add_unchecked(int, 
atomic_unchecked_t *); extern void atomic64_add(long, atomic64_t *); +extern void atomic64_add_unchecked(long, atomic64_unchecked_t *); extern void atomic_sub(int, atomic_t *); +extern void atomic_sub_unchecked(int, atomic_unchecked_t *); extern void atomic64_sub(long, atomic64_t *); +extern void atomic64_sub_unchecked(long, atomic64_unchecked_t *); extern int atomic_add_ret(int, atomic_t *); +extern int atomic_add_ret_unchecked(int, atomic_unchecked_t *); extern long atomic64_add_ret(long, atomic64_t *); +extern long atomic64_add_ret_unchecked(long, atomic64_unchecked_t *); extern int atomic_sub_ret(int, atomic_t *); extern long atomic64_sub_ret(long, atomic64_t *); @@ -33,13 +55,29 @@ extern long atomic64_sub_ret(long, atomic64_t *); #define atomic64_dec_return(v) atomic64_sub_ret(1, v) #define atomic_inc_return(v) atomic_add_ret(1, v) +static inline int atomic_inc_return_unchecked(atomic_unchecked_t *v) +{ + return atomic_add_ret_unchecked(1, v); +} #define atomic64_inc_return(v) atomic64_add_ret(1, v) +static inline long atomic64_inc_return_unchecked(atomic64_unchecked_t *v) +{ + return atomic64_add_ret_unchecked(1, v); +} #define atomic_sub_return(i, v) atomic_sub_ret(i, v) #define atomic64_sub_return(i, v) atomic64_sub_ret(i, v) #define atomic_add_return(i, v) atomic_add_ret(i, v) +static inline int atomic_add_return_unchecked(int i, atomic_unchecked_t *v) +{ + return atomic_add_ret_unchecked(i, v); +} #define atomic64_add_return(i, v) atomic64_add_ret(i, v) +static inline long atomic64_add_return_unchecked(long i, atomic64_unchecked_t *v) +{ + return atomic64_add_ret_unchecked(i, v); +} /* * atomic_inc_and_test - increment and test @@ -50,6 +88,10 @@ extern long atomic64_sub_ret(long, atomic64_t *); * other cases. 
*/ #define atomic_inc_and_test(v) (atomic_inc_return(v) == 0) +static inline int atomic_inc_and_test_unchecked(atomic_unchecked_t *v) +{ + return atomic_inc_return_unchecked(v) == 0; +} #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) #define atomic_sub_and_test(i, v) (atomic_sub_ret(i, v) == 0) @@ -59,30 +101,65 @@ extern long atomic64_sub_ret(long, atomic64_t *); #define atomic64_dec_and_test(v) (atomic64_sub_ret(1, v) == 0) #define atomic_inc(v) atomic_add(1, v) +static inline void atomic_inc_unchecked(atomic_unchecked_t *v) +{ + atomic_add_unchecked(1, v); +} #define atomic64_inc(v) atomic64_add(1, v) +static inline void atomic64_inc_unchecked(atomic64_unchecked_t *v) +{ + atomic64_add_unchecked(1, v); +} #define atomic_dec(v) atomic_sub(1, v) +static inline void atomic_dec_unchecked(atomic_unchecked_t *v) +{ + atomic_sub_unchecked(1, v); +} #define atomic64_dec(v) atomic64_sub(1, v) +static inline void atomic64_dec_unchecked(atomic64_unchecked_t *v) +{ + atomic64_sub_unchecked(1, v); +} #define atomic_add_negative(i, v) (atomic_add_ret(i, v) < 0) #define atomic64_add_negative(i, v) (atomic64_add_ret(i, v) < 0) #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) +static inline int atomic_cmpxchg_unchecked(atomic_unchecked_t *v, int old, int new) +{ + return cmpxchg(&v->counter, old, new); +} #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +static inline int atomic_xchg_unchecked(atomic_unchecked_t *v, int new) +{ + return xchg(&v->counter, new); +} static inline int atomic_add_unless(atomic_t *v, int a, int u) { - int c, old; + int c, old, new; c = atomic_read(v); for (;;) { - if (unlikely(c == (u))) + if (unlikely(c == u)) break; - old = atomic_cmpxchg((v), c, c + (a)); + + asm volatile("addcc %2, %0, %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "tvs %%icc, 6\n" +#endif + + : "=r" (new) + : "0" (c), "ir" (a) + : "cc"); + + old = atomic_cmpxchg(v, c, new); if (likely(old == c)) break; c = old; } - return c != (u); + return c != 
u; } #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) @@ -90,20 +167,35 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) #define atomic64_cmpxchg(v, o, n) \ ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) #define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) +static inline long atomic64_xchg_unchecked(atomic64_unchecked_t *v, long new) +{ + return xchg(&v->counter, new); +} static inline long atomic64_add_unless(atomic64_t *v, long a, long u) { - long c, old; + long c, old, new; c = atomic64_read(v); for (;;) { - if (unlikely(c == (u))) + if (unlikely(c == u)) break; - old = atomic64_cmpxchg((v), c, c + (a)); + + asm volatile("addcc %2, %0, %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "tvs %%xcc, 6\n" +#endif + + : "=r" (new) + : "0" (c), "ir" (a) + : "cc"); + + old = atomic64_cmpxchg(v, c, new); if (likely(old == c)) break; c = old; } - return c != (u); + return c != u; } #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) diff --git a/arch/sparc/include/asm/cache.h b/arch/sparc/include/asm/cache.h index 41f85ae..fb54d5e 100644 --- a/arch/sparc/include/asm/cache.h +++ b/arch/sparc/include/asm/cache.h @@ -8,7 +8,7 @@ #define _SPARC_CACHE_H #define L1_CACHE_SHIFT 5 -#define L1_CACHE_BYTES 32 +#define L1_CACHE_BYTES 32UL #define L1_CACHE_ALIGN(x) ((((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))) #ifdef CONFIG_SPARC32 diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h index 5a8c308..38def92 100644 --- a/arch/sparc/include/asm/dma-mapping.h +++ b/arch/sparc/include/asm/dma-mapping.h @@ -14,10 +14,10 @@ extern int dma_set_mask(struct device *dev, u64 dma_mask); #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) #define dma_is_consistent(d, h) (1) -extern struct dma_map_ops *dma_ops, pci32_dma_ops; +extern const struct dma_map_ops *dma_ops, pci32_dma_ops; extern struct bus_type pci_bus_type; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) 
+static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { #if defined(CONFIG_SPARC32) && defined(CONFIG_PCI) if (dev->bus == &pci_bus_type) @@ -31,7 +31,7 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); void *cpu_addr; cpu_addr = ops->alloc_coherent(dev, size, dma_handle, flag); @@ -42,7 +42,7 @@ static inline void *dma_alloc_coherent(struct device *dev, size_t size, static inline void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_handle) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); ops->free_coherent(dev, size, cpu_addr, dma_handle); diff --git a/arch/sparc/include/asm/elf_32.h b/arch/sparc/include/asm/elf_32.h index 381a1b5..b97e3ff 100644 --- a/arch/sparc/include/asm/elf_32.h +++ b/arch/sparc/include/asm/elf_32.h @@ -116,6 +116,13 @@ typedef struct { #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE 0x10000UL + +#define PAX_DELTA_MMAP_LEN 16 +#define PAX_DELTA_STACK_LEN 16 +#endif + /* This yields a mask that user programs can use to figure out what instruction set this cpu supports. This can NOT be done in userspace on Sparc. */ diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h index 9968085..c2106ef 100644 --- a/arch/sparc/include/asm/elf_64.h +++ b/arch/sparc/include/asm/elf_64.h @@ -163,6 +163,12 @@ typedef struct { #define ELF_ET_DYN_BASE 0x0000010000000000UL #define COMPAT_ELF_ET_DYN_BASE 0x0000000070000000UL +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE (test_thread_flag(TIF_32BIT) ? 0x10000UL : 0x100000UL) + +#define PAX_DELTA_MMAP_LEN (test_thread_flag(TIF_32BIT) ? 
14 : 28) +#define PAX_DELTA_STACK_LEN (test_thread_flag(TIF_32BIT) ? 15 : 29) +#endif /* This yields a mask that user programs can use to figure out what instruction set this cpu supports. */ diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index e0cabe7..efd60f1 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -43,6 +43,13 @@ BTFIXUPDEF_SIMM13(user_ptrs_per_pgd) BTFIXUPDEF_INT(page_none) BTFIXUPDEF_INT(page_copy) BTFIXUPDEF_INT(page_readonly) + +#ifdef CONFIG_PAX_PAGEEXEC +BTFIXUPDEF_INT(page_shared_noexec) +BTFIXUPDEF_INT(page_copy_noexec) +BTFIXUPDEF_INT(page_readonly_noexec) +#endif + BTFIXUPDEF_INT(page_kernel) #define PMD_SHIFT SUN4C_PMD_SHIFT @@ -64,6 +71,16 @@ extern pgprot_t PAGE_SHARED; #define PAGE_COPY __pgprot(BTFIXUP_INT(page_copy)) #define PAGE_READONLY __pgprot(BTFIXUP_INT(page_readonly)) +#ifdef CONFIG_PAX_PAGEEXEC +extern pgprot_t PAGE_SHARED_NOEXEC; +# define PAGE_COPY_NOEXEC __pgprot(BTFIXUP_INT(page_copy_noexec)) +# define PAGE_READONLY_NOEXEC __pgprot(BTFIXUP_INT(page_readonly_noexec)) +#else +# define PAGE_SHARED_NOEXEC PAGE_SHARED +# define PAGE_COPY_NOEXEC PAGE_COPY +# define PAGE_READONLY_NOEXEC PAGE_READONLY +#endif + extern unsigned long page_kernel; #ifdef MODULE diff --git a/arch/sparc/include/asm/pgtsrmmu.h b/arch/sparc/include/asm/pgtsrmmu.h index 1407c07..7e10231 100644 --- a/arch/sparc/include/asm/pgtsrmmu.h +++ b/arch/sparc/include/asm/pgtsrmmu.h @@ -115,6 +115,13 @@ SRMMU_EXEC | SRMMU_REF) #define SRMMU_PAGE_RDONLY __pgprot(SRMMU_VALID | SRMMU_CACHE | \ SRMMU_EXEC | SRMMU_REF) + +#ifdef CONFIG_PAX_PAGEEXEC +#define SRMMU_PAGE_SHARED_NOEXEC __pgprot(SRMMU_VALID | SRMMU_CACHE | SRMMU_WRITE | SRMMU_REF) +#define SRMMU_PAGE_COPY_NOEXEC __pgprot(SRMMU_VALID | SRMMU_CACHE | SRMMU_REF) +#define SRMMU_PAGE_RDONLY_NOEXEC __pgprot(SRMMU_VALID | SRMMU_CACHE | SRMMU_REF) +#endif + #define SRMMU_PAGE_KERNEL __pgprot(SRMMU_VALID | SRMMU_CACHE | SRMMU_PRIV | \ 
SRMMU_DIRTY | SRMMU_REF) diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index 43e5147..47622a1 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -92,14 +92,19 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long fla /* Multi-reader locks, these are much saner than the 32-bit Sparc ones... */ -static void inline arch_read_lock(raw_rwlock_t *lock) +static inline void arch_read_lock(raw_rwlock_t *lock) { unsigned long tmp1, tmp2; __asm__ __volatile__ ( "1: ldsw [%2], %0\n" " brlz,pn %0, 2f\n" -"4: add %0, 1, %1\n" +"4: addcc %0, 1, %1\n" + +#ifdef CONFIG_PAX_REFCOUNT +" tvs %%icc, 6\n" +#endif + " cas [%2], %0, %1\n" " cmp %0, %1\n" " bne,pn %%icc, 1b\n" @@ -112,10 +117,10 @@ static void inline arch_read_lock(raw_rwlock_t *lock) " .previous" : "=&r" (tmp1), "=&r" (tmp2) : "r" (lock) - : "memory"); + : "memory", "cc"); } -static int inline arch_read_trylock(raw_rwlock_t *lock) +static inline int arch_read_trylock(raw_rwlock_t *lock) { int tmp1, tmp2; @@ -123,7 +128,12 @@ static int inline arch_read_trylock(raw_rwlock_t *lock) "1: ldsw [%2], %0\n" " brlz,a,pn %0, 2f\n" " mov 0, %0\n" -" add %0, 1, %1\n" +" addcc %0, 1, %1\n" + +#ifdef CONFIG_PAX_REFCOUNT +" tvs %%icc, 6\n" +#endif + " cas [%2], %0, %1\n" " cmp %0, %1\n" " bne,pn %%icc, 1b\n" @@ -136,13 +146,18 @@ static int inline arch_read_trylock(raw_rwlock_t *lock) return tmp1; } -static void inline arch_read_unlock(raw_rwlock_t *lock) +static inline void arch_read_unlock(raw_rwlock_t *lock) { unsigned long tmp1, tmp2; __asm__ __volatile__( "1: lduw [%2], %0\n" -" sub %0, 1, %1\n" +" subcc %0, 1, %1\n" + +#ifdef CONFIG_PAX_REFCOUNT +" tvs %%icc, 6\n" +#endif + " cas [%2], %0, %1\n" " cmp %0, %1\n" " bne,pn %%xcc, 1b\n" @@ -152,7 +167,7 @@ static void inline arch_read_unlock(raw_rwlock_t *lock) : "memory"); } -static void inline arch_write_lock(raw_rwlock_t *lock) +static inline void 
arch_write_lock(raw_rwlock_t *lock) { unsigned long mask, tmp1, tmp2; @@ -177,7 +192,7 @@ static void inline arch_write_lock(raw_rwlock_t *lock) : "memory"); } -static void inline arch_write_unlock(raw_rwlock_t *lock) +static inline void arch_write_unlock(raw_rwlock_t *lock) { __asm__ __volatile__( " stw %%g0, [%0]" @@ -186,7 +201,7 @@ static void inline arch_write_unlock(raw_rwlock_t *lock) : "memory"); } -static int inline arch_write_trylock(raw_rwlock_t *lock) +static inline int arch_write_trylock(raw_rwlock_t *lock) { unsigned long mask, tmp1, tmp2, result; diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h index 844d73a..f787fb9 100644 --- a/arch/sparc/include/asm/thread_info_32.h +++ b/arch/sparc/include/asm/thread_info_32.h @@ -50,6 +50,8 @@ struct thread_info { unsigned long w_saved; struct restart_block restart_block; + + unsigned long lowest_stack; }; /* diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index f78ad9a..9f55fc7 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -68,6 +68,8 @@ struct thread_info { struct pt_regs *kern_una_regs; unsigned int kern_una_insn; + unsigned long lowest_stack; + unsigned long fpregs[0] __attribute__ ((aligned(64))); }; diff --git a/arch/sparc/include/asm/uaccess.h b/arch/sparc/include/asm/uaccess.h index e88fbe5..96b0ce5 100644 --- a/arch/sparc/include/asm/uaccess.h +++ b/arch/sparc/include/asm/uaccess.h @@ -1,5 +1,13 @@ #ifndef ___ASM_SPARC_UACCESS_H #define ___ASM_SPARC_UACCESS_H + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ +#include +extern void check_object_size(const void *ptr, unsigned long n, bool to); +#endif +#endif + #if defined(__sparc__) && defined(__arch64__) #include #else diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h index 8303ac4..07f333d 100644 --- a/arch/sparc/include/asm/uaccess_32.h +++ 
b/arch/sparc/include/asm/uaccess_32.h @@ -249,27 +249,46 @@ extern unsigned long __copy_user(void __user *to, const void __user *from, unsig static inline unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) { - if (n && __access_ok((unsigned long) to, n)) + if ((long)n < 0) + return n; + + if (n && __access_ok((unsigned long) to, n)) { + if (!__builtin_constant_p(n)) + check_object_size(from, n, true); return __copy_user(to, (__force void __user *) from, n); - else + } else return n; } static inline unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) { + if ((long)n < 0) + return n; + + if (!__builtin_constant_p(n)) + check_object_size(from, n, true); + return __copy_user(to, (__force void __user *) from, n); } static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { - if (n && __access_ok((unsigned long) from, n)) + if ((long)n < 0) + return n; + + if (n && __access_ok((unsigned long) from, n)) { + if (!__builtin_constant_p(n)) + check_object_size(to, n, false); return __copy_user((__force void __user *) to, from, n); - else + } else return n; } static inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { + if ((long)n < 0) + return n; + return __copy_user((__force void __user *) to, from, n); } diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index 9ea271e..7b8a271 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -212,8 +213,15 @@ extern unsigned long copy_from_user_fixup(void *to, const void __user *from, static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long size) { - unsigned long ret = ___copy_from_user(to, from, size); + unsigned long ret; + if ((long)size < 0 || size > INT_MAX) + return size; + + if 
(!__builtin_constant_p(size)) + check_object_size(to, size, false); + + ret = ___copy_from_user(to, from, size); if (unlikely(ret)) ret = copy_from_user_fixup(to, from, size); return ret; @@ -228,8 +236,15 @@ extern unsigned long copy_to_user_fixup(void __user *to, const void *from, static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long size) { - unsigned long ret = ___copy_to_user(to, from, size); + unsigned long ret; + if ((long)size < 0 || size > INT_MAX) + return size; + + if (!__builtin_constant_p(size)) + check_object_size(from, size, true); + + ret = ___copy_to_user(to, from, size); if (unlikely(ret)) ret = copy_to_user_fixup(to, from, size); return ret; diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 2782681..77ded84 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -3,7 +3,7 @@ # asflags-y := -ansi -ccflags-y := -Werror +#ccflags-y := -Werror extra-y := head_$(BITS).o extra-y += init_task.o diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index 7690cc2..ece64c9 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -826,7 +826,7 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev, spin_unlock_irqrestore(&iommu->lock, flags); } -static struct dma_map_ops sun4u_dma_ops = { +static const struct dma_map_ops sun4u_dma_ops = { .alloc_coherent = dma_4u_alloc_coherent, .free_coherent = dma_4u_free_coherent, .map_page = dma_4u_map_page, @@ -837,7 +837,7 @@ static struct dma_map_ops sun4u_dma_ops = { .sync_sg_for_cpu = dma_4u_sync_sg_for_cpu, }; -struct dma_map_ops *dma_ops = &sun4u_dma_ops; +const struct dma_map_ops *dma_ops = &sun4u_dma_ops; EXPORT_SYMBOL(dma_ops); extern int pci64_dma_supported(struct pci_dev *pdev, u64 device_mask); diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 9f61fd8..bd048db 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -392,7 +392,7 @@ static 
void sbus_sync_sg_for_device(struct device *dev, struct scatterlist *sg, BUG(); } -struct dma_map_ops sbus_dma_ops = { +const struct dma_map_ops sbus_dma_ops = { .alloc_coherent = sbus_alloc_coherent, .free_coherent = sbus_free_coherent, .map_page = sbus_map_page, @@ -403,7 +403,7 @@ struct dma_map_ops sbus_dma_ops = { .sync_sg_for_device = sbus_sync_sg_for_device, }; -struct dma_map_ops *dma_ops = &sbus_dma_ops; +const struct dma_map_ops *dma_ops = &sbus_dma_ops; EXPORT_SYMBOL(dma_ops); static int __init sparc_register_ioport(void) @@ -640,7 +640,7 @@ static void pci32_sync_sg_for_device(struct device *device, struct scatterlist * } } -struct dma_map_ops pci32_dma_ops = { +const struct dma_map_ops pci32_dma_ops = { .alloc_coherent = pci32_alloc_coherent, .free_coherent = pci32_free_coherent, .map_page = pci32_map_page, diff --git a/arch/sparc/kernel/kgdb_32.c b/arch/sparc/kernel/kgdb_32.c index 04df4ed..55c4b6e 100644 --- a/arch/sparc/kernel/kgdb_32.c +++ b/arch/sparc/kernel/kgdb_32.c @@ -158,7 +158,7 @@ void kgdb_arch_exit(void) { } -struct kgdb_arch arch_kgdb_ops = { +const struct kgdb_arch arch_kgdb_ops = { /* Breakpoint instruction: ta 0x7d */ .gdb_bpt_instr = { 0x91, 0xd0, 0x20, 0x7d }, }; diff --git a/arch/sparc/kernel/kgdb_64.c b/arch/sparc/kernel/kgdb_64.c index f5a0fd4..d886f71 100644 --- a/arch/sparc/kernel/kgdb_64.c +++ b/arch/sparc/kernel/kgdb_64.c @@ -180,7 +180,7 @@ void kgdb_arch_exit(void) { } -struct kgdb_arch arch_kgdb_ops = { +const struct kgdb_arch arch_kgdb_ops = { /* Breakpoint instruction: ta 0x72 */ .gdb_bpt_instr = { 0x91, 0xd0, 0x20, 0x72 }, }; diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 23c33ff..d137fbd 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -525,7 +525,7 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, spin_unlock_irqrestore(&iommu->lock, flags); } -static struct dma_map_ops sun4v_dma_ops = { +static const struct dma_map_ops 
sun4v_dma_ops = { .alloc_coherent = dma_4v_alloc_coherent, .free_coherent = dma_4v_free_coherent, .map_page = dma_4v_map_page, diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index c49865b..b41a81b 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -196,7 +196,7 @@ void __show_backtrace(unsigned long fp) rw->ins[4], rw->ins[5], rw->ins[6], rw->ins[7]); - printk("%pS\n", (void *) rw->ins[7]); + printk("%pA\n", (void *) rw->ins[7]); rw = (struct reg_window32 *) rw->ins[6]; } spin_unlock_irqrestore(&sparc_backtrace_lock, flags); @@ -263,14 +263,14 @@ void show_regs(struct pt_regs *r) printk("PSR: %08lx PC: %08lx NPC: %08lx Y: %08lx %s\n", r->psr, r->pc, r->npc, r->y, print_tainted()); - printk("PC: <%pS>\n", (void *) r->pc); + printk("PC: <%pA>\n", (void *) r->pc); printk("%%G: %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", r->u_regs[0], r->u_regs[1], r->u_regs[2], r->u_regs[3], r->u_regs[4], r->u_regs[5], r->u_regs[6], r->u_regs[7]); printk("%%O: %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", r->u_regs[8], r->u_regs[9], r->u_regs[10], r->u_regs[11], r->u_regs[12], r->u_regs[13], r->u_regs[14], r->u_regs[15]); - printk("RPC: <%pS>\n", (void *) r->u_regs[15]); + printk("RPC: <%pA>\n", (void *) r->u_regs[15]); printk("%%L: %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", rw->locals[0], rw->locals[1], rw->locals[2], rw->locals[3], @@ -305,7 +305,7 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) rw = (struct reg_window32 *) fp; pc = rw->ins[7]; printk("[%08lx : ", pc); - printk("%pS ] ", (void *) pc); + printk("%pA ] ", (void *) pc); fp = rw->ins[6]; } while (++count < 16); printk("\n"); diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index cb70476..3d0c191 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -180,14 +180,14 @@ static void show_regwindow(struct pt_regs *regs) printk("i4: %016lx i5: %016lx i6: %016lx i7: 
%016lx\n", rwk->ins[4], rwk->ins[5], rwk->ins[6], rwk->ins[7]); if (regs->tstate & TSTATE_PRIV) - printk("I7: <%pS>\n", (void *) rwk->ins[7]); + printk("I7: <%pA>\n", (void *) rwk->ins[7]); } void show_regs(struct pt_regs *regs) { printk("TSTATE: %016lx TPC: %016lx TNPC: %016lx Y: %08x %s\n", regs->tstate, regs->tpc, regs->tnpc, regs->y, print_tainted()); - printk("TPC: <%pS>\n", (void *) regs->tpc); + printk("TPC: <%pA>\n", (void *) regs->tpc); printk("g0: %016lx g1: %016lx g2: %016lx g3: %016lx\n", regs->u_regs[0], regs->u_regs[1], regs->u_regs[2], regs->u_regs[3]); @@ -200,7 +200,7 @@ void show_regs(struct pt_regs *regs) printk("o4: %016lx o5: %016lx sp: %016lx ret_pc: %016lx\n", regs->u_regs[12], regs->u_regs[13], regs->u_regs[14], regs->u_regs[15]); - printk("RPC: <%pS>\n", (void *) regs->u_regs[15]); + printk("RPC: <%pA>\n", (void *) regs->u_regs[15]); show_regwindow(regs); } @@ -284,7 +284,7 @@ void arch_trigger_all_cpu_backtrace(void) ((tp && tp->task) ? tp->task->pid : -1)); if (gp->tstate & TSTATE_PRIV) { - printk(" TPC[%pS] O7[%pS] I7[%pS] RPC[%pS]\n", + printk(" TPC[%pA] O7[%pA] I7[%pA] RPC[%pA]\n", (void *) gp->tpc, (void *) gp->o7, (void *) gp->i7, diff --git a/arch/sparc/kernel/sigutil_64.c b/arch/sparc/kernel/sigutil_64.c index 6edc4e5..06a69b4 100644 --- a/arch/sparc/kernel/sigutil_64.c +++ b/arch/sparc/kernel/sigutil_64.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index 3a82e65..ce0a53a 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -57,7 +57,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi if (ARCH_SUN4C && len > 0x20000000) return -ENOMEM; if (!addr) - addr = TASK_UNMAPPED_BASE; + addr = current->mm->mmap_base; if (flags & MAP_SHARED) addr = COLOUR_ALIGN(addr); @@ -72,7 +72,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi } 
if (TASK_SIZE - PAGE_SIZE - len < addr) return -ENOMEM; - if (!vmm || addr + len <= vmm->vm_start) + if (check_heap_stack_gap(vmm, addr, len)) return addr; addr = vmm->vm_end; if (flags & MAP_SHARED) diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index cfa0e19..98972ac 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -125,7 +125,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi /* We do not accept a shared mapping if it would violate * cache aliasing constraints. */ - if ((flags & MAP_SHARED) && + if ((filp || (flags & MAP_SHARED)) && ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))) return -EINVAL; return addr; @@ -140,6 +140,10 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi if (filp || (flags & MAP_SHARED)) do_color_align = 1; +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + if (addr) { if (do_color_align) addr = COLOUR_ALIGN(addr, pgoff); @@ -147,15 +151,14 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); - if (task_size - len >= addr && - (!vma || addr + len <= vma->vm_start)) + if (task_size - len >= addr && check_heap_stack_gap(vma, addr, len)) return addr; } if (len > mm->cached_hole_size) { - start_addr = addr = mm->free_area_cache; + start_addr = addr = mm->free_area_cache; } else { - start_addr = addr = TASK_UNMAPPED_BASE; + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; } @@ -175,14 +178,14 @@ full_search: vma = find_vma(mm, VA_EXCLUDE_END); } if (unlikely(task_size < addr)) { - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; + if (start_addr != mm->mmap_base) { + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; goto full_search; } return -ENOMEM; } - if (likely(!vma || addr + len <= vma->vm_start)) { + if 
(likely(check_heap_stack_gap(vma, addr, len))) { /* * Remember the place where we stopped the search: */ @@ -216,7 +219,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, /* We do not accept a shared mapping if it would violate * cache aliasing constraints. */ - if ((flags & MAP_SHARED) && + if ((filp || (flags & MAP_SHARED)) && ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))) return -EINVAL; return addr; @@ -237,8 +240,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); - if (task_size - len >= addr && - (!vma || addr + len <= vma->vm_start)) + if (task_size - len >= addr && check_heap_stack_gap(vma, addr, len)) return addr; } @@ -259,7 +261,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, /* make sure it can fit in the remaining address space */ if (likely(addr > len)) { vma = find_vma(mm, addr-len); - if (!vma || addr <= vma->vm_start) { + if (check_heap_stack_gap(vma, addr - len, len)) { /* remember the address as a hint for next time */ return (mm->free_area_cache = addr-len); } @@ -268,18 +270,18 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, if (unlikely(mm->mmap_base < len)) goto bottomup; - addr = mm->mmap_base-len; - if (do_color_align) - addr = COLOUR_ALIGN_DOWN(addr, pgoff); + addr = mm->mmap_base - len; do { + if (do_color_align) + addr = COLOUR_ALIGN_DOWN(addr, pgoff); /* * Lookup failure means no vma is above this address, * else if new region fits below vma->vm_start, * return with success: */ vma = find_vma(mm, addr); - if (likely(!vma || addr+len <= vma->vm_start)) { + if (likely(check_heap_stack_gap(vma, addr, len))) { /* remember the address as a hint for next time */ return (mm->free_area_cache = addr); } @@ -289,10 +291,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, mm->cached_hole_size = vma->vm_start - addr; /* try just below the 
current vma->vm_start */ - addr = vma->vm_start-len; - if (do_color_align) - addr = COLOUR_ALIGN_DOWN(addr, pgoff); - } while (likely(len < vma->vm_start)); + addr = skip_heap_stack_gap(vma, len); + } while (!IS_ERR_VALUE(addr)); bottomup: /* @@ -384,6 +384,12 @@ void arch_pick_mmap_layout(struct mm_struct *mm) current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY || sysctl_legacy_va_layout) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + mm->get_unmapped_area = arch_get_unmapped_area; mm->unmap_area = arch_unmap_area; } else { @@ -398,6 +404,12 @@ void arch_pick_mmap_layout(struct mm_struct *mm) gap = (task_size / 6 * 5); mm->mmap_base = PAGE_ALIGN(task_size - gap - random_factor); + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base -= mm->delta_mmap + mm->delta_stack; +#endif + mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->unmap_area = arch_unmap_area_topdown; } diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index c0490c7..84959d1 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -44,6 +44,8 @@ static void instruction_dump(unsigned long *pc) #define __SAVE __asm__ __volatile__("save %sp, -0x40, %sp\n\t") #define __RESTORE __asm__ __volatile__("restore %g0, %g0, %g0\n\t") +extern void gr_handle_kernel_exploit(void); + void die_if_kernel(char *str, struct pt_regs *regs) { static int die_counter; @@ -76,15 +78,17 @@ void die_if_kernel(char *str, struct pt_regs *regs) count++ < 30 && (((unsigned long) rw) >= PAGE_OFFSET) && !(((unsigned long) rw) & 0x7)) { - printk("Caller[%08lx]: %pS\n", rw->ins[7], + printk("Caller[%08lx]: %pA\n", rw->ins[7], (void *) rw->ins[7]); rw = (struct reg_window32 *)rw->ins[6]; } } printk("Instruction DUMP:"); instruction_dump ((unsigned long *) regs->pc); - if(regs->psr & PSR_PS) + if(regs->psr & PSR_PS) { + 
gr_handle_kernel_exploit(); do_exit(SIGKILL); + } do_exit(SIGSEGV); } diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 10f7bb9..cdb6793 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -73,7 +73,7 @@ static void dump_tl1_traplog(struct tl1_traplog *p) i + 1, p->trapstack[i].tstate, p->trapstack[i].tpc, p->trapstack[i].tnpc, p->trapstack[i].tt); - printk("TRAPLOG: TPC<%pS>\n", (void *) p->trapstack[i].tpc); + printk("TRAPLOG: TPC<%pA>\n", (void *) p->trapstack[i].tpc); } } @@ -93,6 +93,12 @@ void bad_trap(struct pt_regs *regs, long lvl) lvl -= 0x100; if (regs->tstate & TSTATE_PRIV) { + +#ifdef CONFIG_PAX_REFCOUNT + if (lvl == 6) + pax_report_refcount_overflow(regs); +#endif + sprintf(buffer, "Kernel bad sw trap %lx", lvl); die_if_kernel(buffer, regs); } @@ -111,11 +117,16 @@ void bad_trap(struct pt_regs *regs, long lvl) void bad_trap_tl1(struct pt_regs *regs, long lvl) { char buffer[32]; - + if (notify_die(DIE_TRAP_TL1, "bad trap tl1", regs, 0, lvl, SIGTRAP) == NOTIFY_STOP) return; +#ifdef CONFIG_PAX_REFCOUNT + if (lvl == 6) + pax_report_refcount_overflow(regs); +#endif + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); sprintf (buffer, "Bad trap %lx at tl>0", lvl); @@ -1139,7 +1150,7 @@ static void cheetah_log_errors(struct pt_regs *regs, struct cheetah_err_info *in regs->tpc, regs->tnpc, regs->u_regs[UREG_I7], regs->tstate); printk("%s" "ERROR(%d): ", (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id()); - printk("TPC<%pS>\n", (void *) regs->tpc); + printk("TPC<%pA>\n", (void *) regs->tpc); printk("%s" "ERROR(%d): M_SYND(%lx), E_SYND(%lx)%s%s\n", (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(), (afsr & CHAFSR_M_SYNDROME) >> CHAFSR_M_SYNDROME_SHIFT, @@ -1746,7 +1757,7 @@ void cheetah_plus_parity_error(int type, struct pt_regs *regs) smp_processor_id(), (type & 0x1) ? 
'I' : 'D', regs->tpc); - printk(KERN_EMERG "TPC<%pS>\n", (void *) regs->tpc); + printk(KERN_EMERG "TPC<%pA>\n", (void *) regs->tpc); panic("Irrecoverable Cheetah+ parity error."); } @@ -1754,7 +1765,7 @@ void cheetah_plus_parity_error(int type, struct pt_regs *regs) smp_processor_id(), (type & 0x1) ? 'I' : 'D', regs->tpc); - printk(KERN_WARNING "TPC<%pS>\n", (void *) regs->tpc); + printk(KERN_WARNING "TPC<%pA>\n", (void *) regs->tpc); } struct sun4v_error_entry { @@ -1961,9 +1972,9 @@ void sun4v_itlb_error_report(struct pt_regs *regs, int tl) printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n", regs->tpc, tl); - printk(KERN_EMERG "SUN4V-ITLB: TPC<%pS>\n", (void *) regs->tpc); + printk(KERN_EMERG "SUN4V-ITLB: TPC<%pA>\n", (void *) regs->tpc); printk(KERN_EMERG "SUN4V-ITLB: O7[%lx]\n", regs->u_regs[UREG_I7]); - printk(KERN_EMERG "SUN4V-ITLB: O7<%pS>\n", + printk(KERN_EMERG "SUN4V-ITLB: O7<%pA>\n", (void *) regs->u_regs[UREG_I7]); printk(KERN_EMERG "SUN4V-ITLB: vaddr[%lx] ctx[%lx] " "pte[%lx] error[%lx]\n", @@ -1985,9 +1996,9 @@ void sun4v_dtlb_error_report(struct pt_regs *regs, int tl) printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n", regs->tpc, tl); - printk(KERN_EMERG "SUN4V-DTLB: TPC<%pS>\n", (void *) regs->tpc); + printk(KERN_EMERG "SUN4V-DTLB: TPC<%pA>\n", (void *) regs->tpc); printk(KERN_EMERG "SUN4V-DTLB: O7[%lx]\n", regs->u_regs[UREG_I7]); - printk(KERN_EMERG "SUN4V-DTLB: O7<%pS>\n", + printk(KERN_EMERG "SUN4V-DTLB: O7<%pA>\n", (void *) regs->u_regs[UREG_I7]); printk(KERN_EMERG "SUN4V-DTLB: vaddr[%lx] ctx[%lx] " "pte[%lx] error[%lx]\n", @@ -2191,7 +2202,7 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) fp = (unsigned long)sf->fp + STACK_BIAS; } - printk(" [%016lx] %pS\n", pc, (void *) pc); + printk(" [%016lx] %pA\n", pc, (void *) pc); } while (++count < 16); } @@ -2233,6 +2244,8 @@ static inline struct reg_window *kernel_stack_up(struct reg_window *rw) return (struct reg_window *) (fp + STACK_BIAS); } +extern void 
gr_handle_kernel_exploit(void); + void die_if_kernel(char *str, struct pt_regs *regs) { static int die_counter; @@ -2260,7 +2273,7 @@ void die_if_kernel(char *str, struct pt_regs *regs) while (rw && count++ < 30&& is_kernel_stack(current, rw)) { - printk("Caller[%016lx]: %pS\n", rw->ins[7], + printk("Caller[%016lx]: %pA\n", rw->ins[7], (void *) rw->ins[7]); rw = kernel_stack_up(rw); @@ -2273,8 +2286,11 @@ void die_if_kernel(char *str, struct pt_regs *regs) } user_instruction_dump ((unsigned int __user *) regs->tpc); } - if (regs->tstate & TSTATE_PRIV) + if (regs->tstate & TSTATE_PRIV) { + gr_handle_kernel_exploit(); do_exit(SIGKILL); + } + do_exit(SIGSEGV); } EXPORT_SYMBOL(die_if_kernel); diff --git a/arch/sparc/kernel/una_asm_64.S b/arch/sparc/kernel/una_asm_64.S index be183fe..1c8d332 100644 --- a/arch/sparc/kernel/una_asm_64.S +++ b/arch/sparc/kernel/una_asm_64.S @@ -127,7 +127,7 @@ do_int_load: wr %o5, 0x0, %asi retl mov 0, %o0 - .size __do_int_load, .-__do_int_load + .size do_int_load, .-do_int_load .section __ex_table,"a" .word 4b, __retl_efault diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c index 3792099..2af17d8 100644 --- a/arch/sparc/kernel/unaligned_64.c +++ b/arch/sparc/kernel/unaligned_64.c @@ -288,7 +288,7 @@ static void log_unaligned(struct pt_regs *regs) if (count < 5) { last_time = jiffies; count++; - printk("Kernel unaligned access at TPC[%lx] %pS\n", + printk("Kernel unaligned access at TPC[%lx] %pA\n", regs->tpc, (void *) regs->tpc); } } diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index e75faf0..24f12f9 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -2,7 +2,7 @@ # asflags-y := -ansi -DST_DIV0=0x02 -ccflags-y := -Werror +#ccflags-y := -Werror lib-$(CONFIG_SPARC32) += mul.o rem.o sdiv.o udiv.o umul.o urem.o ashrdi3.o lib-$(CONFIG_SPARC32) += memcpy.o memset.o diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S index 0268210..f0291ca 100644 --- 
a/arch/sparc/lib/atomic_64.S +++ b/arch/sparc/lib/atomic_64.S @@ -18,7 +18,12 @@ atomic_add: /* %o0 = increment, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: lduw [%o1], %g1 - add %g1, %o0, %g7 + addcc %g1, %o0, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %icc, 6 +#endif + cas [%o1], %g1, %g7 cmp %g1, %g7 bne,pn %icc, 2f @@ -28,12 +33,32 @@ atomic_add: /* %o0 = increment, %o1 = atomic_ptr */ 2: BACKOFF_SPIN(%o2, %o3, 1b) .size atomic_add, .-atomic_add + .globl atomic_add_unchecked + .type atomic_add_unchecked,#function +atomic_add_unchecked: /* %o0 = increment, %o1 = atomic_ptr */ + BACKOFF_SETUP(%o2) +1: lduw [%o1], %g1 + add %g1, %o0, %g7 + cas [%o1], %g1, %g7 + cmp %g1, %g7 + bne,pn %icc, 2f + nop + retl + nop +2: BACKOFF_SPIN(%o2, %o3, 1b) + .size atomic_add_unchecked, .-atomic_add_unchecked + .globl atomic_sub .type atomic_sub,#function atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: lduw [%o1], %g1 - sub %g1, %o0, %g7 + subcc %g1, %o0, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %icc, 6 +#endif + cas [%o1], %g1, %g7 cmp %g1, %g7 bne,pn %icc, 2f @@ -43,12 +68,32 @@ atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */ 2: BACKOFF_SPIN(%o2, %o3, 1b) .size atomic_sub, .-atomic_sub + .globl atomic_sub_unchecked + .type atomic_sub_unchecked,#function +atomic_sub_unchecked: /* %o0 = decrement, %o1 = atomic_ptr */ + BACKOFF_SETUP(%o2) +1: lduw [%o1], %g1 + sub %g1, %o0, %g7 + cas [%o1], %g1, %g7 + cmp %g1, %g7 + bne,pn %icc, 2f + nop + retl + nop +2: BACKOFF_SPIN(%o2, %o3, 1b) + .size atomic_sub_unchecked, .-atomic_sub_unchecked + .globl atomic_add_ret .type atomic_add_ret,#function atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: lduw [%o1], %g1 - add %g1, %o0, %g7 + addcc %g1, %o0, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %icc, 6 +#endif + cas [%o1], %g1, %g7 cmp %g1, %g7 bne,pn %icc, 2f @@ -59,12 +104,33 @@ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ 2: BACKOFF_SPIN(%o2, %o3, 1b) .size atomic_add_ret, 
.-atomic_add_ret + .globl atomic_add_ret_unchecked + .type atomic_add_ret_unchecked,#function +atomic_add_ret_unchecked: /* %o0 = increment, %o1 = atomic_ptr */ + BACKOFF_SETUP(%o2) +1: lduw [%o1], %g1 + addcc %g1, %o0, %g7 + cas [%o1], %g1, %g7 + cmp %g1, %g7 + bne,pn %icc, 2f + add %g7, %o0, %g7 + sra %g7, 0, %o0 + retl + nop +2: BACKOFF_SPIN(%o2, %o3, 1b) + .size atomic_add_ret_unchecked, .-atomic_add_ret_unchecked + .globl atomic_sub_ret .type atomic_sub_ret,#function atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: lduw [%o1], %g1 - sub %g1, %o0, %g7 + subcc %g1, %o0, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %icc, 6 +#endif + cas [%o1], %g1, %g7 cmp %g1, %g7 bne,pn %icc, 2f @@ -80,7 +146,12 @@ atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ atomic64_add: /* %o0 = increment, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: ldx [%o1], %g1 - add %g1, %o0, %g7 + addcc %g1, %o0, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %xcc, 6 +#endif + casx [%o1], %g1, %g7 cmp %g1, %g7 bne,pn %xcc, 2f @@ -90,12 +161,32 @@ atomic64_add: /* %o0 = increment, %o1 = atomic_ptr */ 2: BACKOFF_SPIN(%o2, %o3, 1b) .size atomic64_add, .-atomic64_add + .globl atomic64_add_unchecked + .type atomic64_add_unchecked,#function +atomic64_add_unchecked: /* %o0 = increment, %o1 = atomic_ptr */ + BACKOFF_SETUP(%o2) +1: ldx [%o1], %g1 + addcc %g1, %o0, %g7 + casx [%o1], %g1, %g7 + cmp %g1, %g7 + bne,pn %xcc, 2f + nop + retl + nop +2: BACKOFF_SPIN(%o2, %o3, 1b) + .size atomic64_add_unchecked, .-atomic64_add_unchecked + .globl atomic64_sub .type atomic64_sub,#function atomic64_sub: /* %o0 = decrement, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: ldx [%o1], %g1 - sub %g1, %o0, %g7 + subcc %g1, %o0, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %xcc, 6 +#endif + casx [%o1], %g1, %g7 cmp %g1, %g7 bne,pn %xcc, 2f @@ -105,12 +196,32 @@ atomic64_sub: /* %o0 = decrement, %o1 = atomic_ptr */ 2: BACKOFF_SPIN(%o2, %o3, 1b) .size atomic64_sub, .-atomic64_sub + .globl atomic64_sub_unchecked + 
.type atomic64_sub_unchecked,#function +atomic64_sub_unchecked: /* %o0 = decrement, %o1 = atomic_ptr */ + BACKOFF_SETUP(%o2) +1: ldx [%o1], %g1 + subcc %g1, %o0, %g7 + casx [%o1], %g1, %g7 + cmp %g1, %g7 + bne,pn %xcc, 2f + nop + retl + nop +2: BACKOFF_SPIN(%o2, %o3, 1b) + .size atomic64_sub_unchecked, .-atomic64_sub_unchecked + .globl atomic64_add_ret .type atomic64_add_ret,#function atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: ldx [%o1], %g1 - add %g1, %o0, %g7 + addcc %g1, %o0, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %xcc, 6 +#endif + casx [%o1], %g1, %g7 cmp %g1, %g7 bne,pn %xcc, 2f @@ -121,12 +232,33 @@ atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ 2: BACKOFF_SPIN(%o2, %o3, 1b) .size atomic64_add_ret, .-atomic64_add_ret + .globl atomic64_add_ret_unchecked + .type atomic64_add_ret_unchecked,#function +atomic64_add_ret_unchecked: /* %o0 = increment, %o1 = atomic_ptr */ + BACKOFF_SETUP(%o2) +1: ldx [%o1], %g1 + addcc %g1, %o0, %g7 + casx [%o1], %g1, %g7 + cmp %g1, %g7 + bne,pn %xcc, 2f + add %g7, %o0, %g7 + mov %g7, %o0 + retl + nop +2: BACKOFF_SPIN(%o2, %o3, 1b) + .size atomic64_add_ret_unchecked, .-atomic64_add_ret_unchecked + .globl atomic64_sub_ret .type atomic64_sub_ret,#function atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: ldx [%o1], %g1 - sub %g1, %o0, %g7 + subcc %g1, %o0, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %xcc, 6 +#endif + casx [%o1], %g1, %g7 cmp %g1, %g7 bne,pn %xcc, 2f diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c index 704b126..2e79d76 100644 --- a/arch/sparc/lib/ksyms.c +++ b/arch/sparc/lib/ksyms.c @@ -144,12 +144,18 @@ EXPORT_SYMBOL(__downgrade_write); /* Atomic counter implementation. 
*/ EXPORT_SYMBOL(atomic_add); +EXPORT_SYMBOL(atomic_add_unchecked); EXPORT_SYMBOL(atomic_add_ret); +EXPORT_SYMBOL(atomic_add_ret_unchecked); EXPORT_SYMBOL(atomic_sub); +EXPORT_SYMBOL(atomic_sub_unchecked); EXPORT_SYMBOL(atomic_sub_ret); EXPORT_SYMBOL(atomic64_add); +EXPORT_SYMBOL(atomic64_add_unchecked); EXPORT_SYMBOL(atomic64_add_ret); +EXPORT_SYMBOL(atomic64_add_ret_unchecked); EXPORT_SYMBOL(atomic64_sub); +EXPORT_SYMBOL(atomic64_sub_unchecked); EXPORT_SYMBOL(atomic64_sub_ret); /* Atomic bit operations. */ diff --git a/arch/sparc/lib/rwsem_64.S b/arch/sparc/lib/rwsem_64.S index 91a7d29..ce75c29 100644 --- a/arch/sparc/lib/rwsem_64.S +++ b/arch/sparc/lib/rwsem_64.S @@ -11,7 +11,12 @@ .globl __down_read __down_read: 1: lduw [%o0], %g1 - add %g1, 1, %g7 + addcc %g1, 1, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %icc, 6 +#endif + cas [%o0], %g1, %g7 cmp %g1, %g7 bne,pn %icc, 1b @@ -33,7 +38,12 @@ __down_read: .globl __down_read_trylock __down_read_trylock: 1: lduw [%o0], %g1 - add %g1, 1, %g7 + addcc %g1, 1, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %icc, 6 +#endif + cmp %g7, 0 bl,pn %icc, 2f mov 0, %o1 @@ -51,7 +61,12 @@ __down_write: or %g1, %lo(RWSEM_ACTIVE_WRITE_BIAS), %g1 1: lduw [%o0], %g3 - add %g3, %g1, %g7 + addcc %g3, %g1, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %icc, 6 +#endif + cas [%o0], %g3, %g7 cmp %g3, %g7 bne,pn %icc, 1b @@ -77,7 +92,12 @@ __down_write_trylock: cmp %g3, 0 bne,pn %icc, 2f mov 0, %o1 - add %g3, %g1, %g7 + addcc %g3, %g1, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %icc, 6 +#endif + cas [%o0], %g3, %g7 cmp %g3, %g7 bne,pn %icc, 1b @@ -90,7 +110,12 @@ __down_write_trylock: __up_read: 1: lduw [%o0], %g1 - sub %g1, 1, %g7 + subcc %g1, 1, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %icc, 6 +#endif + cas [%o0], %g1, %g7 cmp %g1, %g7 bne,pn %icc, 1b @@ -118,7 +143,12 @@ __up_write: or %g1, %lo(RWSEM_ACTIVE_WRITE_BIAS), %g1 1: lduw [%o0], %g3 - sub %g3, %g1, %g7 + subcc %g3, %g1, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %icc, 6 +#endif + cas [%o0], %g3, %g7 
cmp %g3, %g7 bne,pn %icc, 1b @@ -143,7 +173,12 @@ __downgrade_write: or %g1, %lo(RWSEM_WAITING_BIAS), %g1 1: lduw [%o0], %g3 - sub %g3, %g1, %g7 + subcc %g3, %g1, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %icc, 6 +#endif + cas [%o0], %g3, %g7 cmp %g3, %g7 bne,pn %icc, 1b diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile index 79836a7..62f47a2 100644 --- a/arch/sparc/mm/Makefile +++ b/arch/sparc/mm/Makefile @@ -2,7 +2,7 @@ # asflags-y := -ansi -ccflags-y := -Werror +#ccflags-y := -Werror obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o obj-y += fault_$(BITS).o diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index b99f81c..3453e93 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -21,6 +21,9 @@ #include #include #include +#include +#include +#include #include #include @@ -167,6 +170,267 @@ static unsigned long compute_si_addr(struct pt_regs *regs, int text_fault) return safe_compute_effective_address(regs, insn); } +#ifdef CONFIG_PAX_PAGEEXEC +#ifdef CONFIG_PAX_DLRESOLVE +static void pax_emuplt_close(struct vm_area_struct *vma) +{ + vma->vm_mm->call_dl_resolve = 0UL; +} + +static int pax_emuplt_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + unsigned int *kaddr; + + vmf->page = alloc_page(GFP_HIGHUSER); + if (!vmf->page) + return VM_FAULT_OOM; + + kaddr = kmap(vmf->page); + memset(kaddr, 0, PAGE_SIZE); + kaddr[0] = 0x9DE3BFA8U; /* save */ + flush_dcache_page(vmf->page); + kunmap(vmf->page); + return VM_FAULT_MAJOR; +} + +static const struct vm_operations_struct pax_vm_ops = { + .close = pax_emuplt_close, + .fault = pax_emuplt_fault +}; + +static int pax_insert_vma(struct vm_area_struct *vma, unsigned long addr) +{ + int ret; + + vma->vm_mm = current->mm; + vma->vm_start = addr; + vma->vm_end = addr + PAGE_SIZE; + vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC; + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + vma->vm_ops = &pax_vm_ops; + + ret = insert_vm_struct(current->mm, vma); + if (ret) 
+ return ret; + + ++current->mm->total_vm; + return 0; +} +#endif + +/* + * PaX: decide what to do with offenders (regs->pc = fault address) + * + * returns 1 when task should be killed + * 2 when patched PLT trampoline was detected + * 3 when unpatched PLT trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_EMUPLT + int err; + + do { /* PaX: patched PLT emulation #1 */ + unsigned int sethi1, sethi2, jmpl; + + err = get_user(sethi1, (unsigned int *)regs->pc); + err |= get_user(sethi2, (unsigned int *)(regs->pc+4)); + err |= get_user(jmpl, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((sethi1 & 0xFFC00000U) == 0x03000000U && + (sethi2 & 0xFFC00000U) == 0x03000000U && + (jmpl & 0xFFFFE000U) == 0x81C06000U) + { + unsigned int addr; + + regs->u_regs[UREG_G1] = (sethi2 & 0x003FFFFFU) << 10; + addr = regs->u_regs[UREG_G1]; + addr += (((jmpl | 0xFFFFE000U) ^ 0x00001000U) + 0x00001000U); + regs->pc = addr; + regs->npc = addr+4; + return 2; + } + } while (0); + + { /* PaX: patched PLT emulation #2 */ + unsigned int ba; + + err = get_user(ba, (unsigned int *)regs->pc); + + if (!err && (ba & 0xFFC00000U) == 0x30800000U) { + unsigned int addr; + + addr = regs->pc + ((((ba | 0xFFC00000U) ^ 0x00200000U) + 0x00200000U) << 2); + regs->pc = addr; + regs->npc = addr+4; + return 2; + } + } + + do { /* PaX: patched PLT emulation #3 */ + unsigned int sethi, jmpl, nop; + + err = get_user(sethi, (unsigned int *)regs->pc); + err |= get_user(jmpl, (unsigned int *)(regs->pc+4)); + err |= get_user(nop, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + (jmpl & 0xFFFFE000U) == 0x81C06000U && + nop == 0x01000000U) + { + unsigned int addr; + + addr = (sethi & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G1] = addr; + addr += (((jmpl | 0xFFFFE000U) ^ 0x00001000U) + 0x00001000U); + regs->pc = addr; + regs->npc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: unpatched 
PLT emulation step 1 */ + unsigned int sethi, ba, nop; + + err = get_user(sethi, (unsigned int *)regs->pc); + err |= get_user(ba, (unsigned int *)(regs->pc+4)); + err |= get_user(nop, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + ((ba & 0xFFC00000U) == 0x30800000U || (ba & 0xFFF80000U) == 0x30680000U) && + nop == 0x01000000U) + { + unsigned int addr, save, call; + + if ((ba & 0xFFC00000U) == 0x30800000U) + addr = regs->pc + 4 + ((((ba | 0xFFC00000U) ^ 0x00200000U) + 0x00200000U) << 2); + else + addr = regs->pc + 4 + ((((ba | 0xFFF80000U) ^ 0x00040000U) + 0x00040000U) << 2); + + err = get_user(save, (unsigned int *)addr); + err |= get_user(call, (unsigned int *)(addr+4)); + err |= get_user(nop, (unsigned int *)(addr+8)); + if (err) + break; + +#ifdef CONFIG_PAX_DLRESOLVE + if (save == 0x9DE3BFA8U && + (call & 0xC0000000U) == 0x40000000U && + nop == 0x01000000U) + { + struct vm_area_struct *vma; + unsigned long call_dl_resolve; + + down_read(&current->mm->mmap_sem); + call_dl_resolve = current->mm->call_dl_resolve; + up_read(&current->mm->mmap_sem); + if (likely(call_dl_resolve)) + goto emulate; + + vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + + down_write(&current->mm->mmap_sem); + if (current->mm->call_dl_resolve) { + call_dl_resolve = current->mm->call_dl_resolve; + up_write(&current->mm->mmap_sem); + if (vma) + kmem_cache_free(vm_area_cachep, vma); + goto emulate; + } + + call_dl_resolve = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); + if (!vma || (call_dl_resolve & ~PAGE_MASK)) { + up_write(&current->mm->mmap_sem); + if (vma) + kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + if (pax_insert_vma(vma, call_dl_resolve)) { + up_write(&current->mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + current->mm->call_dl_resolve = call_dl_resolve; + up_write(&current->mm->mmap_sem); + +emulate: + regs->u_regs[UREG_G1] = (sethi & 0x003FFFFFU) << 10; + regs->pc = call_dl_resolve; + regs->npc = addr+4; +
return 3; + } +#endif + + /* PaX: glibc 2.4+ generates sethi/jmpl instead of save/call */ + if ((save & 0xFFC00000U) == 0x05000000U && + (call & 0xFFFFE000U) == 0x85C0A000U && + nop == 0x01000000U) + { + regs->u_regs[UREG_G1] = (sethi & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G2] = addr + 4; + addr = (save & 0x003FFFFFU) << 10; + addr += (((call | 0xFFFFE000U) ^ 0x00001000U) + 0x00001000U); + regs->pc = addr; + regs->npc = addr+4; + return 3; + } + } + } while (0); + + do { /* PaX: unpatched PLT emulation step 2 */ + unsigned int save, call, nop; + + err = get_user(save, (unsigned int *)(regs->pc-4)); + err |= get_user(call, (unsigned int *)regs->pc); + err |= get_user(nop, (unsigned int *)(regs->pc+4)); + if (err) + break; + + if (save == 0x9DE3BFA8U && + (call & 0xC0000000U) == 0x40000000U && + nop == 0x01000000U) + { + unsigned int dl_resolve = regs->pc + ((((call | 0xC0000000U) ^ 0x20000000U) + 0x20000000U) << 2); + + regs->u_regs[UREG_RETPC] = regs->pc; + regs->pc = dl_resolve; + regs->npc = dl_resolve+4; + return 3; + } + } while (0); +#endif + + return 1; +} + +void pax_report_insns(struct pt_regs *regs, void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 8; i++) { + unsigned int c; + if (get_user(c, (unsigned int *)pc+i)) + printk(KERN_CONT "???????? 
"); + else + printk(KERN_CONT "%08x ", c); + } + printk("\n"); +} +#endif + asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, unsigned long address) { @@ -231,6 +495,24 @@ good_area: if(!(vma->vm_flags & VM_WRITE)) goto bad_area; } else { + +#ifdef CONFIG_PAX_PAGEEXEC + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && text_fault && !(vma->vm_flags & VM_EXEC)) { + up_read(&mm->mmap_sem); + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 2: + case 3: + return; +#endif + + } + pax_report_fault(regs, (void *)regs->pc, (void *)regs->u_regs[UREG_FP]); + do_group_exit(SIGKILL); + } +#endif + /* Allow reads even for write-only mappings */ if(!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 43b0da9..a0b78f9 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -20,6 +20,9 @@ #include #include #include +#include +#include +#include #include #include @@ -78,7 +81,7 @@ static void bad_kernel_pc(struct pt_regs *regs, unsigned long vaddr) printk(KERN_CRIT "OOPS: Bogus kernel PC [%016lx] in fault handler\n", regs->tpc); printk(KERN_CRIT "OOPS: RPC [%016lx]\n", regs->u_regs[15]); - printk("OOPS: RPC <%pS>\n", (void *) regs->u_regs[15]); + printk("OOPS: RPC <%pA>\n", (void *) regs->u_regs[15]); printk(KERN_CRIT "OOPS: Fault was to vaddr[%lx]\n", vaddr); dump_stack(); unhandled_fault(regs->tpc, current, regs); @@ -249,6 +252,456 @@ static void noinline bogus_32bit_fault_address(struct pt_regs *regs, show_regs(regs); } +#ifdef CONFIG_PAX_PAGEEXEC +#ifdef CONFIG_PAX_DLRESOLVE +static void pax_emuplt_close(struct vm_area_struct *vma) +{ + vma->vm_mm->call_dl_resolve = 0UL; +} + +static int pax_emuplt_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + unsigned int *kaddr; + + vmf->page = alloc_page(GFP_HIGHUSER); + if (!vmf->page) + return VM_FAULT_OOM; + + kaddr = kmap(vmf->page); + memset(kaddr, 0, PAGE_SIZE); + kaddr[0] = 
0x9DE3BFA8U; /* save */ + flush_dcache_page(vmf->page); + kunmap(vmf->page); + return VM_FAULT_MAJOR; +} + +static const struct vm_operations_struct pax_vm_ops = { + .close = pax_emuplt_close, + .fault = pax_emuplt_fault +}; + +static int pax_insert_vma(struct vm_area_struct *vma, unsigned long addr) +{ + int ret; + + vma->vm_mm = current->mm; + vma->vm_start = addr; + vma->vm_end = addr + PAGE_SIZE; + vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC; + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + vma->vm_ops = &pax_vm_ops; + + ret = insert_vm_struct(current->mm, vma); + if (ret) + return ret; + + ++current->mm->total_vm; + return 0; +} +#endif + +/* + * PaX: decide what to do with offenders (regs->tpc = fault address) + * + * returns 1 when task should be killed + * 2 when patched PLT trampoline was detected + * 3 when unpatched PLT trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_EMUPLT + int err; + + do { /* PaX: patched PLT emulation #1 */ + unsigned int sethi1, sethi2, jmpl; + + err = get_user(sethi1, (unsigned int *)regs->tpc); + err |= get_user(sethi2, (unsigned int *)(regs->tpc+4)); + err |= get_user(jmpl, (unsigned int *)(regs->tpc+8)); + + if (err) + break; + + if ((sethi1 & 0xFFC00000U) == 0x03000000U && + (sethi2 & 0xFFC00000U) == 0x03000000U && + (jmpl & 0xFFFFE000U) == 0x81C06000U) + { + unsigned long addr; + + regs->u_regs[UREG_G1] = (sethi2 & 0x003FFFFFU) << 10; + addr = regs->u_regs[UREG_G1]; + addr += (((jmpl | 0xFFFFFFFFFFFFE000UL) ^ 0x00001000UL) + 0x00001000UL); + + if (test_thread_flag(TIF_32BIT)) + addr &= 0xFFFFFFFFUL; + + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + { /* PaX: patched PLT emulation #2 */ + unsigned int ba; + + err = get_user(ba, (unsigned int *)regs->tpc); + + if (!err && (ba & 0xFFC00000U) == 0x30800000U) { + unsigned long addr; + + addr = regs->tpc + ((((ba | 0xFFFFFFFFFFC00000UL) ^ 0x00200000UL) + 0x00200000UL) 
<< 2); + + if (test_thread_flag(TIF_32BIT)) + addr &= 0xFFFFFFFFUL; + + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } + + do { /* PaX: patched PLT emulation #3 */ + unsigned int sethi, jmpl, nop; + + err = get_user(sethi, (unsigned int *)regs->tpc); + err |= get_user(jmpl, (unsigned int *)(regs->tpc+4)); + err |= get_user(nop, (unsigned int *)(regs->tpc+8)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + (jmpl & 0xFFFFE000U) == 0x81C06000U && + nop == 0x01000000U) + { + unsigned long addr; + + addr = (sethi & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G1] = addr; + addr += (((jmpl | 0xFFFFFFFFFFFFE000UL) ^ 0x00001000UL) + 0x00001000UL); + + if (test_thread_flag(TIF_32BIT)) + addr &= 0xFFFFFFFFUL; + + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #4 */ + unsigned int sethi, mov1, call, mov2; + + err = get_user(sethi, (unsigned int *)regs->tpc); + err |= get_user(mov1, (unsigned int *)(regs->tpc+4)); + err |= get_user(call, (unsigned int *)(regs->tpc+8)); + err |= get_user(mov2, (unsigned int *)(regs->tpc+12)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + mov1 == 0x8210000FU && + (call & 0xC0000000U) == 0x40000000U && + mov2 == 0x9E100001U) + { + unsigned long addr; + + regs->u_regs[UREG_G1] = regs->u_regs[UREG_RETPC]; + addr = regs->tpc + 4 + ((((call | 0xFFFFFFFFC0000000UL) ^ 0x20000000UL) + 0x20000000UL) << 2); + + if (test_thread_flag(TIF_32BIT)) + addr &= 0xFFFFFFFFUL; + + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #5 */ + unsigned int sethi, sethi1, sethi2, or1, or2, sllx, jmpl, nop; + + err = get_user(sethi, (unsigned int *)regs->tpc); + err |= get_user(sethi1, (unsigned int *)(regs->tpc+4)); + err |= get_user(sethi2, (unsigned int *)(regs->tpc+8)); + err |= get_user(or1, (unsigned int *)(regs->tpc+12)); + err |= get_user(or2, (unsigned int *)(regs->tpc+16)); + err 
|= get_user(sllx, (unsigned int *)(regs->tpc+20)); + err |= get_user(jmpl, (unsigned int *)(regs->tpc+24)); + err |= get_user(nop, (unsigned int *)(regs->tpc+28)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + (sethi1 & 0xFFC00000U) == 0x03000000U && + (sethi2 & 0xFFC00000U) == 0x0B000000U && + (or1 & 0xFFFFE000U) == 0x82106000U && + (or2 & 0xFFFFE000U) == 0x8A116000U && + sllx == 0x83287020U && + jmpl == 0x81C04005U && + nop == 0x01000000U) + { + unsigned long addr; + + regs->u_regs[UREG_G1] = ((sethi1 & 0x003FFFFFU) << 10) | (or1 & 0x000003FFU); + regs->u_regs[UREG_G1] <<= 32; + regs->u_regs[UREG_G5] = ((sethi2 & 0x003FFFFFU) << 10) | (or2 & 0x000003FFU); + addr = regs->u_regs[UREG_G1] + regs->u_regs[UREG_G5]; + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #6 */ + unsigned int sethi, sethi1, sethi2, sllx, or, jmpl, nop; + + err = get_user(sethi, (unsigned int *)regs->tpc); + err |= get_user(sethi1, (unsigned int *)(regs->tpc+4)); + err |= get_user(sethi2, (unsigned int *)(regs->tpc+8)); + err |= get_user(sllx, (unsigned int *)(regs->tpc+12)); + err |= get_user(or, (unsigned int *)(regs->tpc+16)); + err |= get_user(jmpl, (unsigned int *)(regs->tpc+20)); + err |= get_user(nop, (unsigned int *)(regs->tpc+24)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + (sethi1 & 0xFFC00000U) == 0x03000000U && + (sethi2 & 0xFFC00000U) == 0x0B000000U && + sllx == 0x83287020U && + (or & 0xFFFFE000U) == 0x8A116000U && + jmpl == 0x81C04005U && + nop == 0x01000000U) + { + unsigned long addr; + + regs->u_regs[UREG_G1] = (sethi1 & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G1] <<= 32; + regs->u_regs[UREG_G5] = ((sethi2 & 0x003FFFFFU) << 10) | (or & 0x3FFU); + addr = regs->u_regs[UREG_G1] + regs->u_regs[UREG_G5]; + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: unpatched PLT emulation step 1 */ + unsigned int sethi, ba, nop; + + err = 
get_user(sethi, (unsigned int *)regs->tpc); + err |= get_user(ba, (unsigned int *)(regs->tpc+4)); + err |= get_user(nop, (unsigned int *)(regs->tpc+8)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + ((ba & 0xFFC00000U) == 0x30800000U || (ba & 0xFFF80000U) == 0x30680000U) && + nop == 0x01000000U) + { + unsigned long addr; + unsigned int save, call; + unsigned int sethi1, sethi2, or1, or2, sllx, add, jmpl; + + if ((ba & 0xFFC00000U) == 0x30800000U) + addr = regs->tpc + 4 + ((((ba | 0xFFFFFFFFFFC00000UL) ^ 0x00200000UL) + 0x00200000UL) << 2); + else + addr = regs->tpc + 4 + ((((ba | 0xFFFFFFFFFFF80000UL) ^ 0x00040000UL) + 0x00040000UL) << 2); + + if (test_thread_flag(TIF_32BIT)) + addr &= 0xFFFFFFFFUL; + + err = get_user(save, (unsigned int *)addr); + err |= get_user(call, (unsigned int *)(addr+4)); + err |= get_user(nop, (unsigned int *)(addr+8)); + if (err) + break; + +#ifdef CONFIG_PAX_DLRESOLVE + if (save == 0x9DE3BFA8U && + (call & 0xC0000000U) == 0x40000000U && + nop == 0x01000000U) + { + struct vm_area_struct *vma; + unsigned long call_dl_resolve; + + down_read(&current->mm->mmap_sem); + call_dl_resolve = current->mm->call_dl_resolve; + up_read(&current->mm->mmap_sem); + if (likely(call_dl_resolve)) + goto emulate; + + vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + + down_write(&current->mm->mmap_sem); + if (current->mm->call_dl_resolve) { + call_dl_resolve = current->mm->call_dl_resolve; + up_write(&current->mm->mmap_sem); + if (vma) + kmem_cache_free(vm_area_cachep, vma); + goto emulate; + } + + call_dl_resolve = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); + if (!vma || (call_dl_resolve & ~PAGE_MASK)) { + up_write(&current->mm->mmap_sem); + if (vma) + kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + if (pax_insert_vma(vma, call_dl_resolve)) { + up_write(&current->mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + current->mm->call_dl_resolve = call_dl_resolve; + up_write(&current->mm->mmap_sem); + +emulate: +
regs->u_regs[UREG_G1] = (sethi & 0x003FFFFFU) << 10; + regs->tpc = call_dl_resolve; + regs->tnpc = addr+4; + return 3; + } +#endif + + /* PaX: glibc 2.4+ generates sethi/jmpl instead of save/call */ + if ((save & 0xFFC00000U) == 0x05000000U && + (call & 0xFFFFE000U) == 0x85C0A000U && + nop == 0x01000000U) + { + regs->u_regs[UREG_G1] = (sethi & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G2] = addr + 4; + addr = (save & 0x003FFFFFU) << 10; + addr += (((call | 0xFFFFFFFFFFFFE000UL) ^ 0x00001000UL) + 0x00001000UL); + + if (test_thread_flag(TIF_32BIT)) + addr &= 0xFFFFFFFFUL; + + regs->tpc = addr; + regs->tnpc = addr+4; + return 3; + } + + /* PaX: 64-bit PLT stub */ + err = get_user(sethi1, (unsigned int *)addr); + err |= get_user(sethi2, (unsigned int *)(addr+4)); + err |= get_user(or1, (unsigned int *)(addr+8)); + err |= get_user(or2, (unsigned int *)(addr+12)); + err |= get_user(sllx, (unsigned int *)(addr+16)); + err |= get_user(add, (unsigned int *)(addr+20)); + err |= get_user(jmpl, (unsigned int *)(addr+24)); + err |= get_user(nop, (unsigned int *)(addr+28)); + if (err) + break; + + if ((sethi1 & 0xFFC00000U) == 0x09000000U && + (sethi2 & 0xFFC00000U) == 0x0B000000U && + (or1 & 0xFFFFE000U) == 0x88112000U && + (or2 & 0xFFFFE000U) == 0x8A116000U && + sllx == 0x89293020U && + add == 0x8A010005U && + jmpl == 0x89C14000U && + nop == 0x01000000U) + { + regs->u_regs[UREG_G1] = (sethi & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G4] = ((sethi1 & 0x003FFFFFU) << 10) | (or1 & 0x000003FFU); + regs->u_regs[UREG_G4] <<= 32; + regs->u_regs[UREG_G5] = ((sethi2 & 0x003FFFFFU) << 10) | (or2 & 0x000003FFU); + regs->u_regs[UREG_G5] += regs->u_regs[UREG_G4]; + regs->u_regs[UREG_G4] = addr + 24; + addr = regs->u_regs[UREG_G5]; + regs->tpc = addr; + regs->tnpc = addr+4; + return 3; + } + } + } while (0); + +#ifdef CONFIG_PAX_DLRESOLVE + do { /* PaX: unpatched PLT emulation step 2 */ + unsigned int save, call, nop; + + err = get_user(save, (unsigned int *)(regs->tpc-4)); + err |= 
get_user(call, (unsigned int *)regs->tpc); + err |= get_user(nop, (unsigned int *)(regs->tpc+4)); + if (err) + break; + + if (save == 0x9DE3BFA8U && + (call & 0xC0000000U) == 0x40000000U && + nop == 0x01000000U) + { + unsigned long dl_resolve = regs->tpc + ((((call | 0xFFFFFFFFC0000000UL) ^ 0x20000000UL) + 0x20000000UL) << 2); + + if (test_thread_flag(TIF_32BIT)) + dl_resolve &= 0xFFFFFFFFUL; + + regs->u_regs[UREG_RETPC] = regs->tpc; + regs->tpc = dl_resolve; + regs->tnpc = dl_resolve+4; + return 3; + } + } while (0); +#endif + + do { /* PaX: patched PLT emulation #7, must be AFTER the unpatched PLT emulation */ + unsigned int sethi, ba, nop; + + err = get_user(sethi, (unsigned int *)regs->tpc); + err |= get_user(ba, (unsigned int *)(regs->tpc+4)); + err |= get_user(nop, (unsigned int *)(regs->tpc+8)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + (ba & 0xFFF00000U) == 0x30600000U && + nop == 0x01000000U) + { + unsigned long addr; + + addr = (sethi & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G1] = addr; + addr = regs->tpc + ((((ba | 0xFFFFFFFFFFF80000UL) ^ 0x00040000UL) + 0x00040000UL) << 2); + + if (test_thread_flag(TIF_32BIT)) + addr &= 0xFFFFFFFFUL; + + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + +#endif + + return 1; +} + +void pax_report_insns(struct pt_regs *regs, void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 8; i++) { + unsigned int c; + if (get_user(c, (unsigned int *)pc+i)) + printk(KERN_CONT "???????? 
"); + else + printk(KERN_CONT "%08x ", c); + } + printk("\n"); +} +#endif + asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) { struct mm_struct *mm = current->mm; @@ -315,6 +768,29 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) if (!vma) goto bad_area; +#ifdef CONFIG_PAX_PAGEEXEC + /* PaX: detect ITLB misses on non-exec pages */ + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && vma->vm_start <= address && + !(vma->vm_flags & VM_EXEC) && (fault_code & FAULT_CODE_ITLB)) + { + if (address != regs->tpc) + goto good_area; + + up_read(&mm->mmap_sem); + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 2: + case 3: + return; +#endif + + } + pax_report_fault(regs, (void *)regs->tpc, (void *)(regs->u_regs[UREG_FP] + STACK_BIAS)); + do_group_exit(SIGKILL); + } +#endif + /* Pure DTLB misses do not tell us whether the fault causing * load/store/atomic was a write or not, it only says that there * was no match. So in such a case we (carefully) read the diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index f27d103..1b06377 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -69,7 +69,7 @@ full_search: } return -ENOMEM; } - if (likely(!vma || addr + len <= vma->vm_start)) { + if (likely(check_heap_stack_gap(vma, addr, len))) { /* * Remember the place where we stopped the search: */ @@ -108,7 +108,7 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, /* make sure it can fit in the remaining address space */ if (likely(addr > len)) { vma = find_vma(mm, addr-len); - if (!vma || addr <= vma->vm_start) { + if (check_heap_stack_gap(vma, addr - len, len)) { /* remember the address as a hint for next time */ return (mm->free_area_cache = addr-len); } @@ -117,16 +117,17 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, if (unlikely(mm->mmap_base < len)) goto bottomup; - addr = (mm->mmap_base-len) & HPAGE_MASK; + addr = 
mm->mmap_base - len; do { + addr &= HPAGE_MASK; /* * Lookup failure means no vma is above this address, * else if new region fits below vma->vm_start, * return with success: */ vma = find_vma(mm, addr); - if (likely(!vma || addr+len <= vma->vm_start)) { + if (likely(check_heap_stack_gap(vma, addr, len))) { /* remember the address as a hint for next time */ return (mm->free_area_cache = addr); } @@ -136,8 +137,8 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, mm->cached_hole_size = vma->vm_start - addr; /* try just below the current vma->vm_start */ - addr = (vma->vm_start-len) & HPAGE_MASK; - } while (likely(len < vma->vm_start)); + addr = skip_heap_stack_gap(vma, len); + } while (!IS_ERR_VALUE(addr)); bottomup: /* @@ -183,8 +184,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, if (addr) { addr = ALIGN(addr, HPAGE_SIZE); vma = find_vma(mm, addr); - if (task_size - len >= addr && - (!vma || addr + len <= vma->vm_start)) + if (task_size - len >= addr && check_heap_stack_gap(vma, addr, len)) return addr; } if (mm->get_unmapped_area == arch_get_unmapped_area) diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index dc7c3b1..34c0070 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -317,6 +317,9 @@ extern void device_scan(void); pgprot_t PAGE_SHARED __read_mostly; EXPORT_SYMBOL(PAGE_SHARED); +pgprot_t PAGE_SHARED_NOEXEC __read_mostly; +EXPORT_SYMBOL(PAGE_SHARED_NOEXEC); + void __init paging_init(void) { switch(sparc_cpu_model) { @@ -345,17 +348,17 @@ void __init paging_init(void) /* Initialize the protection map with non-constant, MMU dependent values. 
*/ protection_map[0] = PAGE_NONE; - protection_map[1] = PAGE_READONLY; - protection_map[2] = PAGE_COPY; - protection_map[3] = PAGE_COPY; + protection_map[1] = PAGE_READONLY_NOEXEC; + protection_map[2] = PAGE_COPY_NOEXEC; + protection_map[3] = PAGE_COPY_NOEXEC; protection_map[4] = PAGE_READONLY; protection_map[5] = PAGE_READONLY; protection_map[6] = PAGE_COPY; protection_map[7] = PAGE_COPY; protection_map[8] = PAGE_NONE; - protection_map[9] = PAGE_READONLY; - protection_map[10] = PAGE_SHARED; - protection_map[11] = PAGE_SHARED; + protection_map[9] = PAGE_READONLY_NOEXEC; + protection_map[10] = PAGE_SHARED_NOEXEC; + protection_map[11] = PAGE_SHARED_NOEXEC; protection_map[12] = PAGE_READONLY; protection_map[13] = PAGE_READONLY; protection_map[14] = PAGE_SHARED; diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index 509b1ff..bfd7118 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -2200,6 +2200,13 @@ void __init ld_mmu_srmmu(void) PAGE_SHARED = pgprot_val(SRMMU_PAGE_SHARED); BTFIXUPSET_INT(page_copy, pgprot_val(SRMMU_PAGE_COPY)); BTFIXUPSET_INT(page_readonly, pgprot_val(SRMMU_PAGE_RDONLY)); + +#ifdef CONFIG_PAX_PAGEEXEC + PAGE_SHARED_NOEXEC = pgprot_val(SRMMU_PAGE_SHARED_NOEXEC); + BTFIXUPSET_INT(page_copy_noexec, pgprot_val(SRMMU_PAGE_COPY_NOEXEC)); + BTFIXUPSET_INT(page_readonly_noexec, pgprot_val(SRMMU_PAGE_RDONLY_NOEXEC)); +#endif + BTFIXUPSET_INT(page_kernel, pgprot_val(SRMMU_PAGE_KERNEL)); page_kernel = pgprot_val(SRMMU_PAGE_KERNEL); diff --git a/arch/um/Makefile b/arch/um/Makefile index fc633db..5e1a1c2 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -49,6 +49,10 @@ USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -D__KERNEL__,,\ $(patsubst -I%,,$(KBUILD_CFLAGS)))) $(ARCH_INCLUDE) $(MODE_INCLUDE) \ $(filter -I%,$(CFLAGS)) -D_FILE_OFFSET_BITS=64 +ifdef CONSTIFY_PLUGIN +USER_CFLAGS += $(CONSTIFY_PLUGIN) -fplugin-arg-constify_plugin-no-constify +endif + include $(srctree)/$(ARCH_DIR)/Makefile-$(SUBARCH) #This will adjust 
*FLAGS accordingly to the platform. diff --git a/arch/um/include/asm/kmap_types.h b/arch/um/include/asm/kmap_types.h index 6c03acd..a5e0215 100644 --- a/arch/um/include/asm/kmap_types.h +++ b/arch/um/include/asm/kmap_types.h @@ -23,6 +23,7 @@ enum km_type { KM_IRQ1, KM_SOFTIRQ0, KM_SOFTIRQ1, + KM_CLEARPAGE, KM_TYPE_NR }; diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h index 4cc9b6c..02e5029 100644 --- a/arch/um/include/asm/page.h +++ b/arch/um/include/asm/page.h @@ -14,6 +14,9 @@ #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) +#define ktla_ktva(addr) (addr) +#define ktva_ktla(addr) (addr) + #ifndef __ASSEMBLY__ struct page; diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 4a28a15..654dc2a 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -393,22 +393,6 @@ int singlestepping(void * t) return 2; } -/* - * Only x86 and x86_64 have an arch_align_stack(). - * All other arches have "#define arch_align_stack(x) (x)" - * in their asm/system.h - * As this is included in UML from asm-um/system-generic.h, - * we can use it to behave as the subarch does. 
- */ -#ifndef arch_align_stack -unsigned long arch_align_stack(unsigned long sp) -{ - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() % 8192; - return sp & ~0xf; -} -#endif - unsigned long get_wchan(struct task_struct *p) { unsigned long stack_page, sp, ip; diff --git a/arch/um/sys-i386/shared/sysdep/system.h b/arch/um/sys-i386/shared/sysdep/system.h index d1b93c4..ae1b7fd 100644 --- a/arch/um/sys-i386/shared/sysdep/system.h +++ b/arch/um/sys-i386/shared/sysdep/system.h @@ -17,7 +17,7 @@ # define AT_VECTOR_SIZE_ARCH 1 #endif -extern unsigned long arch_align_stack(unsigned long sp); +#define arch_align_stack(x) ((x) & ~0xfUL) void default_idle(void); diff --git a/arch/um/sys-i386/syscalls.c b/arch/um/sys-i386/syscalls.c index 857ca0b..9a2669d 100644 --- a/arch/um/sys-i386/syscalls.c +++ b/arch/um/sys-i386/syscalls.c @@ -11,6 +11,21 @@ #include "asm/uaccess.h" #include "asm/unistd.h" +int i386_mmap_check(unsigned long addr, unsigned long len, unsigned long flags) +{ + unsigned long pax_task_size = TASK_SIZE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (current->mm->pax_flags & MF_PAX_SEGMEXEC) + pax_task_size = SEGMEXEC_TASK_SIZE; +#endif + + if (len > pax_task_size || addr > pax_task_size - len) + return -EINVAL; + + return 0; +} + /* * Perform the select(nd, in, out, ex, tv) and mmap() system * calls. 
Linux/i386 didn't use to be able to handle more than diff --git a/arch/um/sys-x86_64/shared/sysdep/system.h b/arch/um/sys-x86_64/shared/sysdep/system.h index d1b93c4..ae1b7fd 100644 --- a/arch/um/sys-x86_64/shared/sysdep/system.h +++ b/arch/um/sys-x86_64/shared/sysdep/system.h @@ -17,7 +17,7 @@ # define AT_VECTOR_SIZE_ARCH 1 #endif -extern unsigned long arch_align_stack(unsigned long sp); +#define arch_align_stack(x) ((x) & ~0xfUL) void default_idle(void); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 73ae02a..f932de5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -223,7 +223,7 @@ config X86_TRAMPOLINE config X86_32_LAZY_GS def_bool y - depends on X86_32 && !CC_STACKPROTECTOR + depends on X86_32 && !CC_STACKPROTECTOR && !PAX_MEMORY_UDEREF config KTIME_SCALAR def_bool X86_32 @@ -1008,7 +1008,7 @@ choice config NOHIGHMEM bool "off" - depends on !X86_NUMAQ + depends on !X86_NUMAQ && !(PAX_PAGEEXEC && PAX_ENABLE_PAE) ---help--- Linux can use up to 64 Gigabytes of physical memory on x86 systems. However, the address space of 32-bit x86 processors is only 4 @@ -1045,7 +1045,7 @@ config NOHIGHMEM config HIGHMEM4G bool "4GB" - depends on !X86_NUMAQ + depends on !X86_NUMAQ && !(PAX_PAGEEXEC && PAX_ENABLE_PAE) ---help--- Select this if you have a 32-bit processor and between 1 and 4 gigabytes of physical RAM. @@ -1099,7 +1099,7 @@ config PAGE_OFFSET hex default 0xB0000000 if VMSPLIT_3G_OPT default 0x80000000 if VMSPLIT_2G - default 0x78000000 if VMSPLIT_2G_OPT + default 0x70000000 if VMSPLIT_2G_OPT default 0x40000000 if VMSPLIT_1G default 0xC0000000 depends on X86_32 @@ -1460,6 +1460,7 @@ config SECCOMP config CC_STACKPROTECTOR bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" + depends on X86_64 || !PAX_MEMORY_UDEREF ---help--- This option turns on the -fstack-protector GCC feature. 
This feature puts, at the beginning of functions, a canary value on @@ -1517,6 +1518,7 @@ config KEXEC_JUMP config PHYSICAL_START hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) default "0x1000000" + range 0x400000 0x40000000 ---help--- This gives the physical address where the kernel is loaded. @@ -1581,6 +1583,7 @@ config PHYSICAL_ALIGN hex prompt "Alignment value to which kernel should be aligned" if X86_32 default "0x1000000" + range 0x400000 0x1000000 if PAX_KERNEXEC range 0x2000 0x1000000 ---help--- This value puts the alignment restrictions on physical address @@ -1612,9 +1615,10 @@ config HOTPLUG_CPU Say N if you want to disable CPU hotplug. config COMPAT_VDSO - def_bool y + def_bool n prompt "Compat VDSO support" depends on X86_32 || IA32_EMULATION + depends on !PAX_NOEXEC && !PAX_MEMORY_UDEREF ---help--- Map the 32-bit VDSO to the predictable old-style address too. ---help--- diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 0e566103..1a6b57e 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -340,7 +340,7 @@ config X86_PPRO_FENCE config X86_F00F_BUG def_bool y - depends on M586MMX || M586TSC || M586 || M486 || M386 + depends on (M586MMX || M586TSC || M586 || M486 || M386) && !PAX_KERNEXEC config X86_WP_WORKS_OK def_bool y @@ -360,7 +360,7 @@ config X86_POPAD_OK config X86_ALIGNMENT_16 def_bool y - depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 + depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK8 || MK7 || MK6 || MCORE2 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 config X86_INTEL_USERCOPY def_bool y @@ -406,7 +406,7 @@ config X86_CMPXCHG64 # generates cmov. 
config X86_CMOV def_bool y - depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM) + depends on (MK8 || MK7 || MCORE2 || MPSC || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM) config X86_MINIMUM_CPU_FAMILY int diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index d105f29..c928727 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -99,7 +99,7 @@ config X86_PTDUMP config DEBUG_RODATA bool "Write protect kernel read-only data structures" default y - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && BROKEN ---help--- Mark the kernel read-only data as write-protected in the pagetables, in order to catch accidental (and incorrect) writes to such const diff --git a/arch/x86/Makefile b/arch/x86/Makefile index d2d24c9..0f21f8d 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -44,6 +44,7 @@ ifeq ($(CONFIG_X86_32),y) else BITS := 64 UTS_MACHINE := x86_64 + biarch := $(call cc-option,-m64) CHECKFLAGS += -D__x86_64__ -m64 KBUILD_AFLAGS += -m64 @@ -189,3 +190,12 @@ define archhelp echo ' FDARGS="..." arguments for the booted kernel' echo ' FDINITRD=file initrd for the booted kernel' endef + +define OLD_LD + +*** ${VERSION}.${PATCHLEVEL} PaX kernels no longer build correctly with old versions of binutils. 
+*** Please upgrade your binutils to 2.18 or newer +endef + +archprepare: + $(if $(LDFLAGS_BUILD_ID),,$(error $(OLD_LD))) diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index ec749c2..bbb5319 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -69,6 +69,9 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \ $(call cc-option, -fno-stack-protector) \ $(call cc-option, -mpreferred-stack-boundary=2) KBUILD_CFLAGS += $(call cc-option, -m32) +ifdef CONSTIFY_PLUGIN +KBUILD_CFLAGS += $(CONSTIFY_PLUGIN) -fplugin-arg-constify_plugin-no-constify +endif KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h index 878e4b9..20537ab 100644 --- a/arch/x86/boot/bitops.h +++ b/arch/x86/boot/bitops.h @@ -26,7 +26,7 @@ static inline int variable_test_bit(int nr, const void *addr) u8 v; const u32 *p = (const u32 *)addr; - asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr)); + asm volatile("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr)); return v; } @@ -37,7 +37,7 @@ static inline int variable_test_bit(int nr, const void *addr) static inline void set_bit(int nr, void *addr) { - asm("btsl %1,%0" : "+m" (*(u32 *)addr) : "Ir" (nr)); + asm volatile("btsl %1,%0" : "+m" (*(u32 *)addr) : "Ir" (nr)); } #endif /* BOOT_BITOPS_H */ diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 98239d2..f40214c 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -82,7 +82,7 @@ static inline void io_delay(void) static inline u16 ds(void) { u16 seg; - asm("movw %%ds,%0" : "=rm" (seg)); + asm volatile("movw %%ds,%0" : "=rm" (seg)); return seg; } @@ -178,7 +178,7 @@ static inline void wrgs32(u32 v, addr_t addr) static inline int memcmp(const void *s1, const void *s2, size_t len) { u8 diff; - asm("repe; cmpsb; setnz %0" + asm volatile("repe; cmpsb; setnz %0" : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; } diff --git 
a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index f8ed065..5bf5ff3 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -13,6 +13,9 @@ cflags-$(CONFIG_X86_64) := -mcmodel=small KBUILD_CFLAGS += $(cflags-y) KBUILD_CFLAGS += $(call cc-option,-ffreestanding) KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector) +ifdef CONSTIFY_PLUGIN +KBUILD_CFLAGS += $(CONSTIFY_PLUGIN) -fplugin-arg-constify_plugin-no-constify +endif KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index f543b70..b60fba8 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -76,7 +76,7 @@ ENTRY(startup_32) notl %eax andl %eax, %ebx #else - movl $LOAD_PHYSICAL_ADDR, %ebx + movl $____LOAD_PHYSICAL_ADDR, %ebx #endif /* Target address to relocate to for decompression */ @@ -149,7 +149,7 @@ relocated: * and where it was actually loaded. */ movl %ebp, %ebx - subl $LOAD_PHYSICAL_ADDR, %ebx + subl $____LOAD_PHYSICAL_ADDR, %ebx jz 2f /* Nothing to be done if loaded at compiled addr. */ /* * Process relocations. 
@@ -157,8 +157,7 @@ relocated: 1: subl $4, %edi movl (%edi), %ecx - testl %ecx, %ecx - jz 2f + jecxz 2f addl %ebx, -__PAGE_OFFSET(%ebx, %ecx) jmp 1b 2: diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 077e1b6..2c6b13b 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -91,7 +91,7 @@ ENTRY(startup_32) notl %eax andl %eax, %ebx #else - movl $LOAD_PHYSICAL_ADDR, %ebx + movl $____LOAD_PHYSICAL_ADDR, %ebx #endif /* Target address to relocate to for decompression */ @@ -183,7 +183,7 @@ no_longmode: hlt jmp 1b -#include "../../kernel/verify_cpu_64.S" +#include "../../kernel/verify_cpu.S" /* * Be careful here startup_64 needs to be at a predictable @@ -234,7 +234,7 @@ ENTRY(startup_64) notq %rax andq %rax, %rbp #else - movq $LOAD_PHYSICAL_ADDR, %rbp + movq $____LOAD_PHYSICAL_ADDR, %rbp #endif /* Target address to relocate to for decompression */ diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 842b2a3..f00178b 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -288,7 +288,7 @@ static void parse_elf(void *output) case PT_LOAD: #ifdef CONFIG_RELOCATABLE dest = output; - dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR); + dest += (phdr->p_paddr - ____LOAD_PHYSICAL_ADDR); #else dest = (void *)(phdr->p_paddr); #endif @@ -335,7 +335,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, error("Destination address too large"); #endif #ifndef CONFIG_RELOCATABLE - if ((unsigned long)output != LOAD_PHYSICAL_ADDR) + if ((unsigned long)output != ____LOAD_PHYSICAL_ADDR) error("Wrong destination address"); #endif diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c index bcbd36c..b1754af 100644 --- a/arch/x86/boot/compressed/mkpiggy.c +++ b/arch/x86/boot/compressed/mkpiggy.c @@ -74,7 +74,7 @@ int main(int argc, char *argv[]) offs = (olen > ilen) ? 
olen - ilen : 0; offs += olen >> 12; /* Add 8 bytes for each 32K block */ - offs += 32*1024 + 18; /* Add 32K + 18 bytes slack */ + offs += 64*1024; /* Add 64K bytes slack */ offs = (offs+4095) & ~4095; /* Round to a 4K boundary */ printf(".section \".rodata.compressed\",\"a\",@progbits\n"); diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c index bbeb0c3..f5167ab 100644 --- a/arch/x86/boot/compressed/relocs.c +++ b/arch/x86/boot/compressed/relocs.c @@ -10,8 +10,11 @@ #define USE_BSD #include +#include "../../../../include/linux/autoconf.h" + #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) static Elf32_Ehdr ehdr; +static Elf32_Phdr *phdr; static unsigned long reloc_count, reloc_idx; static unsigned long *relocs; @@ -37,7 +40,7 @@ static const char* safe_abs_relocs[] = { static int is_safe_abs_reloc(const char* sym_name) { - int i; + unsigned int i; for (i = 0; i < ARRAY_SIZE(safe_abs_relocs); i++) { if (!strcmp(sym_name, safe_abs_relocs[i])) @@ -245,9 +248,39 @@ static void read_ehdr(FILE *fp) } } +static void read_phdrs(FILE *fp) +{ + unsigned int i; + + phdr = calloc(ehdr.e_phnum, sizeof(Elf32_Phdr)); + if (!phdr) { + die("Unable to allocate %d program headers\n", + ehdr.e_phnum); + } + if (fseek(fp, ehdr.e_phoff, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + ehdr.e_phoff, strerror(errno)); + } + if (fread(phdr, sizeof(*phdr), ehdr.e_phnum, fp) != ehdr.e_phnum) { + die("Cannot read ELF program headers: %s\n", + strerror(errno)); + } + for(i = 0; i < ehdr.e_phnum; i++) { + phdr[i].p_type = elf32_to_cpu(phdr[i].p_type); + phdr[i].p_offset = elf32_to_cpu(phdr[i].p_offset); + phdr[i].p_vaddr = elf32_to_cpu(phdr[i].p_vaddr); + phdr[i].p_paddr = elf32_to_cpu(phdr[i].p_paddr); + phdr[i].p_filesz = elf32_to_cpu(phdr[i].p_filesz); + phdr[i].p_memsz = elf32_to_cpu(phdr[i].p_memsz); + phdr[i].p_flags = elf32_to_cpu(phdr[i].p_flags); + phdr[i].p_align = elf32_to_cpu(phdr[i].p_align); + } + +} + static void read_shdrs(FILE *fp) { - int 
i; + unsigned int i; Elf32_Shdr shdr; secs = calloc(ehdr.e_shnum, sizeof(struct section)); @@ -282,7 +315,7 @@ static void read_shdrs(FILE *fp) static void read_strtabs(FILE *fp) { - int i; + unsigned int i; for (i = 0; i < ehdr.e_shnum; i++) { struct section *sec = &secs[i]; if (sec->shdr.sh_type != SHT_STRTAB) { @@ -307,7 +340,7 @@ static void read_strtabs(FILE *fp) static void read_symtabs(FILE *fp) { - int i,j; + unsigned int i,j; for (i = 0; i < ehdr.e_shnum; i++) { struct section *sec = &secs[i]; if (sec->shdr.sh_type != SHT_SYMTAB) { @@ -340,7 +373,9 @@ static void read_symtabs(FILE *fp) static void read_relocs(FILE *fp) { - int i,j; + unsigned int i,j; + uint32_t base; + for (i = 0; i < ehdr.e_shnum; i++) { struct section *sec = &secs[i]; if (sec->shdr.sh_type != SHT_REL) { @@ -360,9 +395,18 @@ static void read_relocs(FILE *fp) die("Cannot read symbol table: %s\n", strerror(errno)); } + base = 0; + for (j = 0; j < ehdr.e_phnum; j++) { + if (phdr[j].p_type != PT_LOAD ) + continue; + if (secs[sec->shdr.sh_info].shdr.sh_offset < phdr[j].p_offset || secs[sec->shdr.sh_info].shdr.sh_offset >= phdr[j].p_offset + phdr[j].p_filesz) + continue; + base = CONFIG_PAGE_OFFSET + phdr[j].p_paddr - phdr[j].p_vaddr; + break; + } for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { Elf32_Rel *rel = &sec->reltab[j]; - rel->r_offset = elf32_to_cpu(rel->r_offset); + rel->r_offset = elf32_to_cpu(rel->r_offset) + base; rel->r_info = elf32_to_cpu(rel->r_info); } } @@ -371,14 +415,14 @@ static void read_relocs(FILE *fp) static void print_absolute_symbols(void) { - int i; + unsigned int i; printf("Absolute symbols\n"); printf(" Num: Value Size Type Bind Visibility Name\n"); for (i = 0; i < ehdr.e_shnum; i++) { struct section *sec = &secs[i]; char *sym_strtab; Elf32_Sym *sh_symtab; - int j; + unsigned int j; if (sec->shdr.sh_type != SHT_SYMTAB) { continue; @@ -406,14 +450,14 @@ static void print_absolute_symbols(void) static void print_absolute_relocs(void) { - int i, printed = 
0; + unsigned int i, printed = 0; for (i = 0; i < ehdr.e_shnum; i++) { struct section *sec = &secs[i]; struct section *sec_applies, *sec_symtab; char *sym_strtab; Elf32_Sym *sh_symtab; - int j; + unsigned int j; if (sec->shdr.sh_type != SHT_REL) { continue; } @@ -474,13 +518,13 @@ static void print_absolute_relocs(void) static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) { - int i; + unsigned int i; /* Walk through the relocations */ for (i = 0; i < ehdr.e_shnum; i++) { char *sym_strtab; Elf32_Sym *sh_symtab; struct section *sec_applies, *sec_symtab; - int j; + unsigned int j; struct section *sec = &secs[i]; if (sec->shdr.sh_type != SHT_REL) { @@ -504,6 +548,21 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) if (sym->st_shndx == SHN_ABS) { continue; } + /* Don't relocate actual per-cpu variables, they are absolute indices, not addresses */ + if (!strcmp(sec_name(sym->st_shndx), ".data.percpu") && strcmp(sym_name(sym_strtab, sym), "__per_cpu_load")) + continue; + +#if defined(CONFIG_PAX_KERNEXEC) && defined(CONFIG_X86_32) + /* Don't relocate actual code, they are relocated implicitly by the base address of KERNEL_CS */ + if (!strcmp(sec_name(sym->st_shndx), ".module.text") && !strcmp(sym_name(sym_strtab, sym), "_etext")) + continue; + if (!strcmp(sec_name(sym->st_shndx), ".init.text")) + continue; + if (!strcmp(sec_name(sym->st_shndx), ".exit.text")) + continue; + if (!strcmp(sec_name(sym->st_shndx), ".text") && strcmp(sym_name(sym_strtab, sym), "__LOAD_PHYSICAL_ADDR")) + continue; +#endif if (r_type == R_386_NONE || r_type == R_386_PC32) { /* * NONE can be ignored and and PC relative @@ -541,7 +600,7 @@ static int cmp_relocs(const void *va, const void *vb) static void emit_relocs(int as_text) { - int i; + unsigned int i; /* Count how many relocations I have and allocate space for them. 
*/ reloc_count = 0; walk_relocs(count_reloc); @@ -634,6 +693,7 @@ int main(int argc, char **argv) fname, strerror(errno)); } read_ehdr(fp); + read_phdrs(fp); read_shdrs(fp); read_strtabs(fp); read_symtabs(fp); diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index 4d3ff03..e4972ff 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -74,7 +74,7 @@ static int has_fpu(void) u16 fcw = -1, fsw = -1; u32 cr0; - asm("movl %%cr0,%0" : "=r" (cr0)); + asm volatile("movl %%cr0,%0" : "=r" (cr0)); if (cr0 & (X86_CR0_EM|X86_CR0_TS)) { cr0 &= ~(X86_CR0_EM|X86_CR0_TS); asm volatile("movl %0,%%cr0" : : "r" (cr0)); @@ -90,7 +90,7 @@ static int has_eflag(u32 mask) { u32 f0, f1; - asm("pushfl ; " + asm volatile("pushfl ; " "pushfl ; " "popl %0 ; " "movl %0,%1 ; " @@ -115,7 +115,7 @@ static void get_flags(void) set_bit(X86_FEATURE_FPU, cpu.flags); if (has_eflag(X86_EFLAGS_ID)) { - asm("cpuid" + asm volatile("cpuid" : "=a" (max_intel_level), "=b" (cpu_vendor[0]), "=d" (cpu_vendor[1]), @@ -124,7 +124,7 @@ static void get_flags(void) if (max_intel_level >= 0x00000001 && max_intel_level <= 0x0000ffff) { - asm("cpuid" + asm volatile("cpuid" : "=a" (tfms), "=c" (cpu.flags[4]), "=d" (cpu.flags[0]) @@ -136,7 +136,7 @@ static void get_flags(void) cpu.model += ((tfms >> 16) & 0xf) << 4; } - asm("cpuid" + asm volatile("cpuid" : "=a" (max_amd_level) : "a" (0x80000000) : "ebx", "ecx", "edx"); @@ -144,7 +144,7 @@ static void get_flags(void) if (max_amd_level >= 0x80000001 && max_amd_level <= 0x8000ffff) { u32 eax = 0x80000001; - asm("cpuid" + asm volatile("cpuid" : "+a" (eax), "=c" (cpu.flags[6]), "=d" (cpu.flags[1]) @@ -203,9 +203,9 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) u32 ecx = MSR_K7_HWCR; u32 eax, edx; - asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); + asm volatile("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); eax &= ~(1 << 15); - asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); + asm volatile("wrmsr" : : "a" 
(eax), "d" (edx), "c" (ecx)); get_flags(); /* Make sure it really did something */ err = check_flags(); @@ -218,9 +218,9 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) u32 ecx = MSR_VIA_FCR; u32 eax, edx; - asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); + asm volatile("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); eax |= (1<<1)|(1<<7); - asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); + asm volatile("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); set_bit(X86_FEATURE_CX8, cpu.flags); err = check_flags(); @@ -231,12 +231,12 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) u32 eax, edx; u32 level = 1; - asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); - asm("wrmsr" : : "a" (~0), "d" (edx), "c" (ecx)); - asm("cpuid" + asm volatile("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); + asm volatile("wrmsr" : : "a" (~0), "d" (edx), "c" (ecx)); + asm volatile("cpuid" : "+a" (level), "=d" (cpu.flags[0]) : : "ecx", "ebx"); - asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); + asm volatile("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); err = check_flags(); } diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index b31cc54..8d69237 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -224,7 +224,7 @@ setup_data: .quad 0 # 64-bit physical pointer to # single linked list of # struct setup_data -pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr +pref_address: .quad ____LOAD_PHYSICAL_ADDR # preferred load addr #define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_extract_offset) #define VO_INIT_SIZE (VO__end - VO__text) diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index cae3feb..ff8ff2a 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c @@ -19,7 +19,7 @@ static int detect_memory_e820(void) { - int count = 0; + unsigned int count = 0; struct biosregs ireg, oreg; struct e820entry *desc = boot_params.e820_map; static struct e820entry buf; /* static so it is 
zeroed */ diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index 11e8c6e..fdbb1ed 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c @@ -200,6 +200,7 @@ static void vesa_store_pm_info(void) boot_params.screen_info.vesapm_seg = oreg.es; boot_params.screen_info.vesapm_off = oreg.di; + boot_params.screen_info.vesapm_size = oreg.cx; } /* diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c index d42da38..787cdf3 100644 --- a/arch/x86/boot/video.c +++ b/arch/x86/boot/video.c @@ -90,7 +90,7 @@ static void store_mode_params(void) static unsigned int get_entry(void) { char entry_buf[4]; - int i, len = 0; + unsigned int i, len = 0; int key; unsigned int v; diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S index 5b577d5..3c1fed4 100644 --- a/arch/x86/crypto/aes-x86_64-asm_64.S +++ b/arch/x86/crypto/aes-x86_64-asm_64.S @@ -8,6 +8,8 @@ * including this sentence is retained in full. */ +#include + .extern crypto_ft_tab .extern crypto_it_tab .extern crypto_fl_tab @@ -71,6 +73,8 @@ FUNC: movq r1,r2; \ je B192; \ leaq 32(r9),r9; +#define ret pax_force_retaddr 0, 1; ret + #define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \ movq r1,r2; \ movq r3,r4; \ diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index eb0566e..e3ebad8 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -16,6 +16,7 @@ */ #include +#include .text @@ -52,6 +53,7 @@ _key_expansion_256a: pxor %xmm1, %xmm0 movaps %xmm0, (%rcx) add $0x10, %rcx + pax_force_retaddr_bts ret _key_expansion_192a: @@ -75,6 +77,7 @@ _key_expansion_192a: shufps $0b01001110, %xmm2, %xmm1 movaps %xmm1, 16(%rcx) add $0x20, %rcx + pax_force_retaddr_bts ret _key_expansion_192b: @@ -93,6 +96,7 @@ _key_expansion_192b: movaps %xmm0, (%rcx) add $0x10, %rcx + pax_force_retaddr_bts ret _key_expansion_256b: @@ -104,6 +108,7 @@ _key_expansion_256b: pxor %xmm1, %xmm2 movaps %xmm2, (%rcx) add $0x10, %rcx + 
pax_force_retaddr_bts ret /* @@ -239,7 +244,9 @@ ENTRY(aesni_set_key) cmp %rcx, %rdi jb .Ldec_key_loop xor %rax, %rax + pax_force_retaddr 0, 1 ret +ENDPROC(aesni_set_key) /* * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) @@ -249,7 +256,9 @@ ENTRY(aesni_enc) movups (INP), STATE # input call _aesni_enc1 movups STATE, (OUTP) # output + pax_force_retaddr 0, 1 ret +ENDPROC(aesni_enc) /* * _aesni_enc1: internal ABI @@ -319,6 +328,7 @@ _aesni_enc1: movaps 0x70(TKEYP), KEY # aesenclast KEY, STATE # last round .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2 + pax_force_retaddr_bts ret /* @@ -482,6 +492,7 @@ _aesni_enc4: .byte 0x66, 0x0f, 0x38, 0xdd, 0xea # aesenclast KEY, STATE4 .byte 0x66, 0x0f, 0x38, 0xdd, 0xf2 + pax_force_retaddr_bts ret /* @@ -493,7 +504,9 @@ ENTRY(aesni_dec) movups (INP), STATE # input call _aesni_dec1 movups STATE, (OUTP) #output + pax_force_retaddr 0, 1 ret +ENDPROC(aesni_dec) /* * _aesni_dec1: internal ABI @@ -563,6 +576,7 @@ _aesni_dec1: movaps 0x70(TKEYP), KEY # aesdeclast KEY, STATE # last round .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2 + pax_force_retaddr_bts ret /* @@ -726,6 +740,7 @@ _aesni_dec4: .byte 0x66, 0x0f, 0x38, 0xdf, 0xea # aesdeclast KEY, STATE4 .byte 0x66, 0x0f, 0x38, 0xdf, 0xf2 + pax_force_retaddr_bts ret /* @@ -769,7 +784,9 @@ ENTRY(aesni_ecb_enc) cmp $16, LEN jge .Lecb_enc_loop1 .Lecb_enc_ret: + pax_force_retaddr 0, 1 ret +ENDPROC(aesni_ecb_enc) /* * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, @@ -813,7 +830,9 @@ ENTRY(aesni_ecb_dec) cmp $16, LEN jge .Lecb_dec_loop1 .Lecb_dec_ret: + pax_force_retaddr 0, 1 ret +ENDPROC(aesni_ecb_dec) /* * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, @@ -837,7 +856,9 @@ ENTRY(aesni_cbc_enc) jge .Lcbc_enc_loop movups STATE, (IVP) .Lcbc_enc_ret: + pax_force_retaddr 0, 1 ret +ENDPROC(aesni_cbc_enc) /* * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, @@ -894,4 +915,6 @@ ENTRY(aesni_cbc_dec) .Lcbc_dec_ret: movups IV, (IVP) 
.Lcbc_dec_just_ret: + pax_force_retaddr 0, 1 ret +ENDPROC(aesni_cbc_dec) diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S index 6214a9b..1f4fc9a 100644 --- a/arch/x86/crypto/salsa20-x86_64-asm_64.S +++ b/arch/x86/crypto/salsa20-x86_64-asm_64.S @@ -1,3 +1,5 @@ +#include + # enter ECRYPT_encrypt_bytes .text .p2align 5 @@ -790,6 +792,7 @@ ECRYPT_encrypt_bytes: add %r11,%rsp mov %rdi,%rax mov %rsi,%rdx + pax_force_retaddr 0, 1 ret # bytesatleast65: ._bytesatleast65: @@ -891,6 +894,7 @@ ECRYPT_keysetup: add %r11,%rsp mov %rdi,%rax mov %rsi,%rdx + pax_force_retaddr ret # enter ECRYPT_ivsetup .text @@ -917,4 +921,5 @@ ECRYPT_ivsetup: add %r11,%rsp mov %rdi,%rax mov %rsi,%rdx + pax_force_retaddr ret diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S index 35974a5..5662ae2 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64.S @@ -21,6 +21,7 @@ .text #include +#include #define a_offset 0 #define b_offset 4 @@ -269,6 +270,7 @@ twofish_enc_blk: popq R1 movq $1,%rax + pax_force_retaddr 0, 1 ret twofish_dec_blk: @@ -321,4 +323,5 @@ twofish_dec_blk: popq R1 movq $1,%rax + pax_force_retaddr 0, 1 ret diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 14531ab..a89a0c0 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -169,6 +169,8 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long dump_start, dump_size; struct user32 dump; + memset(&dump, 0, sizeof(dump)); + fs = get_fs(); set_fs(KERNEL_DS); has_dumped = 1; @@ -218,12 +220,6 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, dump_size = dump.u_ssize << PAGE_SHIFT; DUMP_WRITE(dump_start, dump_size); } - /* - * Finally dump the task struct. 
Not be used by gdb, but - * could be useful - */ - set_fs(KERNEL_DS); - DUMP_WRITE(current, sizeof(*current)); end_coredump: set_fs(fs); return has_dumped; diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 588a7aa..a3468b0 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -167,7 +167,7 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, } seg = get_fs(); set_fs(KERNEL_DS); - ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->sp); + ret = do_sigaltstack(uss_ptr ? (const stack_t __force_user *)&uss : NULL, (stack_t __force_user *)&uoss, regs->sp); set_fs(seg); if (ret >= 0 && uoss_ptr) { if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t))) @@ -374,7 +374,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, */ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, - void **fpstate) + void __user **fpstate) { unsigned long sp; @@ -395,7 +395,7 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, if (used_math()) { sp = sp - sig_xstate_ia32_size; - *fpstate = (struct _fpstate_ia32 *) sp; + *fpstate = (struct _fpstate_ia32 __user *) sp; if (save_i387_xstate_ia32(*fpstate) < 0) return (void __user *) -1L; } @@ -403,7 +403,7 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, sp -= frame_size; /* Align the stack pointer according to the i386 ABI, * i.e. so that on function entry ((sp + 4) & 15) == 0. */ - sp = ((sp + 4) & -16ul) - 4; + sp = ((sp - 12) & -16ul) - 4; return (void __user *) sp; } @@ -461,7 +461,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, * These are actually not used anymore, but left because some * gdb versions depend on them as a marker. 
*/ - put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); + put_user_ex(*((const u64 *)&code), (u64 __user *)frame->retcode); } put_user_catch(err); if (err) @@ -503,7 +503,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 0xb8, __NR_ia32_rt_sigreturn, 0x80cd, - 0, + 0 }; frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); @@ -533,16 +533,18 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; + else if (current->mm->context.vdso) + /* Return stub is in 32bit vsyscall page */ + restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); else - restorer = VDSO32_SYMBOL(current->mm->context.vdso, - rt_sigreturn); + restorer = &frame->retcode; put_user_ex(ptr_to_compat(restorer), &frame->pretcode); /* * Not actually used anymore, but left because some gdb * versions need it. */ - put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); + put_user_ex(*((const u64 *)&code), (u64 __user *)frame->retcode); } put_user_catch(err); if (err) diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 4edd8eb..29124b4 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -13,7 +13,9 @@ #include #include #include +#include #include +#include /* Avoid __ASSEMBLER__'ifying just for this. */ #include @@ -93,6 +95,32 @@ ENTRY(native_irq_enable_sysexit) ENDPROC(native_irq_enable_sysexit) #endif + .macro pax_enter_kernel_user + pax_set_fptr_mask +#ifdef CONFIG_PAX_MEMORY_UDEREF + call pax_enter_kernel_user +#endif + .endm + + .macro pax_exit_kernel_user +#ifdef CONFIG_PAX_MEMORY_UDEREF + call pax_exit_kernel_user +#endif +#ifdef CONFIG_PAX_RANDKSTACK + pushq %rax + pushq %r11 + call pax_randomize_kstack + popq %r11 + popq %rax +#endif + .endm + +.macro pax_erase_kstack +#ifdef CONFIG_PAX_MEMORY_STACKLEAK + call pax_erase_kstack +#endif +.endm + /* * 32bit SYSENTER instruction entry. 
* @@ -119,12 +147,6 @@ ENTRY(ia32_sysenter_target) CFI_REGISTER rsp,rbp SWAPGS_UNSAFE_STACK movq PER_CPU_VAR(kernel_stack), %rsp - addq $(KERNEL_STACK_OFFSET),%rsp - /* - * No need to follow this irqs on/off section: the syscall - * disabled irqs, here we enable it straight after entry: - */ - ENABLE_INTERRUPTS(CLBR_NONE) movl %ebp,%ebp /* zero extension */ pushq $__USER32_DS CFI_ADJUST_CFA_OFFSET 8 @@ -135,28 +157,42 @@ ENTRY(ia32_sysenter_target) pushfq CFI_ADJUST_CFA_OFFSET 8 /*CFI_REL_OFFSET rflags,0*/ - movl 8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d - CFI_REGISTER rip,r10 + orl $X86_EFLAGS_IF,(%rsp) + GET_THREAD_INFO(%r11) + movl TI_sysenter_return(%r11), %r11d + CFI_REGISTER rip,r11 pushq $__USER32_CS CFI_ADJUST_CFA_OFFSET 8 /*CFI_REL_OFFSET cs,0*/ movl %eax, %eax - pushq %r10 + pushq %r11 CFI_ADJUST_CFA_OFFSET 8 CFI_REL_OFFSET rip,0 pushq %rax CFI_ADJUST_CFA_OFFSET 8 cld SAVE_ARGS 0,0,1 + pax_enter_kernel_user + /* + * No need to follow this irqs on/off section: the syscall + * disabled irqs, here we enable it straight after entry: + */ + ENABLE_INTERRUPTS(CLBR_NONE) /* no need to do an access_ok check here because rbp has been 32bit zero extended */ + +#ifdef CONFIG_PAX_MEMORY_UDEREF + mov $PAX_USER_SHADOW_BASE,%r11 + add %r11,%rbp +#endif + 1: movl (%rbp),%ebp .section __ex_table,"a" .quad 1b,ia32_badarg .previous - GET_THREAD_INFO(%r10) - orl $TS_COMPAT,TI_status(%r10) - testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + GET_THREAD_INFO(%r11) + orl $TS_COMPAT,TI_status(%r11) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r11) CFI_REMEMBER_STATE jnz sysenter_tracesys cmpq $(IA32_NR_syscalls-1),%rax @@ -166,13 +202,15 @@ sysenter_do_call: sysenter_dispatch: call *ia32_sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) - GET_THREAD_INFO(%r10) + GET_THREAD_INFO(%r11) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl $_TIF_ALLWORK_MASK,TI_flags(%r10) + testl $_TIF_ALLWORK_MASK,TI_flags(%r11) jnz sysexit_audit sysexit_from_sys_call: - andl 
$~TS_COMPAT,TI_status(%r10) + pax_exit_kernel_user + pax_erase_kstack + andl $~TS_COMPAT,TI_status(%r11) /* clear IF, that popfq doesn't enable interrupts early */ andl $~0x200,EFLAGS-R11(%rsp) movl RIP-R11(%rsp),%edx /* User %eip */ @@ -200,6 +238,9 @@ sysexit_from_sys_call: movl %eax,%esi /* 2nd arg: syscall number */ movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */ call audit_syscall_entry + + pax_erase_kstack + movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ cmpq $(IA32_NR_syscalls-1),%rax ja ia32_badsys @@ -211,7 +252,7 @@ sysexit_from_sys_call: .endm .macro auditsys_exit exit - testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) + testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r11) jnz ia32_ret_from_sys_call TRACE_IRQS_ON sti @@ -221,12 +262,12 @@ sysexit_from_sys_call: movzbl %al,%edi /* zero-extend that into %edi */ inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ call audit_syscall_exit - GET_THREAD_INFO(%r10) + GET_THREAD_INFO(%r11) movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi cli TRACE_IRQS_OFF - testl %edi,TI_flags(%r10) + testl %edi,TI_flags(%r11) jz \exit CLEAR_RREGS -ARGOFFSET jmp int_with_check @@ -244,7 +285,7 @@ sysexit_audit: sysenter_tracesys: #ifdef CONFIG_AUDITSYSCALL - testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) + testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r11) jz sysenter_auditsys #endif SAVE_REST @@ -252,6 +293,9 @@ sysenter_tracesys: movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter + + pax_erase_kstack + LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST cmpq $(IA32_NR_syscalls-1),%rax @@ -283,19 +327,20 @@ ENDPROC(ia32_sysenter_target) ENTRY(ia32_cstar_target) CFI_STARTPROC32 simple CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET + 
CFI_DEF_CFA rsp,0 CFI_REGISTER rip,rcx /*CFI_REGISTER rflags,r11*/ SWAPGS_UNSAFE_STACK movl %esp,%r8d CFI_REGISTER rsp,r8 movq PER_CPU_VAR(kernel_stack),%rsp + SAVE_ARGS 8*6,1,1 + pax_enter_kernel_user /* * No need to follow this irqs on/off section: the syscall * disabled irqs and here we enable it straight after entry: */ ENABLE_INTERRUPTS(CLBR_NONE) - SAVE_ARGS 8,1,1 movl %eax,%eax /* zero extension */ movq %rax,ORIG_RAX-ARGOFFSET(%rsp) movq %rcx,RIP-ARGOFFSET(%rsp) @@ -311,13 +356,19 @@ ENTRY(ia32_cstar_target) /* no need to do an access_ok check here because r8 has been 32bit zero extended */ /* hardware stack frame is complete now */ + +#ifdef CONFIG_PAX_MEMORY_UDEREF + mov $PAX_USER_SHADOW_BASE,%r11 + add %r11,%r8 +#endif + 1: movl (%r8),%r9d .section __ex_table,"a" .quad 1b,ia32_badarg .previous - GET_THREAD_INFO(%r10) - orl $TS_COMPAT,TI_status(%r10) - testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + GET_THREAD_INFO(%r11) + orl $TS_COMPAT,TI_status(%r11) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r11) CFI_REMEMBER_STATE jnz cstar_tracesys cmpq $IA32_NR_syscalls-1,%rax @@ -327,13 +378,15 @@ cstar_do_call: cstar_dispatch: call *ia32_sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) - GET_THREAD_INFO(%r10) + GET_THREAD_INFO(%r11) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl $_TIF_ALLWORK_MASK,TI_flags(%r10) + testl $_TIF_ALLWORK_MASK,TI_flags(%r11) jnz sysretl_audit sysretl_from_sys_call: - andl $~TS_COMPAT,TI_status(%r10) + pax_exit_kernel_user + pax_erase_kstack + andl $~TS_COMPAT,TI_status(%r11) RESTORE_ARGS 1,-ARG_SKIP,1,1,1 movl RIP-ARGOFFSET(%rsp),%ecx CFI_REGISTER rip,rcx @@ -361,7 +414,7 @@ sysretl_audit: cstar_tracesys: #ifdef CONFIG_AUDITSYSCALL - testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) + testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r11) jz cstar_auditsys #endif xchgl %r9d,%ebp @@ -370,6 +423,9 @@ cstar_tracesys: movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq 
%rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter + + pax_erase_kstack + LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ RESTORE_REST xchgl %ebp,%r9d @@ -415,11 +471,6 @@ ENTRY(ia32_syscall) CFI_REL_OFFSET rip,RIP-RIP PARAVIRT_ADJUST_EXCEPTION_FRAME SWAPGS - /* - * No need to follow this irqs on/off section: the syscall - * disabled irqs and here we enable it straight after entry: - */ - ENABLE_INTERRUPTS(CLBR_NONE) movl %eax,%eax pushq %rax CFI_ADJUST_CFA_OFFSET 8 @@ -427,9 +478,15 @@ ENTRY(ia32_syscall) /* note the registers are not zero extended to the sf. this could be a problem. */ SAVE_ARGS 0,0,1 - GET_THREAD_INFO(%r10) - orl $TS_COMPAT,TI_status(%r10) - testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + pax_enter_kernel_user + /* + * No need to follow this irqs on/off section: the syscall + * disabled irqs and here we enable it straight after entry: + */ + ENABLE_INTERRUPTS(CLBR_NONE) + GET_THREAD_INFO(%r11) + orl $TS_COMPAT,TI_status(%r11) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r11) jnz ia32_tracesys cmpq $(IA32_NR_syscalls-1),%rax ja ia32_badsys @@ -448,6 +505,9 @@ ia32_tracesys: movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter + + pax_erase_kstack + LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST cmpq $(IA32_NR_syscalls-1),%rax @@ -462,6 +522,7 @@ ia32_badsys: quiet_ni_syscall: movq $-ENOSYS,%rax + pax_force_retaddr ret CFI_ENDPROC diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 016218c..47ccbdd 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -69,8 +69,8 @@ asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low, */ static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat) { - typeof(ubuf->st_uid) uid = 0; - typeof(ubuf->st_gid) gid = 0; + typeof(((struct stat64 *)0)->st_uid) uid = 0; + typeof(((struct stat64 
*)0)->st_gid) gid = 0; SET_UID(uid, stat->uid); SET_GID(gid, stat->gid); if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct stat64)) || @@ -308,8 +308,8 @@ asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, } set_fs(KERNEL_DS); ret = sys_rt_sigprocmask(how, - set ? (sigset_t __user *)&s : NULL, - oset ? (sigset_t __user *)&s : NULL, + set ? (sigset_t __force_user *)&s : NULL, + oset ? (sigset_t __force_user *)&s : NULL, sigsetsize); set_fs(old_fs); if (ret) @@ -371,7 +371,7 @@ asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid, mm_segment_t old_fs = get_fs(); set_fs(KERNEL_DS); - ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t); + ret = sys_sched_rr_get_interval(pid, (struct timespec __force_user *)&t); set_fs(old_fs); if (put_compat_timespec(&t, interval)) return -EFAULT; @@ -387,7 +387,7 @@ asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set, mm_segment_t old_fs = get_fs(); set_fs(KERNEL_DS); - ret = sys_rt_sigpending((sigset_t __user *)&s, sigsetsize); + ret = sys_rt_sigpending((sigset_t __force_user *)&s, sigsetsize); set_fs(old_fs); if (!ret) { switch (_NSIG_WORDS) { @@ -412,7 +412,7 @@ asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig, if (copy_siginfo_from_user32(&info, uinfo)) return -EFAULT; set_fs(KERNEL_DS); - ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *)&info); + ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __force_user *)&info); set_fs(old_fs); return ret; } @@ -513,7 +513,7 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd, return -EFAULT; set_fs(KERNEL_DS); - ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *)&of : NULL, + ret = sys_sendfile(out_fd, in_fd, offset ? 
(off_t __force_user *)&of : NULL, count); set_fs(old_fs); diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index e2077d3..b7a8919 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -8,10 +8,10 @@ #ifdef CONFIG_SMP .macro LOCK_PREFIX -1: lock +672: lock .section .smp_locks,"a" .align 4 - X86_ALIGN 1b + X86_ALIGN 672b .previous .endm #else @@ -19,4 +19,43 @@ .endm #endif +#ifdef CONFIG_PAX_KERNEXEC_PLUGIN + .macro pax_force_retaddr_bts rip=0 + btsq $63,\rip(%rsp) + .endm +#ifdef CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_BTS + .macro pax_force_retaddr rip=0, reload=0 + btsq $63,\rip(%rsp) + .endm + .macro pax_force_fptr ptr + btsq $63,\ptr + .endm + .macro pax_set_fptr_mask + .endm +#endif +#ifdef CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_OR + .macro pax_force_retaddr rip=0, reload=0 + .if \reload + pax_set_fptr_mask + .endif + orq %r10,\rip(%rsp) + .endm + .macro pax_force_fptr ptr + orq %r10,\ptr + .endm + .macro pax_set_fptr_mask + movabs $0x8000000000000000,%r10 + .endm +#endif +#else + .macro pax_force_retaddr rip=0, reload=0 + .endm + .macro pax_force_fptr ptr + .endm + .macro pax_force_retaddr_bts rip=0 + .endm + .macro pax_set_fptr_mask + .endm +#endif + #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index c240efc..fdfadf3 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -85,7 +85,7 @@ static inline void alternatives_smp_switch(int smp) {} " .byte 662b-661b\n" /* sourcelen */ \ " .byte 664f-663f\n" /* replacementlen */ \ ".previous\n" \ - ".section .altinstr_replacement, \"ax\"\n" \ + ".section .altinstr_replacement, \"a\"\n" \ "663:\n\t" newinstr "\n664:\n" /* replacement */ \ ".previous" diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 474d80d..1f97d58 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -46,7 +46,7 @@ static 
inline void generic_apic_probe(void) #ifdef CONFIG_X86_LOCAL_APIC -extern unsigned int apic_verbosity; +extern int apic_verbosity; extern int local_apic_timer_c2_ok; extern int disable_apic; diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h index 20370c6..a2eb9b0 100644 --- a/arch/x86/include/asm/apm.h +++ b/arch/x86/include/asm/apm.h @@ -34,7 +34,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" - "lcall *%%cs:apm_bios_entry\n\t" + "lcall *%%ss:apm_bios_entry\n\t" "setc %%al\n\t" "popl %%ebp\n\t" "popl %%edi\n\t" @@ -58,7 +58,7 @@ static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in, __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" - "lcall *%%cs:apm_bios_entry\n\t" + "lcall *%%ss:apm_bios_entry\n\t" "setc %%bl\n\t" "popl %%ebp\n\t" "popl %%edi\n\t" diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h index dc5a667..939040c 100644 --- a/arch/x86/include/asm/atomic_32.h +++ b/arch/x86/include/asm/atomic_32.h @@ -25,6 +25,17 @@ static inline int atomic_read(const atomic_t *v) } /** + * atomic_read_unchecked - read atomic variable + * @v: pointer of type atomic_unchecked_t + * + * Atomically reads the value of @v. + */ +static inline int atomic_read_unchecked(const atomic_unchecked_t *v) +{ + return v->counter; +} + +/** * atomic_set - set atomic variable * @v: pointer of type atomic_t * @i: required value @@ -37,6 +48,18 @@ static inline void atomic_set(atomic_t *v, int i) } /** + * atomic_set_unchecked - set atomic variable + * @v: pointer of type atomic_unchecked_t + * @i: required value + * + * Atomically sets the value of @v to @i. 
+ */ +static inline void atomic_set_unchecked(atomic_unchecked_t *v, int i) +{ + v->counter = i; +} + +/** * atomic_add - add integer to atomic variable * @i: integer value to add * @v: pointer of type atomic_t @@ -45,7 +68,29 @@ static inline void atomic_set(atomic_t *v, int i) */ static inline void atomic_add(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "addl %1,%0" + asm volatile(LOCK_PREFIX "addl %1,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "subl %1,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "+m" (v->counter) + : "ir" (i)); +} + +/** + * atomic_add_unchecked - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_unchecked_t + * + * Atomically adds @i to @v. + */ +static inline void atomic_add_unchecked(int i, atomic_unchecked_t *v) +{ + asm volatile(LOCK_PREFIX "addl %1,%0\n" : "+m" (v->counter) : "ir" (i)); } @@ -59,7 +104,29 @@ static inline void atomic_add(int i, atomic_t *v) */ static inline void atomic_sub(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "subl %1,%0" + asm volatile(LOCK_PREFIX "subl %1,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "addl %1,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "+m" (v->counter) + : "ir" (i)); +} + +/** + * atomic_sub_unchecked - subtract integer from atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_unchecked_t + * + * Atomically subtracts @i from @v. 
+ */ +static inline void atomic_sub_unchecked(int i, atomic_unchecked_t *v) +{ + asm volatile(LOCK_PREFIX "subl %1,%0\n" : "+m" (v->counter) : "ir" (i)); } @@ -77,7 +144,16 @@ static inline int atomic_sub_and_test(int i, atomic_t *v) { unsigned char c; - asm volatile(LOCK_PREFIX "subl %2,%0; sete %1" + asm volatile(LOCK_PREFIX "subl %2,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "addl %2,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sete %1\n" : "+m" (v->counter), "=qm" (c) : "ir" (i) : "memory"); return c; @@ -91,7 +167,27 @@ static inline int atomic_sub_and_test(int i, atomic_t *v) */ static inline void atomic_inc(atomic_t *v) { - asm volatile(LOCK_PREFIX "incl %0" + asm volatile(LOCK_PREFIX "incl %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "decl %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "+m" (v->counter)); +} + +/** + * atomic_inc_unchecked - increment atomic variable + * @v: pointer of type atomic_unchecked_t + * + * Atomically increments @v by 1. + */ +static inline void atomic_inc_unchecked(atomic_unchecked_t *v) +{ + asm volatile(LOCK_PREFIX "incl %0\n" : "+m" (v->counter)); } @@ -103,7 +199,27 @@ static inline void atomic_inc(atomic_t *v) */ static inline void atomic_dec(atomic_t *v) { - asm volatile(LOCK_PREFIX "decl %0" + asm volatile(LOCK_PREFIX "decl %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "incl %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "+m" (v->counter)); +} + +/** + * atomic_dec_unchecked - decrement atomic variable + * @v: pointer of type atomic_unchecked_t + * + * Atomically decrements @v by 1. 
+ */ +static inline void atomic_dec_unchecked(atomic_unchecked_t *v) +{ + asm volatile(LOCK_PREFIX "decl %0\n" : "+m" (v->counter)); } @@ -119,7 +235,16 @@ static inline int atomic_dec_and_test(atomic_t *v) { unsigned char c; - asm volatile(LOCK_PREFIX "decl %0; sete %1" + asm volatile(LOCK_PREFIX "decl %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "incl %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sete %1\n" : "+m" (v->counter), "=qm" (c) : : "memory"); return c != 0; @@ -137,7 +262,35 @@ static inline int atomic_inc_and_test(atomic_t *v) { unsigned char c; - asm volatile(LOCK_PREFIX "incl %0; sete %1" + asm volatile(LOCK_PREFIX "incl %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "decl %0\n" + "into\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sete %1\n" + : "+m" (v->counter), "=qm" (c) + : : "memory"); + return c != 0; +} + +/** + * atomic_inc_and_test_unchecked - increment and test + * @v: pointer of type atomic_unchecked_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. 
+ */ +static inline int atomic_inc_and_test_unchecked(atomic_unchecked_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "incl %0\n" + "sete %1\n" : "+m" (v->counter), "=qm" (c) : : "memory"); return c != 0; @@ -156,7 +309,16 @@ static inline int atomic_add_negative(int i, atomic_t *v) { unsigned char c; - asm volatile(LOCK_PREFIX "addl %2,%0; sets %1" + asm volatile(LOCK_PREFIX "addl %2,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "subl %2,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sets %1\n" : "+m" (v->counter), "=qm" (c) : "ir" (i) : "memory"); return c; @@ -179,7 +341,15 @@ static inline int atomic_add_return(int i, atomic_t *v) #endif /* Modern 486+ processor */ __i = i; - asm volatile(LOCK_PREFIX "xaddl %0, %1" + asm volatile(LOCK_PREFIX "xaddl %0, %1\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + "movl %0, %1\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + : "+r" (i), "+m" (v->counter) : : "memory"); return i + __i; @@ -195,6 +365,38 @@ no_xadd: /* Legacy 386 processor */ } /** + * atomic_add_return_unchecked - add integer and return + * @v: pointer of type atomic_unchecked_t + * @i: integer value to add + * + * Atomically adds @i to @v and returns @i + @v + */ +static inline int atomic_add_return_unchecked(int i, atomic_unchecked_t *v) +{ + int __i; +#ifdef CONFIG_M386 + unsigned long flags; + if (unlikely(boot_cpu_data.x86 <= 3)) + goto no_xadd; +#endif + /* Modern 486+ processor */ + __i = i; + asm volatile(LOCK_PREFIX "xaddl %0, %1" + : "+r" (i), "+m" (v->counter) + : : "memory"); + return i + __i; + +#ifdef CONFIG_M386 +no_xadd: /* Legacy 386 processor */ + local_irq_save(flags); + __i = atomic_read_unchecked(v); + atomic_set_unchecked(v, i + __i); + local_irq_restore(flags); + return i + __i; +#endif +} + +/** * atomic_sub_return - subtract integer and return * @v: pointer of type atomic_t * @i: integer value to subtract @@ -211,11 +413,21 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new) 
return cmpxchg(&v->counter, old, new); } +static inline int atomic_cmpxchg_unchecked(atomic_unchecked_t *v, int old, int new) +{ + return cmpxchg(&v->counter, old, new); +} + static inline int atomic_xchg(atomic_t *v, int new) { return xchg(&v->counter, new); } +static inline int atomic_xchg_unchecked(atomic_unchecked_t *v, int new) +{ + return xchg(&v->counter, new); +} + /** * atomic_add_unless - add unless the number is already a given value * @v: pointer of type atomic_t @@ -227,22 +439,39 @@ static inline int atomic_xchg(atomic_t *v, int new) */ static inline int atomic_add_unless(atomic_t *v, int a, int u) { - int c, old; + int c, old, new; c = atomic_read(v); for (;;) { - if (unlikely(c == (u))) + if (unlikely(c == u)) break; - old = atomic_cmpxchg((v), c, c + (a)); + + asm volatile("addl %2,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + "subl %2,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "=r" (new) + : "0" (c), "ir" (a)); + + old = atomic_cmpxchg(v, c, new); if (likely(old == c)) break; c = old; } - return c != (u); + return c != u; } #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) #define atomic_inc_return(v) (atomic_add_return(1, v)) +static inline int atomic_inc_return_unchecked(atomic_unchecked_t *v) +{ + return atomic_add_return_unchecked(1, v); +} #define atomic_dec_return(v) (atomic_sub_return(1, v)) /* These are x86-specific, used by some header files */ @@ -266,9 +495,18 @@ typedef struct { u64 __aligned(8) counter; } atomic64_t; +#ifdef CONFIG_PAX_REFCOUNT +typedef struct { + u64 __aligned(8) counter; +} atomic64_unchecked_t; +#else +typedef atomic64_t atomic64_unchecked_t; +#endif + #define ATOMIC64_INIT(val) { (val) } extern u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val); +extern u64 atomic64_cmpxchg_unchecked(atomic64_unchecked_t *ptr, u64 old_val, u64 new_val); /** * atomic64_xchg - xchg atomic64 variable @@ -279,6 +517,7 @@ extern u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 
new_val); * the old value. */ extern u64 atomic64_xchg(atomic64_t *ptr, u64 new_val); +extern u64 atomic64_xchg_unchecked(atomic64_unchecked_t *ptr, u64 new_val); /** * atomic64_set - set atomic64 variable @@ -290,6 +529,15 @@ extern u64 atomic64_xchg(atomic64_t *ptr, u64 new_val); extern void atomic64_set(atomic64_t *ptr, u64 new_val); /** + * atomic64_set_unchecked - set atomic64 variable + * @ptr: pointer to type atomic64_unchecked_t + * @new_val: value to assign + * + * Atomically sets the value of @ptr to @new_val. + */ +extern void atomic64_set_unchecked(atomic64_unchecked_t *ptr, u64 new_val); + +/** * atomic64_read - read atomic64 variable * @ptr: pointer to type atomic64_t * @@ -317,7 +565,33 @@ static inline u64 atomic64_read(atomic64_t *ptr) return res; } -extern u64 atomic64_read(atomic64_t *ptr); +/** + * atomic64_read_unchecked - read atomic64 variable + * @ptr: pointer to type atomic64_unchecked_t + * + * Atomically reads the value of @ptr and returns it. + */ +static inline u64 atomic64_read_unchecked(atomic64_unchecked_t *ptr) +{ + u64 res; + + /* + * Note, we inline this atomic64_unchecked_t primitive because + * it only clobbers EAX/EDX and leaves the others + * untouched. 
We also (somewhat subtly) rely on the + * fact that cmpxchg8b returns the current 64-bit value + * of the memory location we are touching: + */ + asm volatile( + "mov %%ebx, %%eax\n\t" + "mov %%ecx, %%edx\n\t" + LOCK_PREFIX "cmpxchg8b %1\n" + : "=&A" (res) + : "m" (*ptr) + ); + + return res; +} /** * atomic64_add_return - add and return @@ -332,8 +606,11 @@ extern u64 atomic64_add_return(u64 delta, atomic64_t *ptr); * Other variants with different arithmetic operators: */ extern u64 atomic64_sub_return(u64 delta, atomic64_t *ptr); +extern u64 atomic64_sub_return_unchecked(u64 delta, atomic64_unchecked_t *ptr); extern u64 atomic64_inc_return(atomic64_t *ptr); +extern u64 atomic64_inc_return_unchecked(atomic64_unchecked_t *ptr); extern u64 atomic64_dec_return(atomic64_t *ptr); +extern u64 atomic64_dec_return_unchecked(atomic64_unchecked_t *ptr); /** * atomic64_add - add integer to atomic64 variable @@ -345,6 +622,15 @@ extern u64 atomic64_dec_return(atomic64_t *ptr); extern void atomic64_add(u64 delta, atomic64_t *ptr); /** + * atomic64_add_unchecked - add integer to atomic64 variable + * @delta: integer value to add + * @ptr: pointer to type atomic64_unchecked_t + * + * Atomically adds @delta to @ptr. + */ +extern void atomic64_add_unchecked(u64 delta, atomic64_unchecked_t *ptr); + +/** * atomic64_sub - subtract the atomic64 variable * @delta: integer value to subtract * @ptr: pointer to type atomic64_t @@ -354,6 +640,15 @@ extern void atomic64_add(u64 delta, atomic64_t *ptr); extern void atomic64_sub(u64 delta, atomic64_t *ptr); /** + * atomic64_sub_unchecked - subtract the atomic64 variable + * @delta: integer value to subtract + * @ptr: pointer to type atomic64_unchecked_t + * + * Atomically subtracts @delta from @ptr. 
+ */ +extern void atomic64_sub_unchecked(u64 delta, atomic64_unchecked_t *ptr); + +/** * atomic64_sub_and_test - subtract value from variable and test result * @delta: integer value to subtract * @ptr: pointer to type atomic64_t @@ -373,6 +668,14 @@ extern int atomic64_sub_and_test(u64 delta, atomic64_t *ptr); extern void atomic64_inc(atomic64_t *ptr); /** + * atomic64_inc_unchecked - increment atomic64 variable + * @ptr: pointer to type atomic64_unchecked_t + * + * Atomically increments @ptr by 1. + */ +extern void atomic64_inc_unchecked(atomic64_unchecked_t *ptr); + +/** * atomic64_dec - decrement atomic64 variable * @ptr: pointer to type atomic64_t * @@ -381,6 +684,14 @@ extern void atomic64_inc(atomic64_t *ptr); extern void atomic64_dec(atomic64_t *ptr); /** + * atomic64_dec_unchecked - decrement atomic64 variable + * @ptr: pointer to type atomic64_unchecked_t + * + * Atomically decrements @ptr by 1. + */ +extern void atomic64_dec_unchecked(atomic64_unchecked_t *ptr); + +/** * atomic64_dec_and_test - decrement and test * @ptr: pointer to type atomic64_t * diff --git a/arch/x86/include/asm/atomic_64.h b/arch/x86/include/asm/atomic_64.h index d605dc2..fafd7bd 100644 --- a/arch/x86/include/asm/atomic_64.h +++ b/arch/x86/include/asm/atomic_64.h @@ -24,6 +24,17 @@ static inline int atomic_read(const atomic_t *v) } /** + * atomic_read_unchecked - read atomic variable + * @v: pointer of type atomic_unchecked_t + * + * Atomically reads the value of @v. + */ +static inline int atomic_read_unchecked(const atomic_unchecked_t *v) +{ + return v->counter; +} + +/** * atomic_set - set atomic variable * @v: pointer of type atomic_t * @i: required value @@ -36,6 +47,18 @@ static inline void atomic_set(atomic_t *v, int i) } /** + * atomic_set_unchecked - set atomic variable + * @v: pointer of type atomic_unchecked_t + * @i: required value + * + * Atomically sets the value of @v to @i. 
+ */ +static inline void atomic_set_unchecked(atomic_unchecked_t *v, int i) +{ + v->counter = i; +} + +/** * atomic_add - add integer to atomic variable * @i: integer value to add * @v: pointer of type atomic_t @@ -44,7 +67,29 @@ static inline void atomic_set(atomic_t *v, int i) */ static inline void atomic_add(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "addl %1,%0" + asm volatile(LOCK_PREFIX "addl %1,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "subl %1,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "=m" (v->counter) + : "ir" (i), "m" (v->counter)); +} + +/** + * atomic_add_unchecked - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_unchecked_t + * + * Atomically adds @i to @v. + */ +static inline void atomic_add_unchecked(int i, atomic_unchecked_t *v) +{ + asm volatile(LOCK_PREFIX "addl %1,%0\n" : "=m" (v->counter) : "ir" (i), "m" (v->counter)); } @@ -58,7 +103,29 @@ static inline void atomic_add(int i, atomic_t *v) */ static inline void atomic_sub(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "subl %1,%0" + asm volatile(LOCK_PREFIX "subl %1,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "addl %1,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "=m" (v->counter) + : "ir" (i), "m" (v->counter)); +} + +/** + * atomic_sub_unchecked - subtract the atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_unchecked_t + * + * Atomically subtracts @i from @v. 
+ */ +static inline void atomic_sub_unchecked(int i, atomic_unchecked_t *v) +{ + asm volatile(LOCK_PREFIX "subl %1,%0\n" : "=m" (v->counter) : "ir" (i), "m" (v->counter)); } @@ -76,7 +143,16 @@ static inline int atomic_sub_and_test(int i, atomic_t *v) { unsigned char c; - asm volatile(LOCK_PREFIX "subl %2,%0; sete %1" + asm volatile(LOCK_PREFIX "subl %2,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "addl %2,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sete %1\n" : "=m" (v->counter), "=qm" (c) : "ir" (i), "m" (v->counter) : "memory"); return c; @@ -90,7 +166,28 @@ static inline int atomic_sub_and_test(int i, atomic_t *v) */ static inline void atomic_inc(atomic_t *v) { - asm volatile(LOCK_PREFIX "incl %0" + asm volatile(LOCK_PREFIX "incl %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "decl %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "=m" (v->counter) + : "m" (v->counter)); +} + +/** + * atomic_inc_unchecked - increment atomic variable + * @v: pointer of type atomic_unchecked_t + * + * Atomically increments @v by 1. + */ +static inline void atomic_inc_unchecked(atomic_unchecked_t *v) +{ + asm volatile(LOCK_PREFIX "incl %0\n" : "=m" (v->counter) : "m" (v->counter)); } @@ -103,7 +200,28 @@ static inline void atomic_inc(atomic_t *v) */ static inline void atomic_dec(atomic_t *v) { - asm volatile(LOCK_PREFIX "decl %0" + asm volatile(LOCK_PREFIX "decl %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "incl %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "=m" (v->counter) + : "m" (v->counter)); +} + +/** + * atomic_dec_unchecked - decrement atomic variable + * @v: pointer of type atomic_unchecked_t + * + * Atomically decrements @v by 1. 
+ */ +static inline void atomic_dec_unchecked(atomic_unchecked_t *v) +{ + asm volatile(LOCK_PREFIX "decl %0\n" : "=m" (v->counter) : "m" (v->counter)); } @@ -120,7 +238,16 @@ static inline int atomic_dec_and_test(atomic_t *v) { unsigned char c; - asm volatile(LOCK_PREFIX "decl %0; sete %1" + asm volatile(LOCK_PREFIX "decl %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "incl %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sete %1\n" : "=m" (v->counter), "=qm" (c) : "m" (v->counter) : "memory"); return c != 0; @@ -138,7 +265,35 @@ static inline int atomic_inc_and_test(atomic_t *v) { unsigned char c; - asm volatile(LOCK_PREFIX "incl %0; sete %1" + asm volatile(LOCK_PREFIX "incl %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "decl %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sete %1\n" + : "=m" (v->counter), "=qm" (c) + : "m" (v->counter) : "memory"); + return c != 0; +} + +/** + * atomic_inc_and_test_unchecked - increment and test + * @v: pointer of type atomic_unchecked_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. 
+ */ +static inline int atomic_inc_and_test_unchecked(atomic_unchecked_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "incl %0\n" + "sete %1\n" : "=m" (v->counter), "=qm" (c) : "m" (v->counter) : "memory"); return c != 0; @@ -157,7 +312,16 @@ static inline int atomic_add_negative(int i, atomic_t *v) { unsigned char c; - asm volatile(LOCK_PREFIX "addl %2,%0; sets %1" + asm volatile(LOCK_PREFIX "addl %2,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "subl %2,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sets %1\n" : "=m" (v->counter), "=qm" (c) : "ir" (i), "m" (v->counter) : "memory"); return c; @@ -173,7 +337,31 @@ static inline int atomic_add_negative(int i, atomic_t *v) static inline int atomic_add_return(int i, atomic_t *v) { int __i = i; - asm volatile(LOCK_PREFIX "xaddl %0, %1" + asm volatile(LOCK_PREFIX "xaddl %0, %1\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + "movl %0, %1\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "+r" (i), "+m" (v->counter) + : : "memory"); + return i + __i; +} + +/** + * atomic_add_return_unchecked - add and return + * @i: integer value to add + * @v: pointer of type atomic_unchecked_t + * + * Atomically adds @i to @v and returns @i + @v + */ +static inline int atomic_add_return_unchecked(int i, atomic_unchecked_t *v) +{ + int __i = i; + asm volatile(LOCK_PREFIX "xaddl %0, %1\n" : "+r" (i), "+m" (v->counter) : : "memory"); return i + __i; @@ -185,6 +373,10 @@ static inline int atomic_sub_return(int i, atomic_t *v) } #define atomic_inc_return(v) (atomic_add_return(1, v)) +static inline int atomic_inc_return_unchecked(atomic_unchecked_t *v) +{ + return atomic_add_return_unchecked(1, v); +} #define atomic_dec_return(v) (atomic_sub_return(1, v)) /* The 64-bit atomic type */ @@ -204,6 +396,18 @@ static inline long atomic64_read(const atomic64_t *v) } /** + * atomic64_read_unchecked - read atomic64 variable + * @v: pointer of type atomic64_unchecked_t + * + * Atomically reads the value of 
@v. + * Doesn't imply a read memory barrier. + */ +static inline long atomic64_read_unchecked(const atomic64_unchecked_t *v) +{ + return v->counter; +} + +/** * atomic64_set - set atomic64 variable * @v: pointer to type atomic64_t * @i: required value @@ -216,6 +420,18 @@ static inline void atomic64_set(atomic64_t *v, long i) } /** + * atomic64_set_unchecked - set atomic64 variable + * @v: pointer to type atomic64_unchecked_t + * @i: required value + * + * Atomically sets the value of @v to @i. + */ +static inline void atomic64_set_unchecked(atomic64_unchecked_t *v, long i) +{ + v->counter = i; +} + +/** * atomic64_add - add integer to atomic64 variable * @i: integer value to add * @v: pointer to type atomic64_t @@ -224,6 +440,28 @@ static inline void atomic64_set(atomic64_t *v, long i) */ static inline void atomic64_add(long i, atomic64_t *v) { + asm volatile(LOCK_PREFIX "addq %1,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "subq %1,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "=m" (v->counter) + : "er" (i), "m" (v->counter)); +} + +/** + * atomic64_add_unchecked - add integer to atomic64 variable + * @i: integer value to add + * @v: pointer to type atomic64_unchecked_t + * + * Atomically adds @i to @v. 
+ */ +static inline void atomic64_add_unchecked(long i, atomic64_unchecked_t *v) +{ asm volatile(LOCK_PREFIX "addq %1,%0" : "=m" (v->counter) : "er" (i), "m" (v->counter)); @@ -238,7 +476,15 @@ static inline void atomic64_add(long i, atomic64_t *v) */ static inline void atomic64_sub(long i, atomic64_t *v) { - asm volatile(LOCK_PREFIX "subq %1,%0" + asm volatile(LOCK_PREFIX "subq %1,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "addq %1,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + : "=m" (v->counter) : "er" (i), "m" (v->counter)); } @@ -256,7 +502,16 @@ static inline int atomic64_sub_and_test(long i, atomic64_t *v) { unsigned char c; - asm volatile(LOCK_PREFIX "subq %2,%0; sete %1" + asm volatile(LOCK_PREFIX "subq %2,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "addq %2,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sete %1\n" : "=m" (v->counter), "=qm" (c) : "er" (i), "m" (v->counter) : "memory"); return c; @@ -270,6 +525,27 @@ static inline int atomic64_sub_and_test(long i, atomic64_t *v) */ static inline void atomic64_inc(atomic64_t *v) { + asm volatile(LOCK_PREFIX "incq %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "decq %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "=m" (v->counter) + : "m" (v->counter)); +} + +/** + * atomic64_inc_unchecked - increment atomic64 variable + * @v: pointer to type atomic64_unchecked_t + * + * Atomically increments @v by 1. 
+ */ +static inline void atomic64_inc_unchecked(atomic64_unchecked_t *v) +{ asm volatile(LOCK_PREFIX "incq %0" : "=m" (v->counter) : "m" (v->counter)); @@ -283,7 +559,28 @@ static inline void atomic64_inc(atomic64_t *v) */ static inline void atomic64_dec(atomic64_t *v) { - asm volatile(LOCK_PREFIX "decq %0" + asm volatile(LOCK_PREFIX "decq %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "incq %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "=m" (v->counter) + : "m" (v->counter)); +} + +/** + * atomic64_dec_unchecked - decrement atomic64 variable + * @v: pointer to type atomic64_unchecked_t + * + * Atomically decrements @v by 1. + */ +static inline void atomic64_dec_unchecked(atomic64_unchecked_t *v) +{ + asm volatile(LOCK_PREFIX "decq %0\n" : "=m" (v->counter) : "m" (v->counter)); } @@ -300,7 +597,16 @@ static inline int atomic64_dec_and_test(atomic64_t *v) { unsigned char c; - asm volatile(LOCK_PREFIX "decq %0; sete %1" + asm volatile(LOCK_PREFIX "decq %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "incq %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sete %1\n" : "=m" (v->counter), "=qm" (c) : "m" (v->counter) : "memory"); return c != 0; @@ -318,7 +624,16 @@ static inline int atomic64_inc_and_test(atomic64_t *v) { unsigned char c; - asm volatile(LOCK_PREFIX "incq %0; sete %1" + asm volatile(LOCK_PREFIX "incq %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "decq %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sete %1\n" : "=m" (v->counter), "=qm" (c) : "m" (v->counter) : "memory"); return c != 0; @@ -337,7 +652,16 @@ static inline int atomic64_add_negative(long i, atomic64_t *v) { unsigned char c; - asm volatile(LOCK_PREFIX "addq %2,%0; sets %1" + asm volatile(LOCK_PREFIX "addq %2,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "subq %2,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sets %1\n" : "=m" (v->counter), "=qm" (c) : "er" (i), "m" (v->counter) : "memory"); 
return c; @@ -353,7 +677,31 @@ static inline int atomic64_add_negative(long i, atomic64_t *v) static inline long atomic64_add_return(long i, atomic64_t *v) { long __i = i; - asm volatile(LOCK_PREFIX "xaddq %0, %1;" + asm volatile(LOCK_PREFIX "xaddq %0, %1\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + "movq %0, %1\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "+r" (i), "+m" (v->counter) + : : "memory"); + return i + __i; +} + +/** + * atomic64_add_return_unchecked - add and return + * @i: integer value to add + * @v: pointer to type atomic64_unchecked_t + * + * Atomically adds @i to @v and returns @i + @v + */ +static inline long atomic64_add_return_unchecked(long i, atomic64_unchecked_t *v) +{ + long __i = i; + asm volatile(LOCK_PREFIX "xaddq %0, %1" : "+r" (i), "+m" (v->counter) : : "memory"); return i + __i; @@ -365,6 +713,10 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) } #define atomic64_inc_return(v) (atomic64_add_return(1, (v))) +static inline long atomic64_inc_return_unchecked(atomic64_unchecked_t *v) +{ + return atomic64_add_return_unchecked(1, v); +} #define atomic64_dec_return(v) (atomic64_sub_return(1, (v))) static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new) @@ -372,21 +724,41 @@ static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new) return cmpxchg(&v->counter, old, new); } +static inline long atomic64_cmpxchg_unchecked(atomic64_unchecked_t *v, long old, long new) +{ + return cmpxchg(&v->counter, old, new); +} + static inline long atomic64_xchg(atomic64_t *v, long new) { return xchg(&v->counter, new); } +static inline long atomic64_xchg_unchecked(atomic64_unchecked_t *v, long new) +{ + return xchg(&v->counter, new); +} + static inline long atomic_cmpxchg(atomic_t *v, int old, int new) { return cmpxchg(&v->counter, old, new); } +static inline long atomic_cmpxchg_unchecked(atomic_unchecked_t *v, int old, int new) +{ + return cmpxchg(&v->counter, old, new); +} + static inline long 
atomic_xchg(atomic_t *v, int new) { return xchg(&v->counter, new); } +static inline long atomic_xchg_unchecked(atomic_unchecked_t *v, int new) +{ + return xchg(&v->counter, new); +} + /** * atomic_add_unless - add unless the number is a given value * @v: pointer of type atomic_t @@ -398,17 +770,30 @@ static inline long atomic_xchg(atomic_t *v, int new) */ static inline int atomic_add_unless(atomic_t *v, int a, int u) { - int c, old; + int c, old, new; c = atomic_read(v); for (;;) { - if (unlikely(c == (u))) + if (unlikely(c == u)) break; - old = atomic_cmpxchg((v), c, c + (a)); + + asm volatile("addl %2,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + "subl %2,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "=r" (new) + : "0" (c), "ir" (a)); + + old = atomic_cmpxchg(v, c, new); if (likely(old == c)) break; c = old; } - return c != (u); + return c != u; } #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) @@ -424,17 +809,30 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) */ static inline int atomic64_add_unless(atomic64_t *v, long a, long u) { - long c, old; + long c, old, new; c = atomic64_read(v); for (;;) { - if (unlikely(c == (u))) + if (unlikely(c == u)) break; - old = atomic64_cmpxchg((v), c, c + (a)); + + asm volatile("addq %2,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + "subq %2,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "=r" (new) + : "0" (c), "er" (a)); + + old = atomic64_cmpxchg(v, c, new); if (likely(old == c)) break; c = old; } - return c != (u); + return c != u; } /** diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 02b47a6..d5c4b15 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -38,7 +38,7 @@ * a mask operation on a byte. 
*/ #define IS_IMMEDIATE(nr) (__builtin_constant_p(nr)) -#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3)) +#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((volatile void *)(addr) + ((nr)>>3)) #define CONST_MASK(nr) (1 << ((nr) & 7)) /** diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 7a10659..8bbf355 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -11,10 +11,15 @@ #include /* Physical address where kernel should be loaded. */ -#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ +#define ____LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ + (CONFIG_PHYSICAL_ALIGN - 1)) \ & ~(CONFIG_PHYSICAL_ALIGN - 1)) +#ifndef __ASSEMBLY__ +extern unsigned char __LOAD_PHYSICAL_ADDR[]; +#define LOAD_PHYSICAL_ADDR ((unsigned long)__LOAD_PHYSICAL_ADDR) +#endif + /* Minimum kernel alignment, as a power of two */ #ifdef CONFIG_X86_64 #define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h index 549860d..7d45f68 100644 --- a/arch/x86/include/asm/cache.h +++ b/arch/x86/include/asm/cache.h @@ -5,9 +5,10 @@ /* L1 cache line size */ #define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) -#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) #define __read_mostly __attribute__((__section__(".data.read_mostly"))) +#define __read_only __attribute__((__section__(".data.read_only"))) #ifdef CONFIG_X86_VSMP /* vSMP Internode cacheline shift */ diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index b54f6af..5b376a6 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -60,7 +60,7 @@ PAGEFLAG(WC, WC) static inline unsigned long get_page_memtype(struct page *pg) { if (!PageUncached(pg) && !PageWC(pg)) - return -1; + return ~0UL; else if (!PageUncached(pg) && PageWC(pg)) return _PAGE_CACHE_WC; else if (PageUncached(pg) && !PageWC(pg)) @@ -85,7 +85,7 @@ static 
inline void set_page_memtype(struct page *pg, unsigned long memtype) SetPageWC(pg); break; default: - case -1: + case ~0UL: ClearPageUncached(pg); ClearPageWC(pg); break; diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index 0e63c9a..ab8d972 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -52,32 +52,32 @@ For 32-bit we have the following conventions - kernel is built with * for assembly code: */ -#define R15 0 -#define R14 8 -#define R13 16 -#define R12 24 -#define RBP 32 -#define RBX 40 +#define R15 (0) +#define R14 (8) +#define R13 (16) +#define R12 (24) +#define RBP (32) +#define RBX (40) /* arguments: interrupts/non tracing syscalls only save up to here: */ -#define R11 48 -#define R10 56 -#define R9 64 -#define R8 72 -#define RAX 80 -#define RCX 88 -#define RDX 96 -#define RSI 104 -#define RDI 112 -#define ORIG_RAX 120 /* + error_code */ +#define R11 (48) +#define R10 (56) +#define R9 (64) +#define R8 (72) +#define RAX (80) +#define RCX (88) +#define RDX (96) +#define RSI (104) +#define RDI (112) +#define ORIG_RAX (120) /* + error_code */ /* end of arguments */ /* cpu exception frame or undefined in case of fast syscall: */ -#define RIP 128 -#define CS 136 -#define EFLAGS 144 -#define RSP 152 -#define SS 160 +#define RIP (128) +#define CS (136) +#define EFLAGS (144) +#define RSP (152) +#define SS (160) #define ARGOFFSET R11 #define SWFRAME ORIG_RAX diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h index 46fc474..b02b0f9 100644 --- a/arch/x86/include/asm/checksum_32.h +++ b/arch/x86/include/asm/checksum_32.h @@ -31,6 +31,14 @@ asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst, int len, __wsum sum, int *src_err_ptr, int *dst_err_ptr); +asmlinkage __wsum csum_partial_copy_generic_to_user(const void *src, void *dst, + int len, __wsum sum, + int *src_err_ptr, int *dst_err_ptr); + +asmlinkage __wsum csum_partial_copy_generic_from_user(const void 
*src, void *dst, + int len, __wsum sum, + int *src_err_ptr, int *dst_err_ptr); + /* * Note: when you get a NULL pointer exception here this means someone * passed in an incorrect kernel address to one of these functions. @@ -50,7 +58,7 @@ static inline __wsum csum_partial_copy_from_user(const void __user *src, int *err_ptr) { might_sleep(); - return csum_partial_copy_generic((__force void *)src, dst, + return csum_partial_copy_generic_from_user((__force void *)src, dst, len, sum, err_ptr, NULL); } @@ -178,7 +186,7 @@ static inline __wsum csum_and_copy_to_user(const void *src, { might_sleep(); if (access_ok(VERIFY_WRITE, dst, len)) - return csum_partial_copy_generic(src, (__force void *)dst, + return csum_partial_copy_generic_to_user(src, (__force void *)dst, len, sum, NULL, err_ptr); if (len) diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 617bd56..7b047a1 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -4,6 +4,7 @@ #include #include #include +#include #include static inline void fill_ldt(struct desc_struct *desc, @@ -15,6 +16,7 @@ static inline void fill_ldt(struct desc_struct *desc, desc->base1 = (info->base_addr & 0x00ff0000) >> 16; desc->type = (info->read_exec_only ^ 1) << 1; desc->type |= info->contents << 2; + desc->type |= info->seg_not_present ^ 1; desc->s = 1; desc->dpl = 0x3; desc->p = info->seg_not_present ^ 1; @@ -31,16 +33,12 @@ static inline void fill_ldt(struct desc_struct *desc, } extern struct desc_ptr idt_descr; -extern gate_desc idt_table[]; - -struct gdt_page { - struct desc_struct gdt[GDT_ENTRIES]; -} __attribute__((aligned(PAGE_SIZE))); -DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page); +extern gate_desc idt_table[256]; +extern struct desc_struct cpu_gdt_table[NR_CPUS][PAGE_SIZE / sizeof(struct desc_struct)]; static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) { - return per_cpu(gdt_page, cpu).gdt; + return cpu_gdt_table[cpu]; } #ifdef CONFIG_X86_64 @@ -65,9 
+63,14 @@ static inline void pack_gate(gate_desc *gate, unsigned char type, unsigned long base, unsigned dpl, unsigned flags, unsigned short seg) { - gate->a = (seg << 16) | (base & 0xffff); - gate->b = (base & 0xffff0000) | - (((0x80 | type | (dpl << 5)) & 0xff) << 8); + gate->gate.offset_low = base; + gate->gate.seg = seg; + gate->gate.reserved = 0; + gate->gate.type = type; + gate->gate.s = 0; + gate->gate.dpl = dpl; + gate->gate.p = 1; + gate->gate.offset_high = base >> 16; } #endif @@ -115,13 +118,17 @@ static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate) { + pax_open_kernel(); memcpy(&idt[entry], gate, sizeof(*gate)); + pax_close_kernel(); } static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry, const void *desc) { + pax_open_kernel(); memcpy(&ldt[entry], desc, 8); + pax_close_kernel(); } static inline void native_write_gdt_entry(struct desc_struct *gdt, int entry, @@ -139,7 +146,10 @@ static inline void native_write_gdt_entry(struct desc_struct *gdt, int entry, size = sizeof(struct desc_struct); break; } + + pax_open_kernel(); memcpy(&gdt[entry], desc, size); + pax_close_kernel(); } static inline void pack_descriptor(struct desc_struct *desc, unsigned long base, @@ -211,7 +221,9 @@ static inline void native_set_ldt(const void *addr, unsigned int entries) static inline void native_load_tr_desc(void) { + pax_open_kernel(); asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); + pax_close_kernel(); } static inline void native_load_gdt(const struct desc_ptr *dtr) @@ -246,8 +258,10 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) unsigned int i; struct desc_struct *gdt = get_cpu_gdt_table(cpu); + pax_open_kernel(); for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; + pax_close_kernel(); } #define _LDT_empty(info) \ @@ -309,7 +323,7 @@ static inline void 
set_desc_limit(struct desc_struct *desc, unsigned long limit) desc->limit = (limit >> 16) & 0xf; } -static inline void _set_gate(int gate, unsigned type, void *addr, +static inline void _set_gate(int gate, unsigned type, const void *addr, unsigned dpl, unsigned ist, unsigned seg) { gate_desc s; @@ -327,7 +341,7 @@ static inline void _set_gate(int gate, unsigned type, void *addr, * Pentium F0 0F bugfix can have resulted in the mapped * IDT being write-protected. */ -static inline void set_intr_gate(unsigned int n, void *addr) +static inline void set_intr_gate(unsigned int n, const void *addr) { BUG_ON((unsigned)n > 0xFF); _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); @@ -356,19 +370,19 @@ static inline void alloc_intr_gate(unsigned int n, void *addr) /* * This routine sets up an interrupt gate at directory privilege level 3. */ -static inline void set_system_intr_gate(unsigned int n, void *addr) +static inline void set_system_intr_gate(unsigned int n, const void *addr) { BUG_ON((unsigned)n > 0xFF); _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS); } -static inline void set_system_trap_gate(unsigned int n, void *addr) +static inline void set_system_trap_gate(unsigned int n, const void *addr) { BUG_ON((unsigned)n > 0xFF); _set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS); } -static inline void set_trap_gate(unsigned int n, void *addr) +static inline void set_trap_gate(unsigned int n, const void *addr) { BUG_ON((unsigned)n > 0xFF); _set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS); @@ -377,19 +391,31 @@ static inline void set_trap_gate(unsigned int n, void *addr) static inline void set_task_gate(unsigned int n, unsigned int gdt_entry) { BUG_ON((unsigned)n > 0xFF); - _set_gate(n, GATE_TASK, (void *)0, 0, 0, (gdt_entry<<3)); + _set_gate(n, GATE_TASK, (const void *)0, 0, 0, (gdt_entry<<3)); } -static inline void set_intr_gate_ist(int n, void *addr, unsigned ist) +static inline void set_intr_gate_ist(int n, const void *addr, unsigned ist) { 
BUG_ON((unsigned)n > 0xFF); _set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS); } -static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist) +static inline void set_system_intr_gate_ist(int n, const void *addr, unsigned ist) { BUG_ON((unsigned)n > 0xFF); _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS); } +#ifdef CONFIG_X86_32 +static inline void set_user_cs(unsigned long base, unsigned long limit, int cpu) +{ + struct desc_struct d; + + if (likely(limit)) + limit = (limit - 1UL) >> PAGE_SHIFT; + pack_descriptor(&d, base, limit, 0xFB, 0xC); + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_DEFAULT_USER_CS, &d, DESCTYPE_S); +} +#endif + #endif /* _ASM_X86_DESC_H */ diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index 9d66848..6b4a691 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h @@ -31,6 +31,12 @@ struct desc_struct { unsigned base1: 8, type: 4, s: 1, dpl: 2, p: 1; unsigned limit: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8; }; + struct { + u16 offset_low; + u16 seg; + unsigned reserved: 8, type: 4, s: 1, dpl: 2, p: 1; + unsigned offset_high: 16; + } gate; }; } __attribute__((packed)); diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index cee34e9..a7c3fa2 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -6,7 +6,7 @@ struct dev_archdata { void *acpi_handle; #endif #ifdef CONFIG_X86_64 -struct dma_map_ops *dma_ops; + const struct dma_map_ops *dma_ops; #endif #ifdef CONFIG_DMAR void *iommu; /* hook for IOMMU specific extension */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 6a25d5d..786b202 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -25,9 +25,9 @@ extern int iommu_merge; extern struct device x86_dma_fallback_dev; extern int panic_on_overflow; -extern struct dma_map_ops *dma_ops; +extern const struct dma_map_ops 
*dma_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { #ifdef CONFIG_X86_32 return dma_ops; @@ -44,7 +44,7 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) /* Make sure we keep the same behaviour */ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); @@ -122,7 +122,7 @@ static inline void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); void *memory; gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); @@ -149,7 +149,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, static inline void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t bus) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); WARN_ON(irqs_disabled()); /* for portability */ diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 40b4e61..40d8133 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -133,7 +133,7 @@ extern char *default_machine_specific_memory_setup(void); #define ISA_END_ADDRESS 0x100000 #define is_ISA_range(s, e) ((s) >= ISA_START_ADDRESS && (e) < ISA_END_ADDRESS) -#define BIOS_BEGIN 0x000a0000 +#define BIOS_BEGIN 0x000c0000 #define BIOS_END 0x00100000 #ifdef __KERNEL__ diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 8ac9d9a..0a6c96e 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -257,7 +257,25 @@ extern int force_personality32; the loader. 
We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. */ +#ifdef CONFIG_PAX_SEGMEXEC +#define ELF_ET_DYN_BASE ((current->mm->pax_flags & MF_PAX_SEGMEXEC) ? SEGMEXEC_TASK_SIZE/3*2 : TASK_SIZE/3*2) +#else #define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) +#endif + +#ifdef CONFIG_PAX_ASLR +#ifdef CONFIG_X86_32 +#define PAX_ELF_ET_DYN_BASE 0x10000000UL + +#define PAX_DELTA_MMAP_LEN (current->mm->pax_flags & MF_PAX_SEGMEXEC ? 15 : 16) +#define PAX_DELTA_STACK_LEN (current->mm->pax_flags & MF_PAX_SEGMEXEC ? 15 : 16) +#else +#define PAX_ELF_ET_DYN_BASE 0x400000UL + +#define PAX_DELTA_MMAP_LEN ((test_thread_flag(TIF_IA32)) ? 16 : TASK_SIZE_MAX_SHIFT - PAGE_SHIFT - 3) +#define PAX_DELTA_STACK_LEN ((test_thread_flag(TIF_IA32)) ? 16 : TASK_SIZE_MAX_SHIFT - PAGE_SHIFT - 3) +#endif +#endif /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. This could be done in user space, @@ -310,9 +328,7 @@ do { \ #define ARCH_DLINFO \ do { \ - if (vdso_enabled) \ - NEW_AUX_ENT(AT_SYSINFO_EHDR, \ - (unsigned long)current->mm->context.vdso); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, current->mm->context.vdso); \ } while (0) #define AT_SYSINFO 32 @@ -323,7 +339,7 @@ do { \ #endif /* !CONFIG_X86_32 */ -#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso) +#define VDSO_CURRENT_BASE (current->mm->context.vdso) #define VDSO_ENTRY \ ((unsigned long)VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall)) @@ -337,7 +353,4 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, extern int syscall32_setup_pages(struct linux_binprm *, int exstack); #define compat_arch_setup_additional_pages syscall32_setup_pages -extern unsigned long arch_randomize_brk(struct mm_struct *mm); -#define arch_randomize_brk arch_randomize_brk - #endif /* _ASM_X86_ELF_H */ diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h index cc70c1c..d96d011 100644 
--- a/arch/x86/include/asm/emergency-restart.h +++ b/arch/x86/include/asm/emergency-restart.h @@ -15,6 +15,6 @@ enum reboot_type { extern enum reboot_type reboot_type; -extern void machine_emergency_restart(void); +extern void machine_emergency_restart(void) __noreturn; #endif /* _ASM_X86_EMERGENCY_RESTART_H */ diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index 1f11ce4..7caabd1 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h @@ -12,16 +12,18 @@ #include #define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ + typecheck(u32 __user *, uaddr); \ asm volatile("1:\t" insn "\n" \ "2:\t.section .fixup,\"ax\"\n" \ "3:\tmov\t%3, %1\n" \ "\tjmp\t2b\n" \ "\t.previous\n" \ _ASM_EXTABLE(1b, 3b) \ - : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \ + : "=r" (oldval), "=r" (ret), "+m" (*(u32 __user *)____m(uaddr))\ : "i" (-EFAULT), "0" (oparg), "1" (0)) #define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ + typecheck(u32 __user *, uaddr); \ asm volatile("1:\tmovl %2, %0\n" \ "\tmovl\t%0, %3\n" \ "\t" insn "\n" \ @@ -34,10 +36,10 @@ _ASM_EXTABLE(1b, 4b) \ _ASM_EXTABLE(2b, 4b) \ : "=&a" (oldval), "=&r" (ret), \ - "+m" (*uaddr), "=&r" (tem) \ + "+m" (*(u32 __user *)____m(uaddr)), "=&r" (tem) \ : "r" (oparg), "i" (-EFAULT), "1" (0)) -static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) +static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; @@ -61,10 +63,10 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) switch (op) { case FUTEX_OP_SET: - __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); + __futex_atomic_op1(__copyuser_seg"xchgl %0, %2", ret, oldval, uaddr, oparg); break; case FUTEX_OP_ADD: - __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval, + __futex_atomic_op1(LOCK_PREFIX __copyuser_seg"xaddl %0, %2", ret, oldval, uaddr, oparg); break; case 
FUTEX_OP_OR: @@ -109,7 +111,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) return ret; } -static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, +static inline int futex_atomic_cmpxchg_inatomic(u32 __user *uaddr, int oldval, int newval) { @@ -119,16 +121,16 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, return -ENOSYS; #endif - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n" + asm volatile("1:\t" LOCK_PREFIX __copyuser_seg"cmpxchgl %3, %1\n" "2:\t.section .fixup, \"ax\"\n" "3:\tmov %2, %0\n" "\tjmp 2b\n" "\t.previous\n" _ASM_EXTABLE(1b, 3b) - : "=a" (oldval), "+m" (*uaddr) + : "=a" (oldval), "+m" (*(u32 __user *)____m(uaddr)) /* keep __user through the cast, as __futex_atomic_op1/2 do */ : "i" (-EFAULT), "r" (newval), "0" (oldval) : "memory" ); diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index ba180d9..3bad351 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -92,8 +92,8 @@ extern void setup_ioapic_dest(void); extern void enable_IO_APIC(void); /* Statistics */ -extern atomic_t irq_err_count; -extern atomic_t irq_mis_count; +extern atomic_unchecked_t irq_err_count; +extern atomic_unchecked_t irq_mis_count; /* EISA */ extern void eisa_set_level_irq(unsigned int irq); diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 0b20bbb..4cb1396 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -60,6 +60,11 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) { int err; +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + if ((unsigned long)fx < PAX_USER_SHADOW_BASE) + fx = (struct i387_fxsave_struct *)((void *)fx + PAX_USER_SHADOW_BASE); +#endif + asm volatile("1: rex64/fxrstor (%[fx])\n\t" "2:\n" ".section .fixup,\"ax\"\n" @@ -105,6 +110,11 @@ static inline int fxsave_user(struct 
i387_fxsave_struct __user *fx) { int err; +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + if ((unsigned long)fx < PAX_USER_SHADOW_BASE) + fx = (struct i387_fxsave_struct __user *)((void __user *)fx + PAX_USER_SHADOW_BASE); +#endif + asm volatile("1: rex64/fxsave (%[fx])\n\t" "2:\n" ".section .fixup,\"ax\"\n" @@ -195,13 +205,8 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) } /* We need a safe address that is cheap to find and that is already - in L1 during context switch. The best choices are unfortunately - different for UP and SMP */ -#ifdef CONFIG_SMP -#define safe_address (__per_cpu_offset[0]) -#else -#define safe_address (kstat_cpu(0).cpustat.user) -#endif + in L1 during context switch. */ +#define safe_address (init_tss[smp_processor_id()].x86_tss.sp0) /* * These must be called with preempt disabled @@ -291,7 +296,7 @@ static inline void kernel_fpu_begin(void) struct thread_info *me = current_thread_info(); preempt_disable(); if (me->status & TS_USEDFPU) - __save_init_fpu(me->task); + __save_init_fpu(current); else clts(); } diff --git a/arch/x86/include/asm/io_32.h b/arch/x86/include/asm/io_32.h index a299900..15c5410 100644 --- a/arch/x86/include/asm/io_32.h +++ b/arch/x86/include/asm/io_32.h @@ -3,6 +3,7 @@ #include #include +#include /* * This file contains the definitions for the x86 IO instructions @@ -42,6 +43,17 @@ #ifdef __KERNEL__ +#define ARCH_HAS_VALID_PHYS_ADDR_RANGE +static inline int valid_phys_addr_range(unsigned long addr, size_t count) +{ + return ((addr + count + PAGE_SIZE - 1) >> PAGE_SHIFT) < (1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) ? 1 : 0; +} + +static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t count) +{ + return (pfn + (count >> PAGE_SHIFT)) < (1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) ? 
1 : 0; +} + #include #include diff --git a/arch/x86/include/asm/io_64.h b/arch/x86/include/asm/io_64.h index 2440678..c158b88 100644 --- a/arch/x86/include/asm/io_64.h +++ b/arch/x86/include/asm/io_64.h @@ -140,6 +140,17 @@ __OUTS(l) #include +#define ARCH_HAS_VALID_PHYS_ADDR_RANGE +static inline int valid_phys_addr_range(unsigned long addr, size_t count) +{ + return ((addr + count + PAGE_SIZE - 1) >> PAGE_SHIFT) < (1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) ? 1 : 0; +} + +static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t count) +{ + return (pfn + (count >> PAGE_SHIFT)) < (1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) ? 1 : 0; +} + #include void __memcpy_fromio(void *, unsigned long, unsigned); diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index fd6d21b..8b13915 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -3,7 +3,7 @@ extern void pci_iommu_shutdown(void); extern void no_iommu_init(void); -extern struct dma_map_ops nommu_dma_ops; +extern const struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; extern int iommu_pass_through; diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 9e2b952..557206e 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -142,6 +142,11 @@ static inline unsigned long __raw_local_irq_save(void) sti; \ sysexit +#define GET_CR0_INTO_RDI mov %cr0, %rdi +#define SET_RDI_INTO_CR0 mov %rdi, %cr0 +#define GET_CR3_INTO_RDI mov %cr3, %rdi +#define SET_RDI_INTO_CR3 mov %rdi, %cr3 + #else #define INTERRUPT_RETURN iret #define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 4fe681d..bb6d40c 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -34,13 +34,8 @@ typedef u8 kprobe_opcode_t; #define BREAKPOINT_INSTRUCTION 0xcc #define 
RELATIVEJUMP_INSTRUCTION 0xe9 #define MAX_INSN_SIZE 16 -#define MAX_STACK_SIZE 64 -#define MIN_STACK_SIZE(ADDR) \ - (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ - THREAD_SIZE - (unsigned long)(ADDR))) \ - ? (MAX_STACK_SIZE) \ - : (((unsigned long)current_thread_info()) + \ - THREAD_SIZE - (unsigned long)(ADDR))) +#define MAX_STACK_SIZE 64UL +#define MIN_STACK_SIZE(ADDR) min(MAX_STACK_SIZE, current->thread.sp0 - (unsigned long)(ADDR)) #define flush_insn_slot(p) do { } while (0) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 08bc2ff..2e88d1f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -534,9 +534,9 @@ struct kvm_x86_ops { bool (*gb_page_enable)(void); const struct trace_print_flags *exit_reasons_str; -}; +} __do_const; -extern struct kvm_x86_ops *kvm_x86_ops; +extern const struct kvm_x86_ops *kvm_x86_ops; int kvm_mmu_module_init(void); void kvm_mmu_module_exit(void); diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 47b9b6f..815aaa1 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -18,26 +18,58 @@ typedef struct { static inline void local_inc(local_t *l) { - asm volatile(_ASM_INC "%0" + asm volatile(_ASM_INC "%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + _ASM_DEC "%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + : "+m" (l->a.counter)); } static inline void local_dec(local_t *l) { - asm volatile(_ASM_DEC "%0" + asm volatile(_ASM_DEC "%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + _ASM_INC "%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + : "+m" (l->a.counter)); } static inline void local_add(long i, local_t *l) { - asm volatile(_ASM_ADD "%1,%0" + asm volatile(_ASM_ADD "%1,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + _ASM_SUB "%1,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + : "+m" (l->a.counter) : "ir" (i)); } static inline void local_sub(long i, local_t *l) { - asm 
volatile(_ASM_SUB "%1,%0" + asm volatile(_ASM_SUB "%1,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + _ASM_ADD "%1,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + : "+m" (l->a.counter) : "ir" (i)); } @@ -55,7 +87,16 @@ static inline int local_sub_and_test(long i, local_t *l) { unsigned char c; - asm volatile(_ASM_SUB "%2,%0; sete %1" + asm volatile(_ASM_SUB "%2,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + _ASM_ADD "%2,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sete %1\n" : "+m" (l->a.counter), "=qm" (c) : "ir" (i) : "memory"); return c; @@ -73,7 +114,16 @@ static inline int local_dec_and_test(local_t *l) { unsigned char c; - asm volatile(_ASM_DEC "%0; sete %1" + asm volatile(_ASM_DEC "%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + _ASM_INC "%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sete %1\n" : "+m" (l->a.counter), "=qm" (c) : : "memory"); return c != 0; @@ -91,7 +141,16 @@ static inline int local_inc_and_test(local_t *l) { unsigned char c; - asm volatile(_ASM_INC "%0; sete %1" + asm volatile(_ASM_INC "%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + _ASM_DEC "%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sete %1\n" : "+m" (l->a.counter), "=qm" (c) : : "memory"); return c != 0; @@ -110,7 +169,16 @@ static inline int local_add_negative(long i, local_t *l) { unsigned char c; - asm volatile(_ASM_ADD "%2,%0; sets %1" + asm volatile(_ASM_ADD "%2,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + _ASM_SUB "%2,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sets %1\n" : "+m" (l->a.counter), "=qm" (c) : "ir" (i) : "memory"); return c; @@ -133,7 +201,15 @@ static inline long local_add_return(long i, local_t *l) #endif /* Modern 486+ processor */ __i = i; - asm volatile(_ASM_XADD "%0, %1;" + asm volatile(_ASM_XADD "%0, %1\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + _ASM_MOV "%0,%1\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + : "+r" (i), "+m" (l->a.counter) : : "memory"); return i + 
__i; diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index ef51b50..514ba37 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -12,13 +12,13 @@ struct device; enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; struct microcode_ops { - enum ucode_state (*request_microcode_user) (int cpu, + enum ucode_state (* const request_microcode_user) (int cpu, const void __user *buf, size_t size); - enum ucode_state (*request_microcode_fw) (int cpu, + enum ucode_state (* const request_microcode_fw) (int cpu, struct device *device); - void (*microcode_fini_cpu) (int cpu); + void (* const microcode_fini_cpu) (int cpu); /* * The generic 'microcode_core' part guarantees that @@ -38,18 +38,18 @@ struct ucode_cpu_info { extern struct ucode_cpu_info ucode_cpu_info[]; #ifdef CONFIG_MICROCODE_INTEL -extern struct microcode_ops * __init init_intel_microcode(void); +extern const struct microcode_ops * __init init_intel_microcode(void); #else -static inline struct microcode_ops * __init init_intel_microcode(void) +static inline const struct microcode_ops * __init init_intel_microcode(void) { return NULL; } #endif /* CONFIG_MICROCODE_INTEL */ #ifdef CONFIG_MICROCODE_AMD -extern struct microcode_ops * __init init_amd_microcode(void); +extern const struct microcode_ops * __init init_amd_microcode(void); #else -static inline struct microcode_ops * __init init_amd_microcode(void) +static inline const struct microcode_ops * __init init_amd_microcode(void) { return NULL; } diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/asm/mman.h index 593e51d..fa69c9a 100644 --- a/arch/x86/include/asm/mman.h +++ b/arch/x86/include/asm/mman.h @@ -5,4 +5,14 @@ #include +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ +#ifdef CONFIG_X86_32 +#define arch_mmap_check i386_mmap_check +int i386_mmap_check(unsigned long addr, unsigned long len, + unsigned long flags); +#endif +#endif +#endif + #endif /* _ASM_X86_MMAN_H */ diff --git 
a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 80a1dee..239c67d 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -9,10 +9,23 @@ * we put the segment information here. */ typedef struct { - void *ldt; + struct desc_struct *ldt; int size; struct mutex lock; - void *vdso; + unsigned long vdso; + +#ifdef CONFIG_X86_32 +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + unsigned long user_cs_base; + unsigned long user_cs_limit; + +#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_SMP) + cpumask_t cpu_user_cs_mask; +#endif + +#endif +#endif + } mm_context_t; #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 8b5393e..8143173 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -24,6 +24,18 @@ void destroy_context(struct mm_struct *mm); static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + unsigned int i; + pgd_t *pgd; + + pax_open_kernel(); + pgd = get_cpu_pgd(smp_processor_id()); + for (i = USER_PGD_PTRS; i < 2 * USER_PGD_PTRS; ++i) + set_pgd_batched(pgd+i, native_make_pgd(0)); + pax_close_kernel(); +#endif + #ifdef CONFIG_SMP if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); @@ -34,16 +46,30 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { unsigned cpu = smp_processor_id(); +#if defined(CONFIG_X86_32) && (defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC)) && defined(CONFIG_SMP) + int tlbstate = TLBSTATE_OK; +#endif if (likely(prev != next)) { #ifdef CONFIG_SMP +#if defined(CONFIG_X86_32) && (defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC)) + tlbstate = percpu_read(cpu_tlbstate.state); +#endif percpu_write(cpu_tlbstate.state, TLBSTATE_OK); percpu_write(cpu_tlbstate.active_mm, next); 
#endif cpumask_set_cpu(cpu, mm_cpumask(next)); /* Re-load page tables */ +#ifdef CONFIG_PAX_PER_CPU_PGD + pax_open_kernel(); + __clone_user_pgds(get_cpu_pgd(cpu), next->pgd, USER_PGD_PTRS); + __shadow_user_pgds(get_cpu_pgd(cpu) + USER_PGD_PTRS, next->pgd, USER_PGD_PTRS); + pax_close_kernel(); + load_cr3(get_cpu_pgd(cpu)); +#else load_cr3(next->pgd); +#endif /* stop flush ipis for the previous mm */ cpumask_clear_cpu(cpu, mm_cpumask(prev)); @@ -53,9 +79,38 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ if (unlikely(prev->context.ldt != next->context.ldt)) load_LDT_nolock(&next->context); - } + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_SMP) + if (!nx_enabled) { + smp_mb__before_clear_bit(); + cpu_clear(cpu, prev->context.cpu_user_cs_mask); + smp_mb__after_clear_bit(); + cpu_set(cpu, next->context.cpu_user_cs_mask); + } +#endif + +#if defined(CONFIG_X86_32) && (defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC)) + if (unlikely(prev->context.user_cs_base != next->context.user_cs_base || + prev->context.user_cs_limit != next->context.user_cs_limit)) + set_user_cs(next->context.user_cs_base, next->context.user_cs_limit, cpu); #ifdef CONFIG_SMP + else if (unlikely(tlbstate != TLBSTATE_OK)) + set_user_cs(next->context.user_cs_base, next->context.user_cs_limit, cpu); +#endif +#endif + + } else { + +#ifdef CONFIG_PAX_PER_CPU_PGD + pax_open_kernel(); + __clone_user_pgds(get_cpu_pgd(cpu), next->pgd, USER_PGD_PTRS); + __shadow_user_pgds(get_cpu_pgd(cpu) + USER_PGD_PTRS, next->pgd, USER_PGD_PTRS); + pax_close_kernel(); + load_cr3(get_cpu_pgd(cpu)); +#endif + +#ifdef CONFIG_SMP percpu_write(cpu_tlbstate.state, TLBSTATE_OK); BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); @@ -64,11 +119,28 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * tlb flush IPI delivery. We must reload CR3 * to make sure to use no freed page tables. 
*/ + +#ifndef CONFIG_PAX_PER_CPU_PGD load_cr3(next->pgd); +#endif + load_LDT_nolock(&next->context); + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_PAGEEXEC) + if (!nx_enabled) + cpu_set(cpu, next->context.cpu_user_cs_mask); +#endif + +#if defined(CONFIG_X86_32) && (defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC)) +#ifdef CONFIG_PAX_PAGEEXEC + if (!((next->pax_flags & MF_PAX_PAGEEXEC) && nx_enabled)) +#endif + set_user_cs(next->context.user_cs_base, next->context.user_cs_limit, cpu); +#endif + } +#endif } -#endif } #define activate_mm(prev, next) \ diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index 3e2ce58..caaf478 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -5,6 +5,7 @@ #ifdef CONFIG_X86_64 /* X86_64 does not define MODULE_PROC_FAMILY */ +#define MODULE_PROC_FAMILY "" #elif defined CONFIG_M386 #define MODULE_PROC_FAMILY "386 " #elif defined CONFIG_M486 @@ -59,13 +60,26 @@ #error unknown processor family #endif -#ifdef CONFIG_X86_32 -# ifdef CONFIG_4KSTACKS -# define MODULE_STACKSIZE "4KSTACKS " -# else -# define MODULE_STACKSIZE "" -# endif -# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE +#if defined(CONFIG_X86_32) && defined(CONFIG_4KSTACKS) +#define MODULE_STACKSIZE "4KSTACKS " +#else +#define MODULE_STACKSIZE "" #endif +#ifdef CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_BTS +#define MODULE_PAX_KERNEXEC "KERNEXEC_BTS " +#elif defined(CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_OR) +#define MODULE_PAX_KERNEXEC "KERNEXEC_OR " +#else +#define MODULE_PAX_KERNEXEC "" +#endif + +#ifdef CONFIG_PAX_MEMORY_UDEREF +#define MODULE_PAX_UDEREF "UDEREF " +#else +#define MODULE_PAX_UDEREF "" +#endif + +#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE MODULE_PAX_KERNEXEC MODULE_PAX_UDEREF + #endif /* _ASM_X86_MODULE_H */ diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 7639dbf..e08a58c 100644 --- 
a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -56,7 +56,7 @@ void copy_page(void *to, void *from); /* duplicated to the one in bootmem.h */ extern unsigned long max_pfn; -extern unsigned long phys_base; +extern const unsigned long phys_base; extern unsigned long __phys_addr(unsigned long); #define __phys_reloc_hide(x) (x) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index efb3899..ef30687 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -648,6 +648,18 @@ static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) val); } +static inline void set_pgd_batched(pgd_t *pgdp, pgd_t pgd) +{ + pgdval_t val = native_pgd_val(pgd); + + if (sizeof(pgdval_t) > sizeof(long)) + PVOP_VCALL3(pv_mmu_ops.set_pgd_batched, pgdp, + val, (u64)val >> 32); + else + PVOP_VCALL2(pv_mmu_ops.set_pgd_batched, pgdp, + val); +} + static inline void pgd_clear(pgd_t *pgdp) { set_pgd(pgdp, __pgd(0)); @@ -729,6 +741,21 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, pv_mmu_ops.set_fixmap(idx, phys, flags); } +#ifdef CONFIG_PAX_KERNEXEC +static inline unsigned long pax_open_kernel(void) +{ + return PVOP_CALL0(unsigned long, pv_mmu_ops.pax_open_kernel); +} + +static inline unsigned long pax_close_kernel(void) +{ + return PVOP_CALL0(unsigned long, pv_mmu_ops.pax_close_kernel); +} +#else +static inline unsigned long pax_open_kernel(void) { return 0; } +static inline unsigned long pax_close_kernel(void) { return 0; } +#endif + #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) static inline int __raw_spin_is_locked(struct raw_spinlock *lock) @@ -945,7 +972,7 @@ extern void default_banner(void); #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4) -#define PARA_INDIRECT(addr) *%cs:addr +#define PARA_INDIRECT(addr) *%ss:addr #endif #define INTERRUPT_RETURN \ @@ -1022,6 
+1049,21 @@ extern void default_banner(void); PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \ CLBR_NONE, \ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit)) + +#define GET_CR0_INTO_RDI \ + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ + mov %rax,%rdi + +#define SET_RDI_INTO_CR0 \ + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_write_cr0) + +#define GET_CR3_INTO_RDI \ + call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr3); \ + mov %rax,%rdi + +#define SET_RDI_INTO_CR3 \ + call PARA_INDIRECT(pv_mmu_ops+PV_MMU_write_cr3) + #endif /* CONFIG_X86_32 */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 9357473..aeb2de5 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -78,19 +78,19 @@ struct pv_init_ops { */ unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, unsigned long addr, unsigned len); -}; +} __no_const; struct pv_lazy_ops { /* Set deferred update mode, used for batching operations. */ void (*enter)(void); void (*leave)(void); -}; +} __no_const; struct pv_time_ops { unsigned long long (*sched_clock)(void); unsigned long (*get_tsc_khz)(void); -}; +} __no_const; struct pv_cpu_ops { /* hooks for various privileged instructions */ @@ -186,7 +186,7 @@ struct pv_cpu_ops { void (*start_context_switch)(struct task_struct *prev); void (*end_context_switch)(struct task_struct *next); -}; +} __no_const; struct pv_irq_ops { /* @@ -217,7 +217,7 @@ struct pv_apic_ops { unsigned long start_eip, unsigned long start_esp); #endif -}; +} __no_const; struct pv_mmu_ops { unsigned long (*read_cr2)(void); @@ -301,6 +301,7 @@ struct pv_mmu_ops { struct paravirt_callee_save make_pud; void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); + void (*set_pgd_batched)(pgd_t *pudp, pgd_t pgdval); #endif /* PAGETABLE_LEVELS == 4 */ #endif /* PAGETABLE_LEVELS >= 3 */ @@ -316,6 +317,12 @@ struct pv_mmu_ops { an mfn. We can tell which is which from the index. 
*/ void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags); + +#ifdef CONFIG_PAX_KERNEXEC + unsigned long (*pax_open_kernel)(void); + unsigned long (*pax_close_kernel)(void); +#endif + }; struct raw_spinlock; @@ -326,7 +333,7 @@ struct pv_lock_ops { void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags); int (*spin_trylock)(struct raw_spinlock *lock); void (*spin_unlock)(struct raw_spinlock *lock); -}; +} __no_const; /* This contains all the paravirt structures: we get a convenient * number for each function using the offset which we use to indicate diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index b399988..3f47c38 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -89,16 +89,16 @@ extern int (*pcibios_enable_irq)(struct pci_dev *dev); extern void (*pcibios_disable_irq)(struct pci_dev *dev); struct pci_raw_ops { - int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn, + int (* const read)(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 *val); - int (*write)(unsigned int domain, unsigned int bus, unsigned int devfn, + int (* const write)(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 val); }; -extern struct pci_raw_ops *raw_pci_ops; -extern struct pci_raw_ops *raw_pci_ext_ops; +extern const struct pci_raw_ops *raw_pci_ops; +extern const struct pci_raw_ops *raw_pci_ext_ops; -extern struct pci_raw_ops pci_direct_conf1; +extern const struct pci_raw_ops pci_direct_conf1; extern bool port_cf9_safe; /* arch_initcall level */ diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index b65a36d..50345a4 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -78,6 +78,7 @@ do { \ if (0) { \ T__ tmp__; \ tmp__ = (val); \ + (void)tmp__; \ } \ switch (sizeof(var)) { \ case 1: \ diff --git a/arch/x86/include/asm/pgalloc.h 
b/arch/x86/include/asm/pgalloc.h index 271de94..ef944d6 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -63,6 +63,13 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT); + set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); +} + +static inline void pmd_populate_user(struct mm_struct *mm, + pmd_t *pmd, pte_t *pte) +{ + paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT); set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); } diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index 2334982..70bc412 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h @@ -18,7 +18,9 @@ static inline void native_set_pte(pte_t *ptep , pte_t pte) static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { + pax_open_kernel(); *pmdp = pmd; + pax_close_kernel(); } static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 33927d2..ccde329 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -38,12 +38,16 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { + pax_open_kernel(); set_64bit((unsigned long long *)(pmdp), native_pmd_val(pmd)); + pax_close_kernel(); } static inline void native_set_pud(pud_t *pudp, pud_t pud) { + pax_open_kernel(); set_64bit((unsigned long long *)(pudp), native_pud_val(pud)); + pax_close_kernel(); } /* diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index af6fd36..867ff74 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -39,6 +39,7 @@ extern struct list_head pgd_list; #ifndef __PAGETABLE_PUD_FOLDED #define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) +#define set_pgd_batched(pgdp, pgd) 
native_set_pgd_batched(pgdp, pgd) #define pgd_clear(pgd) native_pgd_clear(pgd) #endif @@ -74,12 +75,51 @@ extern struct list_head pgd_list; #define arch_end_context_switch(prev) do {} while(0) +#define pax_open_kernel() native_pax_open_kernel() +#define pax_close_kernel() native_pax_close_kernel() #endif /* CONFIG_PARAVIRT */ +#define __HAVE_ARCH_PAX_OPEN_KERNEL +#define __HAVE_ARCH_PAX_CLOSE_KERNEL + +#ifdef CONFIG_PAX_KERNEXEC +static inline unsigned long native_pax_open_kernel(void) +{ + unsigned long cr0; + + preempt_disable(); + barrier(); + cr0 = read_cr0() ^ X86_CR0_WP; + BUG_ON(unlikely(cr0 & X86_CR0_WP)); + write_cr0(cr0); + return cr0 ^ X86_CR0_WP; +} + +static inline unsigned long native_pax_close_kernel(void) +{ + unsigned long cr0; + + cr0 = read_cr0() ^ X86_CR0_WP; + BUG_ON(unlikely(!(cr0 & X86_CR0_WP))); + write_cr0(cr0); + barrier(); + preempt_enable_no_resched(); + return cr0 ^ X86_CR0_WP; +} +#else +static inline unsigned long native_pax_open_kernel(void) { return 0; } +static inline unsigned long native_pax_close_kernel(void) { return 0; } +#endif + /* * The following only work if pte_present() is true. * Undefined behaviour if not.. 
*/ +static inline int pte_user(pte_t pte) +{ + return pte_val(pte) & _PAGE_USER; +} + static inline int pte_dirty(pte_t pte) { return pte_flags(pte) & _PAGE_DIRTY; @@ -167,9 +207,29 @@ static inline pte_t pte_wrprotect(pte_t pte) return pte_clear_flags(pte, _PAGE_RW); } +static inline pte_t pte_mkread(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_USER); +} + static inline pte_t pte_mkexec(pte_t pte) { - return pte_clear_flags(pte, _PAGE_NX); +#ifdef CONFIG_X86_PAE + if (__supported_pte_mask & _PAGE_NX) + return pte_clear_flags(pte, _PAGE_NX); + else +#endif + return pte_set_flags(pte, _PAGE_USER); +} + +static inline pte_t pte_exprotect(pte_t pte) +{ +#ifdef CONFIG_X86_PAE + if (__supported_pte_mask & _PAGE_NX) + return pte_set_flags(pte, _PAGE_NX); + else +#endif + return pte_clear_flags(pte, _PAGE_USER); } static inline pte_t pte_mkdirty(pte_t pte) @@ -302,6 +362,15 @@ pte_t *populate_extra_pte(unsigned long vaddr); #endif #ifndef __ASSEMBLY__ + +#ifdef CONFIG_PAX_PER_CPU_PGD +extern pgd_t cpu_pgd[NR_CPUS][PTRS_PER_PGD]; +static inline pgd_t *get_cpu_pgd(unsigned int cpu) +{ + return cpu_pgd[cpu]; +} +#endif + #include static inline int pte_none(pte_t pte) @@ -472,7 +541,7 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) static inline int pgd_bad(pgd_t pgd) { - return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE; + return (pgd_flags(pgd) & ~(_PAGE_USER | _PAGE_NX)) != _KERNPG_TABLE; } static inline int pgd_none(pgd_t pgd) @@ -495,7 +564,12 @@ static inline int pgd_none(pgd_t pgd) * pgd_offset() returns a (pgd_t *) * pgd_index() is used get the offset into the pgd page's array of pgd_t's; */ -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address))) +#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) + +#ifdef CONFIG_PAX_PER_CPU_PGD +#define pgd_offset_cpu(cpu, address) (get_cpu_pgd(cpu) + pgd_index(address)) +#endif + /* * a shortcut which implies the use of the kernel's pgd, instead * of a process's @@ -506,6 
+580,20 @@ static inline int pgd_none(pgd_t pgd) #define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET) #define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY) +#ifdef CONFIG_X86_32 +#define USER_PGD_PTRS KERNEL_PGD_BOUNDARY +#else +#define TASK_SIZE_MAX_SHIFT CONFIG_TASK_SIZE_MAX_SHIFT +#define USER_PGD_PTRS (_AC(1,UL) << (TASK_SIZE_MAX_SHIFT - PGDIR_SHIFT)) + +#ifdef CONFIG_PAX_MEMORY_UDEREF +#define PAX_USER_SHADOW_BASE (_AC(1,UL) << TASK_SIZE_MAX_SHIFT) +#else +#define PAX_USER_SHADOW_BASE (_AC(0,UL)) +#endif + +#endif + #ifndef __ASSEMBLY__ extern int direct_gbpages; @@ -611,11 +699,23 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, * dst and src can be on the same page, but the range must not overlap, * and must not cross a page boundary. */ -static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) +static inline void clone_pgd_range(pgd_t *dst, const pgd_t *src, int count) { - memcpy(dst, src, count * sizeof(pgd_t)); + pax_open_kernel(); + while (count--) + *dst++ = *src++; + pax_close_kernel(); } +#ifdef CONFIG_PAX_PER_CPU_PGD +extern void __clone_user_pgds(pgd_t *dst, const pgd_t *src, int count); +#endif + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) +extern void __shadow_user_pgds(pgd_t *dst, const pgd_t *src, int count); +#else +static inline void __shadow_user_pgds(pgd_t *dst, const pgd_t *src, int count) {} +#endif #include #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 750f1bf..971e839 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -26,9 +26,6 @@ struct mm_struct; struct vm_area_struct; -extern pgd_t swapper_pg_dir[1024]; -extern pgd_t trampoline_pg_dir[1024]; - static inline void pgtable_cache_init(void) { } static inline void check_pgt_cache(void) { } void paging_init(void); @@ -49,6 +46,12 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); # include #endif +extern pgd_t 
swapper_pg_dir[PTRS_PER_PGD]; +extern pgd_t trampoline_pg_dir[PTRS_PER_PGD]; +#ifdef CONFIG_X86_PAE +extern pmd_t swapper_pm_dir[PTRS_PER_PGD][PTRS_PER_PMD]; +#endif + #if defined(CONFIG_HIGHPTE) #define __KM_PTE \ (in_nmi() ? KM_NMI_PTE : \ @@ -73,7 +76,9 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); /* Clear a kernel PTE and flush it from the TLB */ #define kpte_clear_flush(ptep, vaddr) \ do { \ + pax_open_kernel(); \ pte_clear(&init_mm, (vaddr), (ptep)); \ + pax_close_kernel(); \ __flush_tlb_one((vaddr)); \ } while (0) @@ -85,6 +90,9 @@ do { \ #endif /* !__ASSEMBLY__ */ +#define HAVE_ARCH_UNMAPPED_AREA +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN + /* * kern_addr_valid() is (1) for FLATMEM and (0) for * SPARSEMEM and DISCONTIGMEM diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index 5e67c15..12d5c47 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -8,7 +8,7 @@ */ #ifdef CONFIG_X86_PAE # include -# define PMD_SIZE (1UL << PMD_SHIFT) +# define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) # define PMD_MASK (~(PMD_SIZE - 1)) #else # include @@ -46,6 +46,19 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ # define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) #endif +#ifdef CONFIG_PAX_KERNEXEC +#ifndef __ASSEMBLY__ +extern unsigned char MODULES_EXEC_VADDR[]; +extern unsigned char MODULES_EXEC_END[]; +#endif +#include +#define ktla_ktva(addr) ((addr) + LOAD_PHYSICAL_ADDR + PAGE_OFFSET) +#define ktva_ktla(addr) ((addr) - LOAD_PHYSICAL_ADDR - PAGE_OFFSET) +#else +#define ktla_ktva(addr) (addr) +#define ktva_ktla(addr) (addr) +#endif + #define MODULES_VADDR VMALLOC_START #define MODULES_END VMALLOC_END #define MODULES_LEN (MODULES_VADDR - MODULES_END) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index c57a301..6b414ff 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -16,10
+16,14 @@ extern pud_t level3_kernel_pgt[512]; extern pud_t level3_ident_pgt[512]; +extern pud_t level3_vmalloc_start_pgt[512]; +extern pud_t level3_vmalloc_end_pgt[512]; +extern pud_t level3_vmemmap_pgt[512]; +extern pmd_t level2_vmemmap_pgt[512]; extern pmd_t level2_kernel_pgt[512]; extern pmd_t level2_fixmap_pgt[512]; -extern pmd_t level2_ident_pgt[512]; -extern pgd_t init_level4_pgt[]; +extern pmd_t level2_ident_pgt[512*2]; +extern pgd_t init_level4_pgt[512]; #define swapper_pg_dir init_level4_pgt @@ -74,7 +78,9 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp) static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { + pax_open_kernel(); *pmdp = pmd; + pax_close_kernel(); } static inline void native_pmd_clear(pmd_t *pmd) @@ -94,6 +100,13 @@ static inline void native_pud_clear(pud_t *pud) static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { + pax_open_kernel(); + *pgdp = pgd; + pax_close_kernel(); +} + +static inline void native_set_pgd_batched(pgd_t *pgdp, pgd_t pgd) +{ *pgdp = pgd; } diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 766ea16..5b96cb3 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -59,5 +59,10 @@ typedef struct { pteval_t pte; } pte_t; #define MODULES_VADDR _AC(0xffffffffa0000000, UL) #define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) +#define MODULES_EXEC_VADDR MODULES_VADDR +#define MODULES_EXEC_END MODULES_END + +#define ktla_ktva(addr) (addr) +#define ktva_ktla(addr) (addr) #endif /* _ASM_X86_PGTABLE_64_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index d1f4a76..2f46ba1 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -16,12 +16,11 @@ #define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ #define _PAGE_BIT_PAT 7 /* on 4KB pages */ #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry
PPro+ */ -#define _PAGE_BIT_UNUSED1 9 /* available for programmer */ +#define _PAGE_BIT_SPECIAL 9 /* special mappings, no associated struct page */ #define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */ #define _PAGE_BIT_HIDDEN 11 /* hidden by kmemcheck */ #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ -#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 -#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 +#define _PAGE_BIT_CPA_TEST _PAGE_BIT_SPECIAL #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ /* If _PAGE_BIT_PRESENT is clear, we use these: */ @@ -39,7 +38,6 @@ #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) -#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) #define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) @@ -55,8 +53,10 @@ #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) -#else +#elif defined(CONFIG_KMEMCHECK) #define _PAGE_NX (_AT(pteval_t, 0)) +#else +#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) #endif #define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE) @@ -93,6 +93,9 @@ #define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ _PAGE_ACCESSED) +#define PAGE_READONLY_NOEXEC PAGE_READONLY +#define PAGE_SHARED_NOEXEC PAGE_SHARED + #define __PAGE_KERNEL_EXEC \ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL) #define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) @@ -103,8 +106,8 @@ #define __PAGE_KERNEL_WC (__PAGE_KERNEL | _PAGE_CACHE_WC) #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT) #define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD) -#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) -#define __PAGE_KERNEL_VSYSCALL_NOCACHE 
(__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT) +#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RO | _PAGE_USER) +#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_RO | _PAGE_PCD | _PAGE_PWT | _PAGE_USER) #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) #define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE) #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) @@ -163,8 +166,8 @@ * bits are combined, this will alow user to access the high address mapped * VDSO in the presence of CONFIG_COMPAT_VDSO */ -#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */ -#define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ +#define PTE_IDENT_ATTR 0x063 /* PRESENT+RW+DIRTY+ACCESSED */ +#define PDE_IDENT_ATTR 0x063 /* PRESENT+RW+DIRTY+ACCESSED */ #define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ #endif @@ -202,7 +205,17 @@ static inline pgdval_t pgd_flags(pgd_t pgd) { return native_pgd_val(pgd) & PTE_FLAGS_MASK; } +#endif +#if PAGETABLE_LEVELS == 3 +#include +#endif + +#if PAGETABLE_LEVELS == 2 +#include +#endif + +#ifndef __ASSEMBLY__ #if PAGETABLE_LEVELS > 3 typedef struct { pudval_t pud; } pud_t; @@ -216,8 +229,6 @@ static inline pudval_t native_pud_val(pud_t pud) return pud.pud; } #else -#include - static inline pudval_t native_pud_val(pud_t pud) { return native_pgd_val(pud.pgd); @@ -237,8 +248,6 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) return pmd.pmd; } #else -#include - static inline pmdval_t native_pmd_val(pmd_t pmd) { return native_pgd_val(pmd.pud.pgd); @@ -278,7 +287,16 @@ typedef struct page *pgtable_t; extern pteval_t __supported_pte_mask; extern void set_nx(void); + +#ifdef CONFIG_X86_32 +#ifdef CONFIG_X86_PAE extern int nx_enabled; +#else +#define nx_enabled (0) +#endif +#else +#define nx_enabled (1) +#endif #define pgprot_writecombine pgprot_writecombine extern pgprot_t pgprot_writecombine(pgprot_t prot); diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 
fa04dea..5f823fc 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -272,7 +272,7 @@ struct tss_struct { } ____cacheline_aligned; -DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss); +extern struct tss_struct init_tss[NR_CPUS]; /* * Save the original ist values for checking stack pointers during debugging @@ -911,11 +911,18 @@ static inline void spin_lock_prefetch(const void *x) */ #define TASK_SIZE PAGE_OFFSET #define TASK_SIZE_MAX TASK_SIZE + +#ifdef CONFIG_PAX_SEGMEXEC +#define SEGMEXEC_TASK_SIZE (TASK_SIZE / 2) +#define STACK_TOP ((current->mm->pax_flags & MF_PAX_SEGMEXEC)?SEGMEXEC_TASK_SIZE:TASK_SIZE) +#else #define STACK_TOP TASK_SIZE -#define STACK_TOP_MAX STACK_TOP +#endif + +#define STACK_TOP_MAX TASK_SIZE #define INIT_THREAD { \ - .sp0 = sizeof(init_stack) + (long)&init_stack, \ + .sp0 = sizeof(init_stack) + (long)&init_stack - 8, \ .vm86_info = NULL, \ .sysenter_cs = __KERNEL_CS, \ .io_bitmap_ptr = NULL, \ @@ -929,7 +936,7 @@ static inline void spin_lock_prefetch(const void *x) */ #define INIT_TSS { \ .x86_tss = { \ - .sp0 = sizeof(init_stack) + (long)&init_stack, \ + .sp0 = sizeof(init_stack) + (long)&init_stack - 8, \ .ss0 = __KERNEL_DS, \ .ss1 = __KERNEL_CS, \ .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ @@ -940,11 +947,7 @@ static inline void spin_lock_prefetch(const void *x) extern unsigned long thread_saved_pc(struct task_struct *tsk); #define THREAD_SIZE_LONGS (THREAD_SIZE/sizeof(unsigned long)) -#define KSTK_TOP(info) \ -({ \ - unsigned long *__ptr = (unsigned long *)(info); \ - (unsigned long)(&__ptr[THREAD_SIZE_LONGS]); \ -}) +#define KSTK_TOP(info) ((container_of(info, struct task_struct, tinfo))->thread.sp0) /* * The below -8 is to reserve 8 bytes on top of the ring0 stack. 
@@ -959,7 +962,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); #define task_pt_regs(task) \ ({ \ struct pt_regs *__regs__; \ - __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \ + __regs__ = (struct pt_regs *)((task)->thread.sp0); \ __regs__ - 1; \ }) @@ -969,13 +972,13 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); /* * User space process size. 47bits minus one guard page. */ -#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE) +#define TASK_SIZE_MAX ((1UL << TASK_SIZE_MAX_SHIFT) - PAGE_SIZE) /* This decides where the kernel will search for a free chunk of vm * space during mmap's. */ #define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \ - 0xc0000000 : 0xFFFFe000) + 0xc0000000 : 0xFFFFf000) #define TASK_SIZE (test_thread_flag(TIF_IA32) ? \ IA32_PAGE_OFFSET : TASK_SIZE_MAX) @@ -986,11 +989,11 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); #define STACK_TOP_MAX TASK_SIZE_MAX #define INIT_THREAD { \ - .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ + .sp0 = (unsigned long)&init_stack + sizeof(init_stack) - 16 \ } #define INIT_TSS { \ - .x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ + .x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) - 16 \ } /* @@ -1012,6 +1015,10 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, */ #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) +#ifdef CONFIG_PAX_SEGMEXEC +#define SEGMEXEC_TASK_UNMAPPED_BASE (PAGE_ALIGN(SEGMEXEC_TASK_SIZE / 3)) +#endif + #define KSTK_EIP(task) (task_pt_regs(task)->ip) /* Get/set a process' ability to use the timestamp counter instruction */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 0f0d908..f2e3da2 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -151,28 +151,29 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) } /* - * user_mode_vm(regs) determines whether a register 
set came from user mode. + * user_mode(regs) determines whether a register set came from user mode. * This is true if V8086 mode was enabled OR if the register set was from * protected mode with RPL-3 CS value. This tricky test checks that with * one comparison. Many places in the kernel can bypass this full check - * if they have already ruled out V8086 mode, so user_mode(regs) can be used. + * if they have already ruled out V8086 mode, so user_mode_novm(regs) can + * be used. */ -static inline int user_mode(struct pt_regs *regs) +static inline int user_mode_novm(struct pt_regs *regs) { #ifdef CONFIG_X86_32 return (regs->cs & SEGMENT_RPL_MASK) == USER_RPL; #else - return !!(regs->cs & 3); + return !!(regs->cs & SEGMENT_RPL_MASK); #endif } -static inline int user_mode_vm(struct pt_regs *regs) +static inline int user_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_32 return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= USER_RPL; #else - return user_mode(regs); + return user_mode_novm(regs); #endif } diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index 562d4fd..6e39df1 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -6,19 +6,19 @@ struct pt_regs; struct machine_ops { - void (*restart)(char *cmd); - void (*halt)(void); - void (*power_off)(void); + void (* __noreturn restart)(char *cmd); + void (* __noreturn halt)(void); + void (* __noreturn power_off)(void); void (*shutdown)(void); void (*crash_shutdown)(struct pt_regs *); - void (*emergency_restart)(void); -}; + void (* __noreturn emergency_restart)(void); +} __no_const; extern struct machine_ops machine_ops; void native_machine_crash_shutdown(struct pt_regs *regs); void native_machine_shutdown(void); -void machine_real_restart(const unsigned char *code, int length); +void machine_real_restart(const unsigned char *code, unsigned int length) __noreturn; typedef void (*nmi_shootdown_cb)(int, struct die_args*); void 
nmi_shootdown_cpus(nmi_shootdown_cb callback); diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 606ede1..dbfff37 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -118,6 +118,14 @@ static inline void __down_read(struct rw_semaphore *sem) { asm volatile("# beginning down_read\n\t" LOCK_PREFIX _ASM_INC "(%1)\n\t" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX _ASM_DEC "(%1)\n\t" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + /* adds 0x00000001, returns the old value */ " jns 1f\n" " call call_rwsem_down_read_failed\n" @@ -139,6 +147,14 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) "1:\n\t" " mov %1,%2\n\t" " add %3,%2\n\t" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + "sub %3,%2\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + " jle 2f\n\t" LOCK_PREFIX " cmpxchg %2,%0\n\t" " jnz 1b\n\t" @@ -160,6 +176,14 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) tmp = RWSEM_ACTIVE_WRITE_BIAS; asm volatile("# beginning down_write\n\t" LOCK_PREFIX " xadd %1,(%2)\n\t" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + "mov %1,(%2)\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + /* subtract 0x0000ffff, returns the old value */ " test %1,%1\n\t" /* was the count 0 before? 
*/ @@ -198,6 +222,14 @@ static inline void __up_read(struct rw_semaphore *sem) rwsem_count_t tmp = -RWSEM_ACTIVE_READ_BIAS; asm volatile("# beginning __up_read\n\t" LOCK_PREFIX " xadd %1,(%2)\n\t" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + "mov %1,(%2)\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + /* subtracts 1, returns the old value */ " jns 1f\n\t" " call call_rwsem_wake\n" @@ -216,6 +248,14 @@ static inline void __up_write(struct rw_semaphore *sem) rwsem_count_t tmp; asm volatile("# beginning __up_write\n\t" LOCK_PREFIX " xadd %1,(%2)\n\t" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + "mov %1,(%2)\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + /* tries to transition 0xffff0001 -> 0x00000000 */ " jz 1f\n" @@ -234,6 +274,14 @@ static inline void __downgrade_write(struct rw_semaphore *sem) { asm volatile("# beginning __downgrade_write\n\t" LOCK_PREFIX _ASM_ADD "%2,(%1)\n\t" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX _ASM_SUB "%2,(%1)\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + /* * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386) * 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64) @@ -253,7 +301,15 @@ static inline void __downgrade_write(struct rw_semaphore *sem) static inline void rwsem_atomic_add(rwsem_count_t delta, struct rw_semaphore *sem) { - asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0" + asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX _ASM_SUB "%1,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + : "+m" (sem->count) : "er" (delta)); } @@ -266,7 +322,15 @@ static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta, { rwsem_count_t tmp = delta; - asm volatile(LOCK_PREFIX "xadd %0,%1" + asm volatile(LOCK_PREFIX "xadd %0,%1\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + "mov %0,%1\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + : "+r" (tmp), "+m" (sem->count) : : "memory"); diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 
14e0ed8..7f7dd5e 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -62,10 +62,15 @@ * 26 - ESPFIX small SS * 27 - per-cpu [ offset to per-cpu data area ] * 28 - stack_canary-20 [ for stack protector ] - * 29 - unused - * 30 - unused + * 29 - PCI BIOS CS + * 30 - PCI BIOS DS * 31 - TSS for double fault handler */ +#define GDT_ENTRY_KERNEXEC_EFI_CS (1) +#define GDT_ENTRY_KERNEXEC_EFI_DS (2) +#define __KERNEXEC_EFI_CS (GDT_ENTRY_KERNEXEC_EFI_CS*8) +#define __KERNEXEC_EFI_DS (GDT_ENTRY_KERNEXEC_EFI_DS*8) + #define GDT_ENTRY_TLS_MIN 6 #define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) @@ -77,6 +82,8 @@ #define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0) +#define GDT_ENTRY_KERNEXEC_KERNEL_CS (4) + #define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1) #define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4) @@ -88,7 +95,7 @@ #define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14) #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8) -#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15) +#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE + 15) #ifdef CONFIG_SMP #define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8) #else @@ -102,6 +109,12 @@ #define __KERNEL_STACK_CANARY 0 #endif +#define GDT_ENTRY_PCIBIOS_CS (GDT_ENTRY_KERNEL_BASE + 17) +#define __PCIBIOS_CS (GDT_ENTRY_PCIBIOS_CS * 8) + +#define GDT_ENTRY_PCIBIOS_DS (GDT_ENTRY_KERNEL_BASE + 18) +#define __PCIBIOS_DS (GDT_ENTRY_PCIBIOS_DS * 8) + #define GDT_ENTRY_DOUBLEFAULT_TSS 31 /* @@ -139,7 +152,7 @@ */ /* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ -#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) +#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xFFFCU) == PNP_CS32 || ((x) & 0xFFFCU) == PNP_CS16) #else @@ -163,6 +176,8 @@ #define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS * 8 + 3) #define __USER32_DS __USER_DS +#define GDT_ENTRY_KERNEXEC_KERNEL_CS 7 + #define GDT_ENTRY_TSS 8 /* needs two entries */ #define GDT_ENTRY_LDT 10 /* needs two entries */ 
#define GDT_ENTRY_TLS_MIN 12 @@ -183,6 +198,7 @@ #endif #define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8) +#define __KERNEXEC_KERNEL_CS (GDT_ENTRY_KERNEXEC_KERNEL_CS * 8) #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8) #define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS* 8 + 3) #define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS* 8 + 3) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 4c2f63c..5685db2 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -24,7 +24,7 @@ extern unsigned int num_processors; DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU(cpumask_var_t, cpu_core_map); DECLARE_PER_CPU(u16, cpu_llc_id); -DECLARE_PER_CPU(int, cpu_number); +DECLARE_PER_CPU(unsigned int, cpu_number); static inline struct cpumask *cpu_sibling_mask(int cpu) { @@ -40,10 +40,7 @@ DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid); DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); /* Static state in head.S used to set up a CPU */ -extern struct { - void *sp; - unsigned short ss; -} stack_start; +extern unsigned long stack_start; /* Initial stack pointer address */ struct smp_ops { void (*smp_prepare_boot_cpu)(void); @@ -60,7 +57,7 @@ struct smp_ops { void (*send_call_func_ipi)(const struct cpumask *mask); void (*send_call_func_single_ipi)(int cpu); -}; +} __no_const; /* Globals due to paravirt */ extern void set_cpu_sibling_map(int cpu); @@ -175,14 +172,8 @@ extern unsigned disabled_cpus __cpuinitdata; extern int safe_smp_processor_id(void); #elif defined(CONFIG_X86_64_SMP) -#define raw_smp_processor_id() (percpu_read(cpu_number)) - -#define stack_smp_processor_id() \ -({ \ - struct thread_info *ti; \ - __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \ - ti->cpu; \ -}) +#define raw_smp_processor_id() (percpu_read(cpu_number)) +#define stack_smp_processor_id() raw_smp_processor_id() #define safe_smp_processor_id() smp_processor_id() #endif diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 
4e77853..4359783 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -249,6 +249,14 @@ static inline int __raw_write_can_lock(raw_rwlock_t *lock) static inline void __raw_read_lock(raw_rwlock_t *rw) { asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX " addl $1,(%0)\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + "jns 1f\n" "call __read_lock_failed\n\t" "1:\n" @@ -258,6 +266,14 @@ static inline void __raw_read_lock(raw_rwlock_t *rw) static inline void __raw_write_lock(raw_rwlock_t *rw) { asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX " addl %1,(%0)\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + "jz 1f\n" "call __write_lock_failed\n\t" "1:\n" @@ -286,12 +302,29 @@ static inline int __raw_write_trylock(raw_rwlock_t *lock) static inline void __raw_read_unlock(raw_rwlock_t *rw) { - asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory"); + asm volatile(LOCK_PREFIX "incl %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "decl %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + :"+m" (rw->lock) : : "memory"); } static inline void __raw_write_unlock(raw_rwlock_t *rw) { - asm volatile(LOCK_PREFIX "addl %1, %0" + asm volatile(LOCK_PREFIX "addl %1, %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "subl %1, %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory"); } diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 1575177..cb23f52 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -48,7 +48,7 @@ * head_32 for boot CPU and setup_per_cpu_areas() for others. 
*/ #define GDT_STACK_CANARY_INIT \ - [GDT_ENTRY_STACK_CANARY] = GDT_ENTRY_INIT(0x4090, 0, 0x18), + [GDT_ENTRY_STACK_CANARY] = GDT_ENTRY_INIT(0x4090, 0, 0x17), /* * Initialize the stackprotector canary value. @@ -113,7 +113,7 @@ static inline void setup_stack_canary_segment(int cpu) static inline void load_stack_canary_segment(void) { -#ifdef CONFIG_X86_32 +#if defined(CONFIG_X86_32) && !defined(CONFIG_PAX_MEMORY_UDEREF) asm volatile ("mov %0, %%gs" : : "r" (0)); #endif } diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index e0fbf29..858ef4a 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -132,7 +132,7 @@ do { \ "thread_return:\n\t" \ "movq "__percpu_arg([current_task])",%%rsi\n\t" \ __switch_canary \ - "movq %P[thread_info](%%rsi),%%r8\n\t" \ + "movq "__percpu_arg([thread_info])",%%r8\n\t" \ "movq %%rax,%%rdi\n\t" \ "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ "jnz ret_from_fork\n\t" \ @@ -143,7 +143,7 @@ do { \ [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ [_tif_fork] "i" (_TIF_FORK), \ - [thread_info] "i" (offsetof(struct task_struct, stack)), \ + [thread_info] "m" (per_cpu_var(current_tinfo)), \ [current_task] "m" (per_cpu_var(current_task)) \ __switch_canary_iparam \ : "memory", "cc" __EXTRA_CLOBBER) @@ -200,7 +200,7 @@ static inline unsigned long get_limit(unsigned long segment) { unsigned long __limit; asm("lsll %1,%0" : "=r" (__limit) : "r" (segment)); - return __limit + 1; + return __limit; } static inline void native_clts(void) @@ -340,12 +340,12 @@ void enable_hlt(void); void cpu_idle_wait(void); -extern unsigned long arch_align_stack(unsigned long sp); +#define arch_align_stack(x) ((x) & ~0xfUL) extern void free_init_pages(char *what, unsigned long begin, unsigned long end); void default_idle(void); -void stop_this_cpu(void *dummy); +void stop_this_cpu(void *dummy) __noreturn; /* * Force strict CPU ordering. 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 19c3ce4..8962535 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -10,6 +10,7 @@ #include #include #include +#include /* * low level task data that entry.S needs immediate access to @@ -24,7 +25,6 @@ struct exec_domain; #include struct thread_info { - struct task_struct *task; /* main task structure */ struct exec_domain *exec_domain; /* execution domain */ __u32 flags; /* low level flags */ __u32 status; /* thread synchronous flags */ @@ -34,18 +34,12 @@ struct thread_info { mm_segment_t addr_limit; struct restart_block restart_block; void __user *sysenter_return; -#ifdef CONFIG_X86_32 - unsigned long previous_esp; /* ESP of the previous stack in - case of nested (IRQ) stacks - */ - __u8 supervisor_stack[0]; -#endif + unsigned long lowest_stack; int uaccess_err; }; -#define INIT_THREAD_INFO(tsk) \ +#define INIT_THREAD_INFO \ { \ - .task = &tsk, \ .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ @@ -56,7 +50,7 @@ struct thread_info { }, \ } -#define init_thread_info (init_thread_union.thread_info) +#define init_thread_info (init_thread_union.stack) #define init_stack (init_thread_union.stack) #else /* !__ASSEMBLY__ */ @@ -163,45 +157,40 @@ struct thread_info { #define alloc_thread_info(tsk) \ ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER)) -#ifdef CONFIG_X86_32 - -#define STACK_WARN (THREAD_SIZE/8) -/* - * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. 
- */ -#ifndef __ASSEMBLY__ - - -/* how to get the current stack pointer from C */ -register unsigned long current_stack_pointer asm("esp") __used; - -/* how to get the thread information struct from C */ -static inline struct thread_info *current_thread_info(void) -{ - return (struct thread_info *) - (current_stack_pointer & ~(THREAD_SIZE - 1)); -} - -#else /* !__ASSEMBLY__ */ - +#ifdef __ASSEMBLY__ /* how to get the thread information struct from ASM */ #define GET_THREAD_INFO(reg) \ - movl $-THREAD_SIZE, reg; \ - andl %esp, reg + mov PER_CPU_VAR(current_tinfo), reg /* use this one if reg already contains %esp */ -#define GET_THREAD_INFO_WITH_ESP(reg) \ - andl $-THREAD_SIZE, reg +#define GET_THREAD_INFO_WITH_ESP(reg) GET_THREAD_INFO(reg) +#else +/* how to get the thread information struct from C */ +DECLARE_PER_CPU(struct thread_info *, current_tinfo); + +static __always_inline struct thread_info *current_thread_info(void) +{ + return percpu_read_stable(current_tinfo); +} +#endif + +#ifdef CONFIG_X86_32 + +#define STACK_WARN (THREAD_SIZE/8) +/* + * macros/functions for gaining access to the thread information structure + * + * preempt_count needs to be 1 initially, until the scheduler is functional. + */ +#ifndef __ASSEMBLY__ + +/* how to get the current stack pointer from C */ +register unsigned long current_stack_pointer asm("esp") __used; #endif #else /* X86_32 */ -#include -#define KERNEL_STACK_OFFSET (5*8) - /* * macros/functions for gaining access to the thread information structure * preempt_count needs to be 1 initially, until the scheduler is functional. 
@@ -209,21 +198,8 @@ static inline struct thread_info *current_thread_info(void) #ifndef __ASSEMBLY__ DECLARE_PER_CPU(unsigned long, kernel_stack); -static inline struct thread_info *current_thread_info(void) -{ - struct thread_info *ti; - ti = (void *)(percpu_read_stable(kernel_stack) + - KERNEL_STACK_OFFSET - THREAD_SIZE); - return ti; -} - -#else /* !__ASSEMBLY__ */ - -/* how to get the thread information struct from ASM */ -#define GET_THREAD_INFO(reg) \ - movq PER_CPU_VAR(kernel_stack),reg ; \ - subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg - +/* how to get the current stack pointer from C */ +register unsigned long current_stack_pointer asm("rsp") __used; #endif #endif /* !X86_32 */ @@ -260,5 +236,16 @@ extern void arch_task_cache_init(void); extern void free_thread_info(struct thread_info *ti); extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define arch_task_cache_init arch_task_cache_init + +#define __HAVE_THREAD_FUNCTIONS +#define task_thread_info(task) (&(task)->tinfo) +#define task_stack_page(task) ((task)->stack) +#define setup_thread_stack(p, org) do {} while (0) +#define end_of_stack(p) ((unsigned long *)task_stack_page(p) + 1) + +#define __HAVE_ARCH_TASK_STRUCT_ALLOCATOR +extern struct task_struct *alloc_task_struct(void); +extern void free_task_struct(struct task_struct *); + #endif #endif /* _ASM_X86_THREAD_INFO_H */ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 61c5874..8a046e9 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -8,12 +8,15 @@ #include #include #include +#include #include #include #define VERIFY_READ 0 #define VERIFY_WRITE 1 +extern void check_object_size(const void *ptr, unsigned long n, bool to); + /* * The fs value determines whether argument validity checking should be * performed or not. 
If get_fs() == USER_DS, checking is performed, with @@ -29,7 +32,12 @@ #define get_ds() (KERNEL_DS) #define get_fs() (current_thread_info()->addr_limit) +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_MEMORY_UDEREF) +void __set_fs(mm_segment_t x); +void set_fs(mm_segment_t x); +#else #define set_fs(x) (current_thread_info()->addr_limit = (x)) +#endif #define segment_eq(a, b) ((a).seg == (b).seg) @@ -77,7 +85,33 @@ * checks that the pointer is in the user space range - after calling * this function, memory access functions may still return -EFAULT. */ -#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0)) +#define __access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0)) +#define access_ok(type, addr, size) \ +({ \ + long __size = size; \ + unsigned long __addr = (unsigned long)addr; \ + unsigned long __addr_ao = __addr & PAGE_MASK; \ + unsigned long __end_ao = __addr + __size - 1; \ + bool __ret_ao = __range_not_ok(__addr, __size) == 0; \ + if (__ret_ao && unlikely((__end_ao ^ __addr_ao) & PAGE_MASK)) { \ + while(__addr_ao <= __end_ao) { \ + char __c_ao; \ + __addr_ao += PAGE_SIZE; \ + if (__size > PAGE_SIZE) \ + cond_resched(); \ + if (__get_user(__c_ao, (char __user *)__addr)) \ + break; \ + if (type != VERIFY_WRITE) { \ + __addr = __addr_ao; \ + continue; \ + } \ + if (__put_user(__c_ao, (char __user *)__addr)) \ + break; \ + __addr = __addr_ao; \ + } \ + } \ + __ret_ao; \ +}) /* * The exception table consists of pairs of addresses: the first is the @@ -183,12 +217,20 @@ extern int __get_user_bad(void); asm volatile("call __put_user_" #size : "=a" (__ret_pu) \ : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") - +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_MEMORY_UDEREF) +#define __copyuser_seg "gs;" +#define __COPYUSER_SET_ES "pushl %%gs; popl %%es\n" +#define __COPYUSER_RESTORE_ES "pushl %%ss; popl %%es\n" +#else +#define __copyuser_seg +#define __COPYUSER_SET_ES +#define __COPYUSER_RESTORE_ES +#endif #ifdef 
CONFIG_X86_32 #define __put_user_asm_u64(x, addr, err, errret) \ - asm volatile("1: movl %%eax,0(%2)\n" \ - "2: movl %%edx,4(%2)\n" \ + asm volatile("1: "__copyuser_seg"movl %%eax,0(%2)\n" \ + "2: "__copyuser_seg"movl %%edx,4(%2)\n" \ "3:\n" \ ".section .fixup,\"ax\"\n" \ "4: movl %3,%0\n" \ @@ -200,8 +242,8 @@ extern int __get_user_bad(void); : "A" (x), "r" (addr), "i" (errret), "0" (err)) #define __put_user_asm_ex_u64(x, addr) \ - asm volatile("1: movl %%eax,0(%1)\n" \ - "2: movl %%edx,4(%1)\n" \ + asm volatile("1: "__copyuser_seg"movl %%eax,0(%1)\n" \ + "2: "__copyuser_seg"movl %%edx,4(%1)\n" \ "3:\n" \ _ASM_EXTABLE(1b, 2b - 1b) \ _ASM_EXTABLE(2b, 3b - 2b) \ @@ -253,7 +295,7 @@ extern void __put_user_8(void); __typeof__(*(ptr)) __pu_val; \ __chk_user_ptr(ptr); \ might_fault(); \ - __pu_val = x; \ + __pu_val = (x); \ switch (sizeof(*(ptr))) { \ case 1: \ __put_user_x(1, __pu_val, ptr, __ret_pu); \ @@ -374,7 +416,7 @@ do { \ } while (0) #define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ - asm volatile("1: mov"itype" %2,%"rtype"1\n" \ + asm volatile("1: "__copyuser_seg"mov"itype" %2,%"rtype"1\n"\ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: mov %3,%0\n" \ @@ -382,7 +424,7 @@ do { \ " jmp 2b\n" \ ".previous\n" \ _ASM_EXTABLE(1b, 3b) \ - : "=r" (err), ltype(x) \ + : "=r" (err), ltype (x) \ : "m" (__m(addr)), "i" (errret), "0" (err)) #define __get_user_size_ex(x, ptr, size) \ @@ -407,7 +449,7 @@ do { \ } while (0) #define __get_user_asm_ex(x, addr, itype, rtype, ltype) \ - asm volatile("1: mov"itype" %1,%"rtype"0\n" \ + asm volatile("1: "__copyuser_seg"mov"itype" %1,%"rtype"0\n"\ "2:\n" \ _ASM_EXTABLE(1b, 2b - 1b) \ : ltype(x) : "m" (__m(addr))) @@ -424,13 +466,24 @@ do { \ int __gu_err; \ unsigned long __gu_val; \ __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ + (x) = (__typeof__(*(ptr)))__gu_val; \ __gu_err; \ }) /* FIXME: this hack is definitely wrong -AK */ struct __large_struct { 
unsigned long buf[100]; }; -#define __m(x) (*(struct __large_struct __user *)(x)) +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) +#define ____m(x) \ +({ \ + unsigned long ____x = (unsigned long)(x); \ + if (____x < PAX_USER_SHADOW_BASE) \ + ____x += PAX_USER_SHADOW_BASE; \ + (void __user *)____x; \ +}) +#else +#define ____m(x) (x) +#endif +#define __m(x) (*(struct __large_struct __user *)____m(x)) /* * Tell gcc we read from memory instead of writing: this is because @@ -438,7 +491,7 @@ struct __large_struct { unsigned long buf[100]; }; * aliasing issues. */ #define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \ - asm volatile("1: mov"itype" %"rtype"1,%2\n" \ + asm volatile("1: "__copyuser_seg"mov"itype" %"rtype"1,%2\n"\ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: mov %3,%0\n" \ @@ -446,10 +499,10 @@ struct __large_struct { unsigned long buf[100]; }; ".previous\n" \ _ASM_EXTABLE(1b, 3b) \ : "=r"(err) \ - : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err)) + : ltype (x), "m" (__m(addr)), "i" (errret), "0" (err)) #define __put_user_asm_ex(x, addr, itype, rtype, ltype) \ - asm volatile("1: mov"itype" %"rtype"0,%1\n" \ + asm volatile("1: "__copyuser_seg"mov"itype" %"rtype"0,%1\n"\ "2:\n" \ _ASM_EXTABLE(1b, 2b - 1b) \ : : ltype(x), "m" (__m(addr))) @@ -488,8 +541,12 @@ struct __large_struct { unsigned long buf[100]; }; * On error, the variable @x is set to zero. */ +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) +#define __get_user(x, ptr) get_user((x), (ptr)) +#else #define __get_user(x, ptr) \ __get_user_nocheck((x), (ptr), sizeof(*(ptr))) +#endif /** * __put_user: - Write a simple value into user space, with less checking. @@ -511,8 +568,12 @@ struct __large_struct { unsigned long buf[100]; }; * Returns zero on success, or -EFAULT on error. 
*/ +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) +#define __put_user(x, ptr) put_user((x), (ptr)) +#else #define __put_user(x, ptr) \ __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) +#endif #define __get_user_unaligned __get_user #define __put_user_unaligned __put_user @@ -530,7 +591,7 @@ struct __large_struct { unsigned long buf[100]; }; #define get_user_ex(x, ptr) do { \ unsigned long __gue_val; \ __get_user_size_ex((__gue_val), (ptr), (sizeof(*(ptr)))); \ - (x) = (__force __typeof__(*(ptr)))__gue_val; \ + (x) = (__typeof__(*(ptr)))__gue_val; \ } while (0) #ifdef CONFIG_X86_WP_WORKS_OK @@ -567,6 +628,7 @@ extern struct movsl_mask { #define ARCH_HAS_NOCACHE_UACCESS 1 +#define ARCH_HAS_SORT_EXTABLE #ifdef CONFIG_X86_32 # include "uaccess_32.h" #else diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 632fb44..e30e334 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -44,6 +44,11 @@ unsigned long __must_check __copy_from_user_ll_nocache_nozero static __always_inline unsigned long __must_check __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) { + pax_track_stack(); + + if ((long)n < 0) + return n; + if (__builtin_constant_p(n)) { unsigned long ret; @@ -62,6 +67,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) return ret; } } + if (!__builtin_constant_p(n)) + check_object_size(from, n, true); return __copy_to_user_ll(to, from, n); } @@ -83,12 +90,16 @@ static __always_inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); + return __copy_to_user_inatomic(to, from, n); } static __always_inline unsigned long __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) { + if ((long)n < 0) + return n; + /* Avoid zeroing the tail if the copy fails.. 
* If 'n' is constant and 1, 2, or 4, we do still zero on a failure, * but as the zeroing behaviour is only significant when n is not @@ -138,6 +149,12 @@ static __always_inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { might_fault(); + + pax_track_stack(); + + if ((long)n < 0) + return n; + if (__builtin_constant_p(n)) { unsigned long ret; @@ -153,6 +170,8 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) return ret; } } + if (!__builtin_constant_p(n)) + check_object_size(to, n, false); return __copy_from_user_ll(to, from, n); } @@ -160,6 +179,10 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to, const void __user *from, unsigned long n) { might_fault(); + + if ((long)n < 0) + return n; + if (__builtin_constant_p(n)) { unsigned long ret; @@ -182,14 +205,62 @@ static __always_inline unsigned long __copy_from_user_inatomic_nocache(void *to, const void __user *from, unsigned long n) { - return __copy_from_user_ll_nocache_nozero(to, from, n); + if ((long)n < 0) + return n; + + return __copy_from_user_ll_nocache_nozero(to, from, n); +} + +/** + * copy_to_user: - Copy a block of data into user space. + * @to: Destination address, in user space. + * @from: Source address, in kernel space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from kernel space to user space. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + */ +static __always_inline unsigned long __must_check +copy_to_user(void __user *to, const void *from, unsigned long n) +{ + if (access_ok(VERIFY_WRITE, to, n)) + n = __copy_to_user(to, from, n); + return n; +} + +/** + * copy_from_user: - Copy a block of data from user space. + * @to: Destination address, in kernel space. + * @from: Source address, in user space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. 
+ * + * Copy data from user space to kernel space. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + * + * If some data could not be copied, this function will pad the copied + * data to the requested size using zero bytes. + */ +static __always_inline unsigned long __must_check +copy_from_user(void *to, const void __user *from, unsigned long n) +{ + if (access_ok(VERIFY_READ, from, n)) + n = __copy_from_user(to, from, n); + else if ((long)n > 0) { + if (!__builtin_constant_p(n)) + check_object_size(to, n, false); + memset(to, 0, n); + } + return n; } -unsigned long __must_check copy_to_user(void __user *to, - const void *from, unsigned long n); -unsigned long __must_check copy_from_user(void *to, - const void __user *from, - unsigned long n); long __must_check strncpy_from_user(char *dst, const char __user *src, long count); long __must_check __strncpy_from_user(char *dst, diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index db24b21..f595ae7 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -9,6 +9,9 @@ #include #include #include +#include + +#define set_fs(x) (current_thread_info()->addr_limit = (x)) /* * Copy To/From Userspace @@ -16,116 +19,205 @@ /* Handles exceptions in both to and from, but doesn't do access_ok */ __must_check unsigned long -copy_user_generic(void *to, const void *from, unsigned len); +copy_user_generic(void *to, const void *from, unsigned long len); __must_check unsigned long -copy_to_user(void __user *to, const void *from, unsigned len); -__must_check unsigned long -copy_from_user(void *to, const void __user *from, unsigned len); -__must_check unsigned long -copy_in_user(void __user *to, const void __user *from, unsigned len); +copy_in_user(void __user *to, const void __user *from, unsigned long len); static __always_inline __must_check -int __copy_from_user(void *dst, const void __user *src, unsigned size) +unsigned 
long __copy_from_user(void *dst, const void __user *src, unsigned long size) { - int ret = 0; + unsigned ret = 0; might_fault(); - if (!__builtin_constant_p(size)) - return copy_user_generic(dst, (__force void *)src, size); + + if (size > INT_MAX) + return size; + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if (!__access_ok(VERIFY_READ, src, size)) + return size; +#endif + + if (!__builtin_constant_p(size)) { + check_object_size(dst, size, false); + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if ((unsigned long)src < PAX_USER_SHADOW_BASE) + src += PAX_USER_SHADOW_BASE; +#endif + + return copy_user_generic(dst, (__force_kernel const void *)src, size); + } switch (size) { - case 1:__get_user_asm(*(u8 *)dst, (u8 __user *)src, + case 1:__get_user_asm(*(u8 *)dst, (const u8 __user *)src, ret, "b", "b", "=q", 1); return ret; - case 2:__get_user_asm(*(u16 *)dst, (u16 __user *)src, + case 2:__get_user_asm(*(u16 *)dst, (const u16 __user *)src, ret, "w", "w", "=r", 2); return ret; - case 4:__get_user_asm(*(u32 *)dst, (u32 __user *)src, + case 4:__get_user_asm(*(u32 *)dst, (const u32 __user *)src, ret, "l", "k", "=r", 4); return ret; - case 8:__get_user_asm(*(u64 *)dst, (u64 __user *)src, + case 8:__get_user_asm(*(u64 *)dst, (const u64 __user *)src, ret, "q", "", "=r", 8); return ret; case 10: - __get_user_asm(*(u64 *)dst, (u64 __user *)src, + __get_user_asm(*(u64 *)dst, (const u64 __user *)src, ret, "q", "", "=r", 10); if (unlikely(ret)) return ret; __get_user_asm(*(u16 *)(8 + (char *)dst), - (u16 __user *)(8 + (char __user *)src), + (const u16 __user *)(8 + (const char __user *)src), ret, "w", "w", "=r", 2); return ret; case 16: - __get_user_asm(*(u64 *)dst, (u64 __user *)src, + __get_user_asm(*(u64 *)dst, (const u64 __user *)src, ret, "q", "", "=r", 16); if (unlikely(ret)) return ret; __get_user_asm(*(u64 *)(8 + (char *)dst), - (u64 __user *)(8 + (char __user *)src), + (const u64 __user *)(8 + (const char __user *)src), ret, "q", "", "=r", 8); return ret; default: - return 
copy_user_generic(dst, (__force void *)src, size); + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if ((unsigned long)src < PAX_USER_SHADOW_BASE) + src += PAX_USER_SHADOW_BASE; +#endif + + return copy_user_generic(dst, (__force_kernel const void *)src, size); } } static __always_inline __must_check -int __copy_to_user(void __user *dst, const void *src, unsigned size) +unsigned long __copy_to_user(void __user *dst, const void *src, unsigned long size) { - int ret = 0; + unsigned ret = 0; might_fault(); - if (!__builtin_constant_p(size)) - return copy_user_generic((__force void *)dst, src, size); + + pax_track_stack(); + + if (size > INT_MAX) + return size; + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if (!__access_ok(VERIFY_WRITE, dst, size)) + return size; +#endif + + if (!__builtin_constant_p(size)) { + check_object_size(src, size, true); + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if ((unsigned long)dst < PAX_USER_SHADOW_BASE) + dst += PAX_USER_SHADOW_BASE; +#endif + + return copy_user_generic((__force_kernel void *)dst, src, size); + } switch (size) { - case 1:__put_user_asm(*(u8 *)src, (u8 __user *)dst, + case 1:__put_user_asm(*(const u8 *)src, (u8 __user *)dst, ret, "b", "b", "iq", 1); return ret; - case 2:__put_user_asm(*(u16 *)src, (u16 __user *)dst, + case 2:__put_user_asm(*(const u16 *)src, (u16 __user *)dst, ret, "w", "w", "ir", 2); return ret; - case 4:__put_user_asm(*(u32 *)src, (u32 __user *)dst, + case 4:__put_user_asm(*(const u32 *)src, (u32 __user *)dst, ret, "l", "k", "ir", 4); return ret; - case 8:__put_user_asm(*(u64 *)src, (u64 __user *)dst, + case 8:__put_user_asm(*(const u64 *)src, (u64 __user *)dst, ret, "q", "", "er", 8); return ret; case 10: - __put_user_asm(*(u64 *)src, (u64 __user *)dst, + __put_user_asm(*(const u64 *)src, (u64 __user *)dst, ret, "q", "", "er", 10); if (unlikely(ret)) return ret; asm("":::"memory"); - __put_user_asm(4[(u16 *)src], 4 + (u16 __user *)dst, + __put_user_asm(4[(const u16 *)src], 4 + (u16 __user *)dst, ret, "w", "w", "ir", 2); return 
ret; case 16: - __put_user_asm(*(u64 *)src, (u64 __user *)dst, + __put_user_asm(*(const u64 *)src, (u64 __user *)dst, ret, "q", "", "er", 16); if (unlikely(ret)) return ret; asm("":::"memory"); - __put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst, + __put_user_asm(1[(const u64 *)src], 1 + (u64 __user *)dst, ret, "q", "", "er", 8); return ret; default: - return copy_user_generic((__force void *)dst, src, size); + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if ((unsigned long)dst < PAX_USER_SHADOW_BASE) + dst += PAX_USER_SHADOW_BASE; +#endif + + return copy_user_generic((__force_kernel void *)dst, src, size); + } +} + +static __always_inline __must_check +unsigned long copy_to_user(void __user *to, const void *from, unsigned long len) +{ + if (access_ok(VERIFY_WRITE, to, len)) + len = __copy_to_user(to, from, len); + return len; +} + +static __always_inline __must_check +unsigned long copy_from_user(void *to, const void __user *from, unsigned long len) +{ + might_fault(); + + if (access_ok(VERIFY_READ, from, len)) + len = __copy_from_user(to, from, len); + else if (len < INT_MAX) { + if (!__builtin_constant_p(len)) + check_object_size(to, len, false); + memset(to, 0, len); } + return len; } static __always_inline __must_check -int __copy_in_user(void __user *dst, const void __user *src, unsigned size) +unsigned long __copy_in_user(void __user *dst, const void __user *src, unsigned long size) { - int ret = 0; + unsigned ret = 0; might_fault(); - if (!__builtin_constant_p(size)) - return copy_user_generic((__force void *)dst, - (__force void *)src, size); + + pax_track_stack(); + + if (size > INT_MAX) + return size; + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if (!__access_ok(VERIFY_READ, src, size)) + return size; + if (!__access_ok(VERIFY_WRITE, dst, size)) + return size; +#endif + + if (!__builtin_constant_p(size)) { + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if ((unsigned long)src < PAX_USER_SHADOW_BASE) + src += PAX_USER_SHADOW_BASE; + if ((unsigned long)dst < PAX_USER_SHADOW_BASE) + 
dst += PAX_USER_SHADOW_BASE; +#endif + + return copy_user_generic((__force_kernel void *)dst, + (__force_kernel const void *)src, size); + } switch (size) { case 1: { u8 tmp; - __get_user_asm(tmp, (u8 __user *)src, + __get_user_asm(tmp, (const u8 __user *)src, ret, "b", "b", "=q", 1); if (likely(!ret)) __put_user_asm(tmp, (u8 __user *)dst, @@ -134,7 +226,7 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) } case 2: { u16 tmp; - __get_user_asm(tmp, (u16 __user *)src, + __get_user_asm(tmp, (const u16 __user *)src, ret, "w", "w", "=r", 2); if (likely(!ret)) __put_user_asm(tmp, (u16 __user *)dst, @@ -144,7 +236,7 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) case 4: { u32 tmp; - __get_user_asm(tmp, (u32 __user *)src, + __get_user_asm(tmp, (const u32 __user *)src, ret, "l", "k", "=r", 4); if (likely(!ret)) __put_user_asm(tmp, (u32 __user *)dst, @@ -153,7 +245,7 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) } case 8: { u64 tmp; - __get_user_asm(tmp, (u64 __user *)src, + __get_user_asm(tmp, (const u64 __user *)src, ret, "q", "", "=r", 8); if (likely(!ret)) __put_user_asm(tmp, (u64 __user *)dst, @@ -161,8 +253,16 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) return ret; } default: - return copy_user_generic((__force void *)dst, - (__force void *)src, size); + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if ((unsigned long)src < PAX_USER_SHADOW_BASE) + src += PAX_USER_SHADOW_BASE; + if ((unsigned long)dst < PAX_USER_SHADOW_BASE) + dst += PAX_USER_SHADOW_BASE; +#endif + + return copy_user_generic((__force_kernel void *)dst, + (__force_kernel const void *)src, size); } } @@ -176,33 +276,75 @@ __must_check long strlen_user(const char __user *str); __must_check unsigned long clear_user(void __user *mem, unsigned long len); __must_check unsigned long __clear_user(void __user *mem, unsigned long len); -__must_check long __copy_from_user_inatomic(void *dst, const 
void __user *src, - unsigned size); +static __must_check __always_inline unsigned long +__copy_from_user_inatomic(void *dst, const void __user *src, unsigned long size) +{ + pax_track_stack(); + + if (size > INT_MAX) + return size; + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if (!__access_ok(VERIFY_READ, src, size)) + return size; -static __must_check __always_inline int -__copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) + if ((unsigned long)src < PAX_USER_SHADOW_BASE) + src += PAX_USER_SHADOW_BASE; +#endif + + return copy_user_generic(dst, (__force_kernel const void *)src, size); +} + +static __must_check __always_inline unsigned long +__copy_to_user_inatomic(void __user *dst, const void *src, unsigned long size) { - return copy_user_generic((__force void *)dst, src, size); + if (size > INT_MAX) + return size; + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if (!__access_ok(VERIFY_WRITE, dst, size)) + return size; + + if ((unsigned long)dst < PAX_USER_SHADOW_BASE) + dst += PAX_USER_SHADOW_BASE; +#endif + + return copy_user_generic((__force_kernel void *)dst, src, size); } -extern long __copy_user_nocache(void *dst, const void __user *src, - unsigned size, int zerorest); +extern unsigned long __copy_user_nocache(void *dst, const void __user *src, + unsigned long size, int zerorest); -static inline int -__copy_from_user_nocache(void *dst, const void __user *src, unsigned size) +static inline unsigned long __copy_from_user_nocache(void *dst, const void __user *src, unsigned long size) { might_sleep(); + + if (size > INT_MAX) + return size; + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if (!__access_ok(VERIFY_READ, src, size)) + return size; +#endif + return __copy_user_nocache(dst, src, size, 1); } -static inline int -__copy_from_user_inatomic_nocache(void *dst, const void __user *src, - unsigned size) +static inline unsigned long __copy_from_user_inatomic_nocache(void *dst, const void __user *src, + unsigned long size) { + if (size > INT_MAX) + return size; + +#ifdef 
CONFIG_PAX_MEMORY_UDEREF + if (!__access_ok(VERIFY_READ, src, size)) + return size; +#endif + return __copy_user_nocache(dst, src, size, 0); } -unsigned long -copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest); +extern unsigned long +copy_user_handle_tail(char __user *to, char __user *from, unsigned long len, unsigned zerorest); #endif /* _ASM_X86_UACCESS_64_H */ diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 9064052..786cfbc 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -25,7 +25,7 @@ extern const char VDSO32_PRELINK[]; #define VDSO32_SYMBOL(base, name) \ ({ \ extern const char VDSO32_##name[]; \ - (void *)(VDSO32_##name - VDSO32_PRELINK + (unsigned long)(base)); \ + (void __user *)(VDSO32_##name - VDSO32_PRELINK + (unsigned long)(base)); \ }) #endif diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 3d61e20..9507180 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -14,6 +14,7 @@ struct vsyscall_gtod_data { int sysctl_enabled; struct timezone sys_tz; struct { /* extract of a clocksource struct */ + char name[8]; cycle_t (*vread)(void); cycle_t cycle_last; cycle_t mask; diff --git a/arch/x86/include/asm/vmi.h b/arch/x86/include/asm/vmi.h index 61e08c0..b0da582 100644 --- a/arch/x86/include/asm/vmi.h +++ b/arch/x86/include/asm/vmi.h @@ -191,6 +191,7 @@ struct vrom_header { u8 reserved[96]; /* Reserved for headers */ char vmi_init[8]; /* VMI_Init jump point */ char get_reloc[8]; /* VMI_GetRelocationInfo jump point */ + char rom_data[8048]; /* rest of the option ROM */ } __attribute__((packed)); struct pnp_header { diff --git a/arch/x86/include/asm/vmi_time.h b/arch/x86/include/asm/vmi_time.h index c6e0bee..fcb9f74 100644 --- a/arch/x86/include/asm/vmi_time.h +++ b/arch/x86/include/asm/vmi_time.h @@ -43,7 +43,7 @@ extern struct vmi_timer_ops { int (*wallclock_updated)(void); void (*set_alarm)(u32 flags, u64 expiry, u64 
period); void (*cancel_alarm)(u32 flags); -} vmi_timer_ops; +} __no_const vmi_timer_ops; /* Prototypes */ extern void __init vmi_time_init(void); diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index d0983d2..1f7c9e9 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -15,9 +15,10 @@ enum vsyscall_num { #ifdef __KERNEL__ #include +#include +#include #define __section_vgetcpu_mode __attribute__ ((unused, __section__ (".vgetcpu_mode"), aligned(16))) -#define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16))) /* Definitions for CONFIG_GENERIC_TIME definitions */ #define __section_vsyscall_gtod_data __attribute__ \ @@ -31,7 +32,6 @@ enum vsyscall_num { #define VGETCPU_LSL 2 extern int __vgetcpu_mode; -extern volatile unsigned long __jiffies; /* kernel space (writeable) */ extern int vgetcpu_mode; @@ -39,6 +39,9 @@ extern struct timezone sys_tz; extern void map_vsyscall(void); +extern int vgettimeofday(struct timeval * tv, struct timezone * tz); +extern time_t vtime(time_t *t); +extern long vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache); #endif /* __KERNEL__ */ #endif /* _ASM_X86_VSYSCALL_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 2c756fd..3377e37 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -28,7 +28,7 @@ struct x86_init_mpparse { void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); void (*find_smp_config)(unsigned int reserve); void (*get_smp_config)(unsigned int early); -}; +} __no_const; /** * struct x86_init_resources - platform specific resource related ops @@ -42,7 +42,7 @@ struct x86_init_resources { void (*probe_roms)(void); void (*reserve_resources)(void); char *(*memory_setup)(void); -}; +} __no_const; /** * struct x86_init_irqs - platform specific interrupt setup @@ -55,7 +55,7 @@ struct x86_init_irqs { void (*pre_vector_init)(void); void 
(*intr_init)(void); void (*trap_init)(void); -}; +} __no_const; /** * struct x86_init_oem - oem platform specific customizing functions @@ -65,7 +65,7 @@ struct x86_init_irqs { struct x86_init_oem { void (*arch_setup)(void); void (*banner)(void); -}; +} __no_const; /** * struct x86_init_paging - platform specific paging functions @@ -75,7 +75,7 @@ struct x86_init_oem { struct x86_init_paging { void (*pagetable_setup_start)(pgd_t *base); void (*pagetable_setup_done)(pgd_t *base); -}; +} __no_const; /** * struct x86_init_timers - platform specific timer setup @@ -88,7 +88,7 @@ struct x86_init_timers { void (*setup_percpu_clockev)(void); void (*tsc_pre_init)(void); void (*timer_init)(void); -}; +} __no_const; /** * struct x86_init_ops - functions for platform specific setup @@ -101,7 +101,7 @@ struct x86_init_ops { struct x86_init_oem oem; struct x86_init_paging paging; struct x86_init_timers timers; -}; +} __no_const; /** * struct x86_cpuinit_ops - platform specific cpu hotplug setups @@ -109,7 +109,7 @@ struct x86_init_ops { */ struct x86_cpuinit_ops { void (*setup_percpu_clockev)(void); -}; +} __no_const; /** * struct x86_platform_ops - platform specific runtime functions @@ -121,7 +121,7 @@ struct x86_platform_ops { unsigned long (*calibrate_tsc)(void); unsigned long (*get_wallclock)(void); int (*set_wallclock)(unsigned long nowtime); -}; +} __no_const; extern struct x86_init_ops x86_init; extern struct x86_cpuinit_ops x86_cpuinit; diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 727acc1..554f3eb 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -56,6 +56,12 @@ static inline int xrstor_checking(struct xsave_struct *fx) static inline int xsave_user(struct xsave_struct __user *buf) { int err; + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + if ((unsigned long)buf < PAX_USER_SHADOW_BASE) + buf = (struct xsave_struct __user *)((void __user*)buf + PAX_USER_SHADOW_BASE); +#endif + __asm__ 
__volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" "2:\n" ".section .fixup,\"ax\"\n" @@ -78,10 +84,15 @@ static inline int xsave_user(struct xsave_struct __user *buf) static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) { int err; - struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); + struct xsave_struct *xstate = ((__force_kernel struct xsave_struct *)buf); u32 lmask = mask; u32 hmask = mask >> 32; +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + if ((unsigned long)xstate < PAX_USER_SHADOW_BASE) + xstate = (struct xsave_struct *)((void *)xstate + PAX_USER_SHADOW_BASE); +#endif + __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n" "2:\n" ".section .fixup,\"ax\"\n" diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile index 6a564ac..9b1340c 100644 --- a/arch/x86/kernel/acpi/realmode/Makefile +++ b/arch/x86/kernel/acpi/realmode/Makefile @@ -41,6 +41,9 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D_WAKEUP -D__KERNEL__ \ $(call cc-option, -fno-stack-protector) \ $(call cc-option, -mpreferred-stack-boundary=2) KBUILD_CFLAGS += $(call cc-option, -m32) +ifdef CONSTIFY_PLUGIN +KBUILD_CFLAGS += $(CONSTIFY_PLUGIN) -fplugin-arg-constify_plugin-no-constify +endif KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S index 580b4e2..d4129e4 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.S @@ -91,6 +91,9 @@ _start: /* Do any other stuff... */ #ifndef CONFIG_64BIT + /* Recheck NX bit overrides (64bit path does this in trampoline) */ + call verify_cpu + /* This could also be done in C code... 
*/ movl pmode_cr3, %eax movl %eax, %cr3 @@ -104,7 +107,7 @@ _start: movl %eax, %ecx orl %edx, %ecx jz 1f - movl $0xc0000080, %ecx + mov $MSR_EFER, %ecx wrmsr 1: @@ -114,6 +117,7 @@ _start: movl pmode_cr0, %eax movl %eax, %cr0 jmp pmode_return +# include "../../verify_cpu.S" #else pushw $0 pushw trampoline_segment diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index ca93638..7042f24 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -11,11 +11,12 @@ #include #include #include +#include #include "realmode/wakeup.h" #include "sleep.h" -unsigned long acpi_wakeup_address; +unsigned long acpi_wakeup_address = 0x2000; unsigned long acpi_realmode_flags; /* address in low memory of the wakeup routine. */ @@ -98,9 +99,13 @@ int acpi_save_state_mem(void) #else /* CONFIG_64BIT */ header->trampoline_segment = setup_trampoline() >> 4; #ifdef CONFIG_SMP - stack_start.sp = temp_stack + sizeof(temp_stack); + stack_start = (unsigned long)temp_stack + sizeof(temp_stack); + + pax_open_kernel(); early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(smp_processor_id()); + pax_close_kernel(); + initial_gs = per_cpu_offset(smp_processor_id()); #endif initial_code = (unsigned long)wakeup_long64; @@ -134,14 +139,8 @@ void __init acpi_reserve_bootmem(void) return; } - acpi_realmode = (unsigned long)alloc_bootmem_low(WAKEUP_SIZE); - - if (!acpi_realmode) { - printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); - return; - } - - acpi_wakeup_address = virt_to_phys((void *)acpi_realmode); + reserve_early(acpi_wakeup_address, acpi_wakeup_address + WAKEUP_SIZE, "ACPI Wakeup Code"); + acpi_realmode = (unsigned long)__va(acpi_wakeup_address); } diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index 8ded418..079961e 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -30,13 +30,11 @@ wakeup_pmode_return: # and restore the stack ... 
but you need gdt for this to work movl saved_context_esp, %esp - movl %cs:saved_magic, %eax - cmpl $0x12345678, %eax + cmpl $0x12345678, saved_magic jne bogus_magic # jump to place where we left off - movl saved_eip, %eax - jmp *%eax + jmp *(saved_eip) bogus_magic: jmp bogus_magic diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index de7353c..075da5f 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -407,7 +407,7 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, BUG_ON(p->len > MAX_PATCH_LEN); /* prep the buffer with the original instructions */ - memcpy(insnbuf, p->instr, p->len); + memcpy(insnbuf, ktla_ktva(p->instr), p->len); used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf, (unsigned long)p->instr, p->len); @@ -475,7 +475,7 @@ void __init alternative_instructions(void) if (smp_alt_once) free_init_pages("SMP alternatives", (unsigned long)__smp_locks, - (unsigned long)__smp_locks_end); + PAGE_ALIGN((unsigned long)__smp_locks_end)); restart_nmi(); } @@ -492,13 +492,17 @@ void __init alternative_instructions(void) * instructions. And on the local CPU you need to be protected again NMI or MCE * handlers seeing an inconsistent instruction while you patch. */ -static void *__init_or_module text_poke_early(void *addr, const void *opcode, +static void *__kprobes text_poke_early(void *addr, const void *opcode, size_t len) { unsigned long flags; local_irq_save(flags); - memcpy(addr, opcode, len); + + pax_open_kernel(); + memcpy(ktla_ktva(addr), opcode, len); sync_core(); + pax_close_kernel(); + local_irq_restore(flags); /* Could also do a CLFLUSH here to speed up CPU recovery; but that causes hangs on some VIA CPUs. 
*/ @@ -520,35 +524,21 @@ static void *__init_or_module text_poke_early(void *addr, const void *opcode, */ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) { - unsigned long flags; - char *vaddr; + unsigned char *vaddr = ktla_ktva(addr); struct page *pages[2]; - int i; + size_t i; if (!core_kernel_text((unsigned long)addr)) { - pages[0] = vmalloc_to_page(addr); - pages[1] = vmalloc_to_page(addr + PAGE_SIZE); + pages[0] = vmalloc_to_page(vaddr); + pages[1] = vmalloc_to_page(vaddr + PAGE_SIZE); } else { - pages[0] = virt_to_page(addr); + pages[0] = virt_to_page(vaddr); WARN_ON(!PageReserved(pages[0])); - pages[1] = virt_to_page(addr + PAGE_SIZE); + pages[1] = virt_to_page(vaddr + PAGE_SIZE); } BUG_ON(!pages[0]); - local_irq_save(flags); - set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0])); - if (pages[1]) - set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1])); - vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0); - memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); - clear_fixmap(FIX_TEXT_POKE0); - if (pages[1]) - clear_fixmap(FIX_TEXT_POKE1); - local_flush_tlb(); - sync_core(); - /* Could also do a CLFLUSH here to speed up CPU recovery; but - that causes hangs on some VIA CPUs. 
*/ + text_poke_early(addr, opcode, len); for (i = 0; i < len; i++) - BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]); - local_irq_restore(flags); + BUG_ON((vaddr)[i] != ((const unsigned char *)opcode)[i]); return addr; } diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 3a44b75..1601800 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -2076,7 +2076,7 @@ static void prealloc_protection_domains(void) } } -static struct dma_map_ops amd_iommu_dma_ops = { +static const struct dma_map_ops amd_iommu_dma_ops = { .alloc_coherent = alloc_coherent, .free_coherent = free_coherent, .map_page = map_page, diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 1d2d670..8e3f477 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -170,7 +170,7 @@ int first_system_vector = 0xfe; /* * Debug level, exported for io_apic.c */ -unsigned int apic_verbosity; +int apic_verbosity; int pic_mode; @@ -1794,7 +1794,7 @@ void smp_error_interrupt(struct pt_regs *regs) apic_write(APIC_ESR, 0); v1 = apic_read(APIC_ESR); ack_APIC_irq(); - atomic_inc(&irq_err_count); + atomic_inc_unchecked(&irq_err_count); /* * Here is what the APIC error bits mean: @@ -2184,6 +2184,8 @@ static int __cpuinit apic_cluster_num(void) u16 *bios_cpu_apicid; DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); + pax_track_stack(); + bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); bitmap_zero(clustermap, NUM_APIC_CLUSTERS); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8928d97..f799cea 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -716,7 +716,7 @@ struct IO_APIC_route_entry **alloc_ioapic_entries(void) ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics, GFP_ATOMIC); if (!ioapic_entries) - return 0; + return NULL; for (apic = 0; apic < nr_ioapics; apic++) { ioapic_entries[apic] = @@ -733,7 +733,7 @@ nomem: kfree(ioapic_entries[apic]); 
kfree(ioapic_entries); - return 0; + return NULL; } /* @@ -1150,7 +1150,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, } EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); -void lock_vector_lock(void) +void lock_vector_lock(void) __acquires(vector_lock) { /* Used to the online set of cpus does not change * during assign_irq_vector. @@ -1158,7 +1158,7 @@ void lock_vector_lock(void) spin_lock(&vector_lock); } -void unlock_vector_lock(void) +void unlock_vector_lock(void) __releases(vector_lock) { spin_unlock(&vector_lock); } @@ -2542,7 +2542,7 @@ static void ack_apic_edge(unsigned int irq) ack_APIC_irq(); } -atomic_t irq_mis_count; +atomic_unchecked_t irq_mis_count; static void ack_apic_level(unsigned int irq) { @@ -2626,7 +2626,7 @@ static void ack_apic_level(unsigned int irq) /* Tail end of version 0x11 I/O APIC bug workaround */ if (!(v & (1 << (i & 0x1f)))) { - atomic_inc(&irq_mis_count); + atomic_inc_unchecked(&irq_mis_count); spin_lock(&ioapic_lock); __mask_and_edge_IO_APIC_irq(cfg); __unmask_and_level_IO_APIC_irq(cfg); diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 151ace6..f317474 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -410,7 +410,7 @@ static DEFINE_SPINLOCK(user_list_lock); * This is for buggy BIOS's that refer to (real mode) segment 0x40 * even though they are called in protected mode. 
*/ -static struct desc_struct bad_bios_desc = GDT_ENTRY_INIT(0x4092, +static const struct desc_struct bad_bios_desc = GDT_ENTRY_INIT(0x4093, (unsigned long)__va(0x400UL), PAGE_SIZE - 0x400 - 1); static const char driver_version[] = "1.16ac"; /* no spaces */ @@ -588,7 +588,10 @@ static long __apm_bios_call(void *_call) BUG_ON(cpu != 0); gdt = get_cpu_gdt_table(cpu); save_desc_40 = gdt[0x40 / 8]; + + pax_open_kernel(); gdt[0x40 / 8] = bad_bios_desc; + pax_close_kernel(); apm_irq_save(flags); APM_DO_SAVE_SEGS; @@ -597,7 +600,11 @@ static long __apm_bios_call(void *_call) &call->esi); APM_DO_RESTORE_SEGS; apm_irq_restore(flags); + + pax_open_kernel(); gdt[0x40 / 8] = save_desc_40; + pax_close_kernel(); + put_cpu(); return call->eax & 0xff; @@ -664,7 +671,10 @@ static long __apm_bios_call_simple(void *_call) BUG_ON(cpu != 0); gdt = get_cpu_gdt_table(cpu); save_desc_40 = gdt[0x40 / 8]; + + pax_open_kernel(); gdt[0x40 / 8] = bad_bios_desc; + pax_close_kernel(); apm_irq_save(flags); APM_DO_SAVE_SEGS; @@ -672,7 +682,11 @@ static long __apm_bios_call_simple(void *_call) &call->eax); APM_DO_RESTORE_SEGS; apm_irq_restore(flags); + + pax_open_kernel(); gdt[0x40 / 8] = save_desc_40; + pax_close_kernel(); + put_cpu(); return error; } @@ -975,7 +989,7 @@ recalc: static void apm_power_off(void) { - unsigned char po_bios_call[] = { + const unsigned char po_bios_call[] = { 0xb8, 0x00, 0x10, /* movw $0x1000,ax */ 0x8e, 0xd0, /* movw ax,ss */ 0xbc, 0x00, 0xf0, /* movw $0xf000,sp */ @@ -2357,12 +2371,15 @@ static int __init apm_init(void) * code to that CPU. 
*/ gdt = get_cpu_gdt_table(0); + + pax_open_kernel(); set_desc_base(&gdt[APM_CS >> 3], (unsigned long)__va((unsigned long)apm_info.bios.cseg << 4)); set_desc_base(&gdt[APM_CS_16 >> 3], (unsigned long)__va((unsigned long)apm_info.bios.cseg_16 << 4)); set_desc_base(&gdt[APM_DS >> 3], (unsigned long)__va((unsigned long)apm_info.bios.dseg << 4)); + pax_close_kernel(); proc_create("apm", 0, NULL, &apm_file_ops); diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index dfdbf64..9b2b6ce 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -51,7 +51,6 @@ void foo(void) OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); BLANK(); - OFFSET(TI_task, thread_info, task); OFFSET(TI_exec_domain, thread_info, exec_domain); OFFSET(TI_flags, thread_info, flags); OFFSET(TI_status, thread_info, status); @@ -60,6 +59,8 @@ void foo(void) OFFSET(TI_restart_block, thread_info, restart_block); OFFSET(TI_sysenter_return, thread_info, sysenter_return); OFFSET(TI_cpu, thread_info, cpu); + OFFSET(TI_lowest_stack, thread_info, lowest_stack); + DEFINE(TI_task_thread_sp0, offsetof(struct task_struct, thread.sp0) - offsetof(struct task_struct, tinfo)); BLANK(); OFFSET(GDS_size, desc_ptr, size); @@ -99,6 +100,7 @@ void foo(void) DEFINE(PAGE_SIZE_asm, PAGE_SIZE); DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT); + DEFINE(THREAD_SIZE_asm, THREAD_SIZE); DEFINE(PTRS_PER_PTE, PTRS_PER_PTE); DEFINE(PTRS_PER_PMD, PTRS_PER_PMD); DEFINE(PTRS_PER_PGD, PTRS_PER_PGD); @@ -115,6 +117,11 @@ void foo(void) OFFSET(PV_CPU_iret, pv_cpu_ops, iret); OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); + +#ifdef CONFIG_PAX_KERNEXEC + OFFSET(PV_CPU_write_cr0, pv_cpu_ops, write_cr0); +#endif + #endif #ifdef CONFIG_XEN diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 4a6aeed..371de20 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c 
@@ -44,6 +44,8 @@ int main(void) ENTRY(addr_limit); ENTRY(preempt_count); ENTRY(status); + ENTRY(lowest_stack); + DEFINE(TI_task_thread_sp0, offsetof(struct task_struct, thread.sp0) - offsetof(struct task_struct, tinfo)); #ifdef CONFIG_IA32_EMULATION ENTRY(sysenter_return); #endif @@ -63,6 +65,18 @@ int main(void) OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); + +#ifdef CONFIG_PAX_KERNEXEC + OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); + OFFSET(PV_CPU_write_cr0, pv_cpu_ops, write_cr0); +#endif + +#ifdef CONFIG_PAX_MEMORY_UDEREF + OFFSET(PV_MMU_read_cr3, pv_mmu_ops, read_cr3); + OFFSET(PV_MMU_write_cr3, pv_mmu_ops, write_cr3); + OFFSET(PV_MMU_set_pgd_batched, pv_mmu_ops, set_pgd_batched); +#endif + #endif @@ -115,6 +129,7 @@ int main(void) ENTRY(cr8); BLANK(); #undef ENTRY + DEFINE(TSS_size, sizeof(struct tss_struct)); DEFINE(TSS_ist, offsetof(struct tss_struct, x86_tss.ist)); BLANK(); DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx)); @@ -130,6 +145,7 @@ int main(void) BLANK(); DEFINE(PAGE_SIZE_asm, PAGE_SIZE); + DEFINE(THREAD_SIZE_asm, THREAD_SIZE); #ifdef CONFIG_XEN BLANK(); OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index ff502cc..dc5133e 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -7,10 +7,6 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_common.o = -pg endif -# Make sure load_percpu_segment has no stackprotector -nostackp := $(call cc-option, -fno-stack-protector) -CFLAGS_common.o := $(nostackp) - obj-y := intel_cacheinfo.o addon_cpuid_features.o obj-y += proc.o capflags.o powerflags.o common.o obj-y += vmware.o hypervisor.o sched.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 6e082dc..a0b5f36 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -602,7 +602,7 @@ 
static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* AMD errata T13 (order #21922) */ - if ((c->x86 == 6)) { + if (c->x86 == 6) { /* Duron Rev A0 */ if (c->x86_model == 3 && c->x86_mask == 0) size = 64; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4e34d10..ba6bc97 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -83,60 +83,6 @@ static const struct cpu_dev __cpuinitconst default_cpu = { static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; -DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { -#ifdef CONFIG_X86_64 - /* - * We need valid kernel segments for data and code in long mode too - * IRET will check the segment types kkeil 2000/10/28 - * Also sysret mandates a special GDT layout - * - * TLS descriptors are currently at a different place compared to i386. - * Hopefully nobody expects them at a fixed place (Wine?) - */ - [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), - [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), - [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff), - [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff), - [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff), - [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff), -#else - [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff), - [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), - [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff), - [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff), - /* - * Segments used for calling PnP BIOS have byte granularity. - * They code segments and data segments have fixed 64k limits, - * the transfer segment sizes are set at run time. 
- */ - /* 32-bit code */ - [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), - /* 16-bit code */ - [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), - /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff), - /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0), - /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0), - /* - * The APM segments have byte granularity and their bases - * are set at run time. All have 64k limits. - */ - /* 32-bit code */ - [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), - /* 16-bit code */ - [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), - /* data */ - [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff), - - [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), - [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), - GDT_STACK_CANARY_INIT -#endif -} }; -EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); - static int __init x86_xsave_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_XSAVE); @@ -344,7 +290,7 @@ void switch_to_new_gdt(int cpu) { struct desc_ptr gdt_descr; - gdt_descr.address = (long)get_cpu_gdt_table(cpu); + gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); /* Reload the per-cpu base */ @@ -798,6 +744,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) /* Filter out anything that depends on CPUID levels we don't have */ filter_cpuid_features(c, true); +#if defined(CONFIG_X86_32) && (defined(CONFIG_PAX_SEGMEXEC) || defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)) + setup_clear_cpu_cap(X86_FEATURE_SEP); +#endif + /* If the model name is still unset, do table lookup. 
*/ if (!c->x86_model_id[0]) { const char *p; @@ -980,6 +930,9 @@ static __init int setup_disablecpuid(char *arg) } __setup("clearcpuid=", setup_disablecpuid); +DEFINE_PER_CPU(struct thread_info *, current_tinfo) = &init_task.tinfo; +EXPORT_PER_CPU_SYMBOL(current_tinfo); + #ifdef CONFIG_X86_64 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; @@ -995,7 +948,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = EXPORT_PER_CPU_SYMBOL(current_task); DEFINE_PER_CPU(unsigned long, kernel_stack) = - (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; + (unsigned long)&init_thread_union - 16 + THREAD_SIZE; EXPORT_PER_CPU_SYMBOL(kernel_stack); DEFINE_PER_CPU(char *, irq_stack_ptr) = @@ -1060,7 +1013,7 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) { memset(regs, 0, sizeof(struct pt_regs)); regs->fs = __KERNEL_PERCPU; - regs->gs = __KERNEL_STACK_CANARY; + savesegment(gs, regs->gs); return regs; } @@ -1101,7 +1054,7 @@ void __cpuinit cpu_init(void) int i; cpu = stack_smp_processor_id(); - t = &per_cpu(init_tss, cpu); + t = init_tss + cpu; orig_ist = &per_cpu(orig_ist, cpu); #ifdef CONFIG_NUMA @@ -1127,7 +1080,7 @@ void __cpuinit cpu_init(void) switch_to_new_gdt(cpu); loadsegment(fs, 0); - load_idt((const struct desc_ptr *)&idt_descr); + load_idt(&idt_descr); memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); syscall_init(); @@ -1136,7 +1089,6 @@ void __cpuinit cpu_init(void) wrmsrl(MSR_KERNEL_GS_BASE, 0); barrier(); - check_efer(); if (cpu != 0) enable_x2apic(); @@ -1199,7 +1151,7 @@ void __cpuinit cpu_init(void) { int cpu = smp_processor_id(); struct task_struct *curr = current; - struct tss_struct *t = &per_cpu(init_tss, cpu); + struct tss_struct *t = init_tss + cpu; struct thread_struct *thread = &curr->thread; if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 6a77cca..4f4fca0 100644 --- 
a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -162,7 +162,7 @@ static void __cpuinit trap_init_f00f_bug(void) * Update the IDT descriptor and reload the IDT so that * it uses the read-only mapped virtual address. */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); + idt_descr.address = (struct desc_struct *)fix_to_virt(FIX_F00F_IDT); load_idt(&idt_descr); } #endif diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 417990f..96dc36b 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -921,7 +921,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, return ret; } -static struct sysfs_ops sysfs_ops = { +static const struct sysfs_ops sysfs_ops = { .show = show, .store = store, }; diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 472763d..9831e11 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -211,7 +211,9 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf, static int inject_init(void) { printk(KERN_INFO "Machine check injector initialized\n"); - mce_chrdev_ops.write = mce_write; + pax_open_kernel(); + *(void **)&mce_chrdev_ops.write = mce_write; + pax_close_kernel(); register_die_notifier(&mce_raise_nb); return 0; } diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 0f16a2b..21740f5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "mce-internal.h" @@ -187,7 +188,7 @@ static void print_mce(struct mce *m) !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" 
: "", m->cs, m->ip); - if (m->cs == __KERNEL_CS) + if (m->cs == __KERNEL_CS || m->cs == __KERNEXEC_KERNEL_CS) print_symbol("{%s}", m->ip); pr_cont("\n"); } @@ -221,10 +222,10 @@ static void print_mce_tail(void) #define PANIC_TIMEOUT 5 /* 5 seconds */ -static atomic_t mce_paniced; +static atomic_unchecked_t mce_paniced; static int fake_panic; -static atomic_t mce_fake_paniced; +static atomic_unchecked_t mce_fake_paniced; /* Panic in progress. Enable interrupts and wait for final IPI */ static void wait_for_panic(void) @@ -248,7 +249,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp) /* * Make sure only one CPU runs in machine check panic */ - if (atomic_inc_return(&mce_paniced) > 1) + if (atomic_inc_return_unchecked(&mce_paniced) > 1) wait_for_panic(); barrier(); @@ -256,7 +257,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp) console_verbose(); } else { /* Don't log too much for fake panic */ - if (atomic_inc_return(&mce_fake_paniced) > 1) + if (atomic_inc_return_unchecked(&mce_fake_paniced) > 1) return; } print_mce_head(); @@ -616,7 +617,7 @@ static int mce_timed_out(u64 *t) * might have been modified by someone else. */ rmb(); - if (atomic_read(&mce_paniced)) + if (atomic_read_unchecked(&mce_paniced)) wait_for_panic(); if (!monarch_timeout) goto out; @@ -1394,7 +1395,7 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code) } /* Call the installed machine check handler for this CPU setup. 
*/ -void (*machine_check_vector)(struct pt_regs *, long error_code) = +void (*machine_check_vector)(struct pt_regs *, long error_code) __read_only = unexpected_machine_check; /* @@ -1416,7 +1417,9 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c) return; } + pax_open_kernel(); machine_check_vector = do_machine_check; + pax_close_kernel(); mce_init(); mce_cpu_features(c); @@ -1429,14 +1432,14 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c) */ static DEFINE_SPINLOCK(mce_state_lock); -static int open_count; /* #times opened */ +static local_t open_count; /* #times opened */ static int open_exclu; /* already open exclusive? */ static int mce_open(struct inode *inode, struct file *file) { spin_lock(&mce_state_lock); - if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { + if (open_exclu || (local_read(&open_count) && (file->f_flags & O_EXCL))) { spin_unlock(&mce_state_lock); return -EBUSY; @@ -1444,7 +1447,7 @@ static int mce_open(struct inode *inode, struct file *file) if (file->f_flags & O_EXCL) open_exclu = 1; - open_count++; + local_inc(&open_count); spin_unlock(&mce_state_lock); @@ -1455,7 +1458,7 @@ static int mce_release(struct inode *inode, struct file *file) { spin_lock(&mce_state_lock); - open_count--; + local_dec(&open_count); open_exclu = 0; spin_unlock(&mce_state_lock); @@ -2082,7 +2085,7 @@ struct dentry *mce_get_debugfs_dir(void) static void mce_reset(void) { cpu_missing = 0; - atomic_set(&mce_fake_paniced, 0); + atomic_set_unchecked(&mce_fake_paniced, 0); atomic_set(&mce_executing, 0); atomic_set(&mce_callin, 0); atomic_set(&global_nwo, 0); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index ef3cd31..9d2f6ab 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -385,7 +385,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, return ret; } -static struct sysfs_ops threshold_ops = { +static const struct sysfs_ops threshold_ops = { .show 
= show, .store = store, }; diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 5c0e653..1e82c7c 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -50,7 +50,9 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c) if (!cpu_has(c, X86_FEATURE_MCE)) return; + pax_open_kernel(); machine_check_vector = pentium_machine_check; + pax_close_kernel(); /* Make sure the vector pointer is visible before we enable MCEs: */ wmb(); diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 54060f5..e6ba93d 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -24,7 +24,9 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c) { u32 lo, hi; + pax_open_kernel(); machine_check_vector = winchip_machine_check; + pax_close_kernel(); /* Make sure the vector pointer is visible before we enable MCEs: */ wmb(); diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c index 33af141..92ba9cd 100644 --- a/arch/x86/kernel/cpu/mtrr/amd.c +++ b/arch/x86/kernel/cpu/mtrr/amd.c @@ -108,7 +108,7 @@ amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) return 0; } -static struct mtrr_ops amd_mtrr_ops = { +static const struct mtrr_ops amd_mtrr_ops = { .vendor = X86_VENDOR_AMD, .set = amd_set_mtrr, .get = amd_get_mtrr, diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c index de89f14..316fe3e 100644 --- a/arch/x86/kernel/cpu/mtrr/centaur.c +++ b/arch/x86/kernel/cpu/mtrr/centaur.c @@ -110,7 +110,7 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t return 0; } -static struct mtrr_ops centaur_mtrr_ops = { +static const struct mtrr_ops centaur_mtrr_ops = { .vendor = X86_VENDOR_CENTAUR, .set = centaur_set_mcr, .get = centaur_get_mcr, diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index 228d982..68a3343 100644 --- 
a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -265,7 +265,7 @@ static void cyrix_set_all(void) post_set(); } -static struct mtrr_ops cyrix_mtrr_ops = { +static const struct mtrr_ops cyrix_mtrr_ops = { .vendor = X86_VENDOR_CYRIX, .set_all = cyrix_set_all, .set = cyrix_set_arr, diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 55da0c5..4d75584 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -752,7 +752,7 @@ int positive_have_wrcomb(void) /* * Generic structure... */ -struct mtrr_ops generic_mtrr_ops = { +const struct mtrr_ops generic_mtrr_ops = { .use_intel_if = 1, .set_all = generic_set_all, .get = generic_get_mtrr, diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index fd60f09..c94ef52 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -60,14 +60,14 @@ static DEFINE_MUTEX(mtrr_mutex); u64 size_or_mask, size_and_mask; static bool mtrr_aps_delayed_init; -static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; +static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM] __read_only; -struct mtrr_ops *mtrr_if; +const struct mtrr_ops *mtrr_if; static void set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type); -void set_mtrr_ops(struct mtrr_ops *ops) +void set_mtrr_ops(const struct mtrr_ops *ops) { if (ops->vendor && ops->vendor < X86_VENDOR_NUM) mtrr_ops[ops->vendor] = ops; diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index a501dee..816c719 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -25,14 +25,14 @@ struct mtrr_ops { int (*validate_add_page)(unsigned long base, unsigned long size, unsigned int type); int (*have_wrcomb)(void); -}; +} __do_const; extern int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg); extern int generic_validate_add_page(unsigned long base, 
unsigned long size, unsigned int type); -extern struct mtrr_ops generic_mtrr_ops; +extern const struct mtrr_ops generic_mtrr_ops; extern int positive_have_wrcomb(void); @@ -53,10 +53,10 @@ void fill_mtrr_var_range(unsigned int index, u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); void get_mtrr_state(void); -extern void set_mtrr_ops(struct mtrr_ops *ops); +extern void set_mtrr_ops(const struct mtrr_ops *ops); extern u64 size_or_mask, size_and_mask; -extern struct mtrr_ops *mtrr_if; +extern const struct mtrr_ops *mtrr_if; #define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 0ff02ca..fc49a60 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -723,10 +723,10 @@ x86_perf_event_update(struct perf_event *event, * count to the generic event atomically: */ again: - prev_raw_count = atomic64_read(&hwc->prev_count); + prev_raw_count = atomic64_read_unchecked(&hwc->prev_count); rdmsrl(hwc->event_base + idx, new_raw_count); - if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + if (atomic64_cmpxchg_unchecked(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) goto again; @@ -741,7 +741,7 @@ again: delta = (new_raw_count << shift) - (prev_raw_count << shift); delta >>= shift; - atomic64_add(delta, &event->count); + atomic64_add_unchecked(delta, &event->count); atomic64_sub(delta, &hwc->period_left); return new_raw_count; @@ -1353,7 +1353,7 @@ x86_perf_event_set_period(struct perf_event *event, * The hw event starts counting from this event offset, * mark it to be able to extra future deltas: */ - atomic64_set(&hwc->prev_count, (u64)-left); + atomic64_set_unchecked(&hwc->prev_count, (u64)-left); err = checking_wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.event_mask); @@ -2357,7 +2357,7 @@ perf_callchain_user(struct pt_regs *regs, struct 
perf_callchain_entry *entry) break; callchain_store(entry, frame.return_address); - fp = frame.next_frame; + fp = (__force const void __user *)frame.next_frame; } } diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 898df97..9e82503 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -30,11 +30,11 @@ struct nmi_watchdog_ctlblk { /* Interface defining a CPU specific perfctr watchdog */ struct wd_ops { - int (*reserve)(void); - void (*unreserve)(void); - int (*setup)(unsigned nmi_hz); - void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz); - void (*stop)(void); + int (* const reserve)(void); + void (* const unreserve)(void); + int (* const setup)(unsigned nmi_hz); + void (* const rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz); + void (* const stop)(void); unsigned perfctr; unsigned evntsel; u64 checkbit; @@ -645,6 +645,7 @@ static const struct wd_ops p4_wd_ops = { #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK +/* cannot be const */ static struct wd_ops intel_arch_wd_ops; static int setup_intel_arch_watchdog(unsigned nmi_hz) @@ -697,6 +698,7 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz) return 1; } +/* cannot be const */ static struct wd_ops intel_arch_wd_ops __read_mostly = { .reserve = single_msr_reserve, .unreserve = single_msr_unreserve, diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index ff95824..2ffdcb5 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -41,7 +41,7 @@ static void kdump_nmi_callback(int cpu, struct die_args *args) regs = args->regs; #ifdef CONFIG_X86_32 - if (!user_mode_vm(regs)) { + if (!user_mode(regs)) { crash_fixup_ss_esp(&fixed_regs, regs); regs = &fixed_regs; } diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index 37250fe..bf2ec74 100644 --- 
a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c @@ -11,7 +11,7 @@ #define DOUBLEFAULT_STACKSIZE (1024) static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; -#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) +#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE-2) #define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM) @@ -21,7 +21,7 @@ static void doublefault_fn(void) unsigned long gdt, tss; store_gdt(&gdt_desc); - gdt = gdt_desc.address; + gdt = (unsigned long)gdt_desc.address; printk(KERN_EMERG "PANIC: double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size); @@ -58,10 +58,10 @@ struct tss_struct doublefault_tss __cacheline_aligned = { /* 0x2 bit is always set */ .flags = X86_EFLAGS_SF | 0x2, .sp = STACK_START, - .es = __USER_DS, + .es = __KERNEL_DS, .cs = __KERNEL_CS, .ss = __KERNEL_DS, - .ds = __USER_DS, + .ds = __KERNEL_DS, .fs = __KERNEL_PERCPU, .__cr3 = __pa_nodebug(swapper_pg_dir), diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 2d8a371..4fa6ae6 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -2,6 +2,9 @@ * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs */ +#ifdef CONFIG_GRKERNSEC_HIDESYM +#define __INCLUDED_BY_HIDESYM 1 +#endif #include #include #include @@ -28,7 +31,7 @@ static int die_counter; void printk_address(unsigned long address, int reliable) { - printk(" [<%p>] %s%pS\n", (void *) address, + printk(" [<%p>] %s%pA\n", (void *) address, reliable ? "" : "? 
", (void *) address); } @@ -36,9 +39,8 @@ void printk_address(unsigned long address, int reliable) static void print_ftrace_graph_addr(unsigned long addr, void *data, const struct stacktrace_ops *ops, - struct thread_info *tinfo, int *graph) + struct task_struct *task, int *graph) { - struct task_struct *task = tinfo->task; unsigned long ret_addr; int index = task->curr_ret_stack; @@ -59,7 +61,7 @@ print_ftrace_graph_addr(unsigned long addr, void *data, static inline void print_ftrace_graph_addr(unsigned long addr, void *data, const struct stacktrace_ops *ops, - struct thread_info *tinfo, int *graph) + struct task_struct *task, int *graph) { } #endif @@ -70,10 +72,8 @@ print_ftrace_graph_addr(unsigned long addr, void *data, * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack */ -static inline int valid_stack_ptr(struct thread_info *tinfo, - void *p, unsigned int size, void *end) +static inline int valid_stack_ptr(void *t, void *p, unsigned int size, void *end) { - void *t = tinfo; if (end) { if (p < end && p >= (end-THREAD_SIZE)) return 1; @@ -84,14 +84,14 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, } unsigned long -print_context_stack(struct thread_info *tinfo, +print_context_stack(struct task_struct *task, void *stack_start, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph) { struct stack_frame *frame = (struct stack_frame *)bp; - while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { + while (valid_stack_ptr(stack_start, stack, sizeof(*stack), end)) { unsigned long addr; addr = *stack; @@ -103,7 +103,7 @@ print_context_stack(struct thread_info *tinfo, } else { ops->address(data, addr, 0); } - print_ftrace_graph_addr(addr, data, ops, tinfo, graph); + print_ftrace_graph_addr(addr, data, ops, task, graph); } stack++; } @@ -180,7 +180,7 @@ void dump_stack(void) #endif printk("Pid: %d, comm: %.20s %s %s %.*s\n", - current->pid, current->comm, 
print_tainted(), + task_pid_nr(current), current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); @@ -220,6 +220,8 @@ unsigned __kprobes long oops_begin(void) return flags; } +extern void gr_handle_kernel_exploit(void); + void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) { if (regs && kexec_should_crash(current)) @@ -241,7 +243,10 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) panic("Fatal exception in interrupt"); if (panic_on_oops) panic("Fatal exception"); - do_exit(signr); + + gr_handle_kernel_exploit(); + + do_group_exit(signr); } int __kprobes __die(const char *str, struct pt_regs *regs, long err) @@ -295,7 +300,7 @@ void die(const char *str, struct pt_regs *regs, long err) unsigned long flags = oops_begin(); int sig = SIGSEGV; - if (!user_mode_vm(regs)) + if (!user_mode(regs)) report_bug(regs->ip, regs); if (__die(str, regs, err)) diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h index 81086c2..13e8b17 100644 --- a/arch/x86/kernel/dumpstack.h +++ b/arch/x86/kernel/dumpstack.h @@ -15,7 +15,7 @@ #endif extern unsigned long -print_context_stack(struct thread_info *tinfo, +print_context_stack(struct task_struct *task, void *stack_start, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph); diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index f7dd2a7..504f53b 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -53,16 +53,12 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, #endif for (;;) { - struct thread_info *context; + void *stack_start = (void *)((unsigned long)stack & ~(THREAD_SIZE-1)); + bp = print_context_stack(task, stack_start, stack, bp, ops, data, NULL, &graph); - context = (struct thread_info *) - ((unsigned long)stack & (~(THREAD_SIZE - 1))); - bp = 
print_context_stack(context, stack, bp, ops, - data, NULL, &graph); - - stack = (unsigned long *)context->previous_esp; - if (!stack) + if (stack_start == task_stack_page(task)) break; + stack = *(unsigned long **)stack_start; if (ops->stack(data, "IRQ") < 0) break; touch_nmi_watchdog(); @@ -112,11 +108,12 @@ void show_registers(struct pt_regs *regs) * When in-kernel, we also print out the stack and code at the * time of the fault.. */ - if (!user_mode_vm(regs)) { + if (!user_mode(regs)) { unsigned int code_prologue = code_bytes * 43 / 64; unsigned int code_len = code_bytes; unsigned char c; u8 *ip; + unsigned long cs_base = get_desc_base(&get_cpu_gdt_table(smp_processor_id())[(0xffff & regs->cs) >> 3]); printk(KERN_EMERG "Stack:\n"); show_stack_log_lvl(NULL, regs, &regs->sp, @@ -124,10 +121,10 @@ void show_registers(struct pt_regs *regs) printk(KERN_EMERG "Code: "); - ip = (u8 *)regs->ip - code_prologue; + ip = (u8 *)regs->ip - code_prologue + cs_base; if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { /* try starting at IP */ - ip = (u8 *)regs->ip; + ip = (u8 *)regs->ip + cs_base; code_len = code_len - code_prologue + 1; } for (i = 0; i < code_len; i++, ip++) { @@ -136,7 +133,7 @@ void show_registers(struct pt_regs *regs) printk(" Bad EIP value."); break; } - if (ip == (u8 *)regs->ip) + if (ip == (u8 *)regs->ip + cs_base) printk("<%02x> ", c); else printk("%02x ", c); @@ -145,10 +142,23 @@ void show_registers(struct pt_regs *regs) printk("\n"); } +#ifdef CONFIG_PAX_MEMORY_STACKLEAK +void pax_check_alloca(unsigned long size) +{ + unsigned long sp = (unsigned long)&sp, stack_left; + + /* all kernel stacks are of the same size */ + stack_left = sp & (THREAD_SIZE - 1); + BUG_ON(stack_left < 256 || size >= stack_left - 256); +} +EXPORT_SYMBOL(pax_check_alloca); +#endif + int is_valid_bugaddr(unsigned long ip) { unsigned short ud2; + ip = ktla_ktva(ip); if (ip < PAGE_OFFSET) return 0; if (probe_kernel_address((unsigned short *)ip, ud2)) diff --git 
a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index a071e6b..36cd585 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -116,8 +116,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *irq_stack_end = (unsigned long *)per_cpu(irq_stack_ptr, cpu); unsigned used = 0; - struct thread_info *tinfo; int graph = 0; + void *stack_start; if (!task) task = current; @@ -146,10 +146,10 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, * current stack address. If the stacks consist of nested * exceptions */ - tinfo = task_thread_info(task); for (;;) { char *id; unsigned long *estack_end; + estack_end = in_exception_stack(cpu, (unsigned long)stack, &used, &id); @@ -157,7 +157,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (ops->stack(data, id) < 0) break; - bp = print_context_stack(tinfo, stack, bp, ops, + bp = print_context_stack(task, estack_end - EXCEPTION_STKSZ, stack, bp, ops, data, estack_end, &graph); ops->stack(data, ""); /* @@ -176,7 +176,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (stack >= irq_stack && stack < irq_stack_end) { if (ops->stack(data, "IRQ") < 0) break; - bp = print_context_stack(tinfo, stack, bp, + bp = print_context_stack(task, irq_stack, stack, bp, ops, data, irq_stack_end, &graph); /* * We link to the next stack (which would be @@ -195,7 +195,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, /* * This handles the process stack: */ - bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph); + stack_start = (void *)((unsigned long)stack & ~(THREAD_SIZE-1)); + bp = print_context_stack(task, stack_start, stack, bp, ops, data, NULL, &graph); put_cpu(); } EXPORT_SYMBOL(dump_trace); @@ -304,3 +305,50 @@ int is_valid_bugaddr(unsigned long ip) return ud2 == 0x0b0f; } + +#ifdef CONFIG_PAX_MEMORY_STACKLEAK +void pax_check_alloca(unsigned long size) +{ + unsigned long sp = (unsigned 
long)&sp, stack_start, stack_end; + unsigned cpu, used; + char *id; + + /* check the process stack first */ + stack_start = (unsigned long)task_stack_page(current); + stack_end = stack_start + THREAD_SIZE; + if (likely(stack_start <= sp && sp < stack_end)) { + unsigned long stack_left = sp & (THREAD_SIZE - 1); + BUG_ON(stack_left < 256 || size >= stack_left - 256); + return; + } + + cpu = get_cpu(); + + /* check the irq stacks */ + stack_end = (unsigned long)per_cpu(irq_stack_ptr, cpu); + stack_start = stack_end - IRQ_STACK_SIZE; + if (stack_start <= sp && sp < stack_end) { + unsigned long stack_left = sp & (IRQ_STACK_SIZE - 1); + put_cpu(); + BUG_ON(stack_left < 256 || size >= stack_left - 256); + return; + } + + /* check the exception stacks */ + used = 0; + stack_end = (unsigned long)in_exception_stack(cpu, sp, &used, &id); + stack_start = stack_end - EXCEPTION_STKSZ; + if (stack_end && stack_start <= sp && sp < stack_end) { + unsigned long stack_left = sp & (EXCEPTION_STKSZ - 1); + put_cpu(); + BUG_ON(stack_left < 256 || size >= stack_left - 256); + return; + } + + put_cpu(); + + /* unknown stack */ + BUG(); +} +EXPORT_SYMBOL(pax_check_alloca); +#endif diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index a89739a..95e0c48 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -733,7 +733,7 @@ struct early_res { }; static struct early_res early_res[MAX_EARLY_RES] __initdata = { { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ - {} + { 0, 0, {0}, 0 } }; static int __init find_overlapped_early(u64 start, u64 end) diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index b9c830c..1e41a96 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -170,6 +171,8 @@ asmlinkage void early_printk(const char *fmt, ...) 
int n; va_list ap; + pax_track_stack(); + va_start(ap, fmt); n = vscnprintf(buf, sizeof(buf), fmt, ap); early_console->write(early_console, buf, n); diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c index 5cab48e..b025f9b 100644 --- a/arch/x86/kernel/efi_32.c +++ b/arch/x86/kernel/efi_32.c @@ -38,70 +38,56 @@ */ static unsigned long efi_rt_eflags; -static pgd_t efi_bak_pg_dir_pointer[2]; +static pgd_t __initdata efi_bak_pg_dir_pointer[KERNEL_PGD_PTRS]; -void efi_call_phys_prelog(void) +void __init efi_call_phys_prelog(void) { - unsigned long cr4; - unsigned long temp; struct desc_ptr gdt_descr; +#ifdef CONFIG_PAX_KERNEXEC + struct desc_struct d; +#endif + local_irq_save(efi_rt_eflags); - /* - * If I don't have PAE, I should just duplicate two entries in page - * directory. If I have PAE, I just need to duplicate one entry in - * page directory. - */ - cr4 = read_cr4_safe(); - - if (cr4 & X86_CR4_PAE) { - efi_bak_pg_dir_pointer[0].pgd = - swapper_pg_dir[pgd_index(0)].pgd; - swapper_pg_dir[0].pgd = - swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; - } else { - efi_bak_pg_dir_pointer[0].pgd = - swapper_pg_dir[pgd_index(0)].pgd; - efi_bak_pg_dir_pointer[1].pgd = - swapper_pg_dir[pgd_index(0x400000)].pgd; - swapper_pg_dir[pgd_index(0)].pgd = - swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; - temp = PAGE_OFFSET + 0x400000; - swapper_pg_dir[pgd_index(0x400000)].pgd = - swapper_pg_dir[pgd_index(temp)].pgd; - } + clone_pgd_range(efi_bak_pg_dir_pointer, swapper_pg_dir, KERNEL_PGD_PTRS); + clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, + min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); /* * After the lock is released, the original page table is restored. 
*/ __flush_tlb_all(); +#ifdef CONFIG_PAX_KERNEXEC + pack_descriptor(&d, 0, 0xFFFFF, 0x9B, 0xC); + write_gdt_entry(get_cpu_gdt_table(0), GDT_ENTRY_KERNEXEC_EFI_CS, &d, DESCTYPE_S); + pack_descriptor(&d, 0, 0xFFFFF, 0x93, 0xC); + write_gdt_entry(get_cpu_gdt_table(0), GDT_ENTRY_KERNEXEC_EFI_DS, &d, DESCTYPE_S); +#endif + gdt_descr.address = __pa(get_cpu_gdt_table(0)); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); } -void efi_call_phys_epilog(void) +void __init efi_call_phys_epilog(void) { - unsigned long cr4; struct desc_ptr gdt_descr; +#ifdef CONFIG_PAX_KERNEXEC + struct desc_struct d; + + memset(&d, 0, sizeof d); + write_gdt_entry(get_cpu_gdt_table(0), GDT_ENTRY_KERNEXEC_EFI_CS, &d, DESCTYPE_S); + write_gdt_entry(get_cpu_gdt_table(0), GDT_ENTRY_KERNEXEC_EFI_DS, &d, DESCTYPE_S); +#endif + gdt_descr.address = (unsigned long)get_cpu_gdt_table(0); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); - cr4 = read_cr4_safe(); - - if (cr4 & X86_CR4_PAE) { - swapper_pg_dir[pgd_index(0)].pgd = - efi_bak_pg_dir_pointer[0].pgd; - } else { - swapper_pg_dir[pgd_index(0)].pgd = - efi_bak_pg_dir_pointer[0].pgd; - swapper_pg_dir[pgd_index(0x400000)].pgd = - efi_bak_pg_dir_pointer[1].pgd; - } + clone_pgd_range(swapper_pg_dir, efi_bak_pg_dir_pointer, KERNEL_PGD_PTRS); /* * After the lock is released, the original page table is restored. diff --git a/arch/x86/kernel/efi_stub_32.S b/arch/x86/kernel/efi_stub_32.S index fbe66e6..c5c0dd2 100644 --- a/arch/x86/kernel/efi_stub_32.S +++ b/arch/x86/kernel/efi_stub_32.S @@ -6,7 +6,9 @@ */ #include +#include #include +#include /* * efi_call_phys(void *, ...) is a function with variable parameters. @@ -20,7 +22,7 @@ * service functions will comply with gcc calling convention, too. */ -.text +__INIT ENTRY(efi_call_phys) /* * 0. The function can only be called in Linux kernel. So CS has been @@ -36,9 +38,11 @@ ENTRY(efi_call_phys) * The mapping of lower virtual memory has been created in prelog and * epilog. 
*/ - movl $1f, %edx - subl $__PAGE_OFFSET, %edx - jmp *%edx + movl $(__KERNEXEC_EFI_DS), %edx + mov %edx, %ds + mov %edx, %es + mov %edx, %ss + ljmp $(__KERNEXEC_EFI_CS),$1f-__PAGE_OFFSET 1: /* @@ -47,14 +51,8 @@ ENTRY(efi_call_phys) * parameter 2, ..., param n. To make things easy, we save the return * address of efi_call_phys in a global variable. */ - popl %edx - movl %edx, saved_return_addr - /* get the function pointer into ECX*/ - popl %ecx - movl %ecx, efi_rt_function_ptr - movl $2f, %edx - subl $__PAGE_OFFSET, %edx - pushl %edx + popl (saved_return_addr) + popl (efi_rt_function_ptr) /* * 3. Clear PG bit in %CR0. @@ -73,9 +71,8 @@ ENTRY(efi_call_phys) /* * 5. Call the physical function. */ - jmp *%ecx + call *(efi_rt_function_ptr-__PAGE_OFFSET) -2: /* * 6. After EFI runtime service returns, control will return to * following instruction. We'd better readjust stack pointer first. @@ -88,35 +85,32 @@ ENTRY(efi_call_phys) movl %cr0, %edx orl $0x80000000, %edx movl %edx, %cr0 - jmp 1f -1: + /* * 8. Now restore the virtual mode from flat mode by * adding EIP with PAGE_OFFSET. */ - movl $1f, %edx - jmp *%edx + ljmp $(__KERNEL_CS),$1f+__PAGE_OFFSET 1: + movl $(__KERNEL_DS), %edx + mov %edx, %ds + mov %edx, %es + mov %edx, %ss /* * 9. Balance the stack. And because EAX contain the return value, * we'd better not clobber it. */ - leal efi_rt_function_ptr, %edx - movl (%edx), %ecx - pushl %ecx + pushl (efi_rt_function_ptr) /* - * 10. Push the saved return address onto the stack and return. + * 10. Return to the saved return address. 
*/ - leal saved_return_addr, %edx - movl (%edx), %ecx - pushl %ecx - ret + jmpl *(saved_return_addr) ENDPROC(efi_call_phys) .previous -.data +__INITDATA saved_return_addr: .long 0 efi_rt_function_ptr: diff --git a/arch/x86/kernel/efi_stub_64.S b/arch/x86/kernel/efi_stub_64.S index 4c07cca..2c8427d 100644 --- a/arch/x86/kernel/efi_stub_64.S +++ b/arch/x86/kernel/efi_stub_64.S @@ -7,6 +7,7 @@ */ #include +#include #define SAVE_XMM \ mov %rsp, %rax; \ @@ -40,6 +41,7 @@ ENTRY(efi_call0) call *%rdi addq $32, %rsp RESTORE_XMM + pax_force_retaddr 0, 1 ret ENDPROC(efi_call0) @@ -50,6 +52,7 @@ ENTRY(efi_call1) call *%rdi addq $32, %rsp RESTORE_XMM + pax_force_retaddr 0, 1 ret ENDPROC(efi_call1) @@ -60,6 +63,7 @@ ENTRY(efi_call2) call *%rdi addq $32, %rsp RESTORE_XMM + pax_force_retaddr 0, 1 ret ENDPROC(efi_call2) @@ -71,6 +75,7 @@ ENTRY(efi_call3) call *%rdi addq $32, %rsp RESTORE_XMM + pax_force_retaddr 0, 1 ret ENDPROC(efi_call3) @@ -83,6 +88,7 @@ ENTRY(efi_call4) call *%rdi addq $32, %rsp RESTORE_XMM + pax_force_retaddr 0, 1 ret ENDPROC(efi_call4) @@ -96,6 +102,7 @@ ENTRY(efi_call5) call *%rdi addq $48, %rsp RESTORE_XMM + pax_force_retaddr 0, 1 ret ENDPROC(efi_call5) @@ -112,5 +119,6 @@ ENTRY(efi_call6) call *%rdi addq $48, %rsp RESTORE_XMM + pax_force_retaddr 0, 1 ret ENDPROC(efi_call6) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c097e7d..c689cf4 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -185,13 +185,146 @@ /*CFI_REL_OFFSET gs, PT_GS*/ .endm .macro SET_KERNEL_GS reg + +#ifdef CONFIG_CC_STACKPROTECTOR movl $(__KERNEL_STACK_CANARY), \reg +#elif defined(CONFIG_PAX_MEMORY_UDEREF) + movl $(__USER_DS), \reg +#else + xorl \reg, \reg +#endif + movl \reg, %gs .endm #endif /* CONFIG_X86_32_LAZY_GS */ -.macro SAVE_ALL +.macro pax_enter_kernel +#ifdef CONFIG_PAX_KERNEXEC + call pax_enter_kernel +#endif +.endm + +.macro pax_exit_kernel +#ifdef CONFIG_PAX_KERNEXEC + call pax_exit_kernel +#endif +.endm + +#ifdef 
CONFIG_PAX_KERNEXEC +ENTRY(pax_enter_kernel) +#ifdef CONFIG_PARAVIRT + pushl %eax + pushl %ecx + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0) + mov %eax, %esi +#else + mov %cr0, %esi +#endif + bts $16, %esi + jnc 1f + mov %cs, %esi + cmp $__KERNEL_CS, %esi + jz 3f + ljmp $__KERNEL_CS, $3f +1: ljmp $__KERNEXEC_KERNEL_CS, $2f +2: +#ifdef CONFIG_PARAVIRT + mov %esi, %eax + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_write_cr0) +#else + mov %esi, %cr0 +#endif +3: +#ifdef CONFIG_PARAVIRT + popl %ecx + popl %eax +#endif + ret +ENDPROC(pax_enter_kernel) + +ENTRY(pax_exit_kernel) +#ifdef CONFIG_PARAVIRT + pushl %eax + pushl %ecx +#endif + mov %cs, %esi + cmp $__KERNEXEC_KERNEL_CS, %esi + jnz 2f +#ifdef CONFIG_PARAVIRT + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); + mov %eax, %esi +#else + mov %cr0, %esi +#endif + btr $16, %esi + ljmp $__KERNEL_CS, $1f +1: +#ifdef CONFIG_PARAVIRT + mov %esi, %eax + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_write_cr0); +#else + mov %esi, %cr0 +#endif +2: +#ifdef CONFIG_PARAVIRT + popl %ecx + popl %eax +#endif + ret +ENDPROC(pax_exit_kernel) +#endif + +.macro pax_erase_kstack +#ifdef CONFIG_PAX_MEMORY_STACKLEAK + call pax_erase_kstack +#endif +.endm + +#ifdef CONFIG_PAX_MEMORY_STACKLEAK +/* + * ebp: thread_info + * ecx, edx: can be clobbered + */ +ENTRY(pax_erase_kstack) + pushl %edi + pushl %eax + + mov TI_lowest_stack(%ebp), %edi + mov $-0xBEEF, %eax + std + +1: mov %edi, %ecx + and $THREAD_SIZE_asm - 1, %ecx + shr $2, %ecx + repne scasl + jecxz 2f + + cmp $2*16, %ecx + jc 2f + + mov $2*16, %ecx + repe scasl + jecxz 2f + jne 1b + +2: cld + mov %esp, %ecx + sub %edi, %ecx + shr $2, %ecx + rep stosl + + mov TI_task_thread_sp0(%ebp), %edi + sub $128, %edi + mov %edi, TI_lowest_stack(%ebp) + + popl %eax + popl %edi + ret +ENDPROC(pax_erase_kstack) +#endif + +.macro __SAVE_ALL _DS cld PUSH_GS pushl %fs @@ -224,7 +357,7 @@ pushl %ebx CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET ebx, 0 - movl $(__USER_DS), %edx + movl $\_DS, %edx movl %edx, %ds movl %edx, %es 
movl $(__KERNEL_PERCPU), %edx @@ -232,6 +365,15 @@ SET_KERNEL_GS %edx .endm +.macro SAVE_ALL +#if defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF) + __SAVE_ALL __KERNEL_DS + pax_enter_kernel +#else + __SAVE_ALL __USER_DS +#endif +.endm + .macro RESTORE_INT_REGS popl %ebx CFI_ADJUST_CFA_OFFSET -4 @@ -331,7 +473,7 @@ ENTRY(ret_from_fork) CFI_ADJUST_CFA_OFFSET -4 jmp syscall_exit CFI_ENDPROC -END(ret_from_fork) +ENDPROC(ret_from_fork) /* * Return to user mode is not as complex as all this looks, @@ -352,7 +494,15 @@ check_userspace: movb PT_CS(%esp), %al andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax cmpl $USER_RPL, %eax + +#ifdef CONFIG_PAX_KERNEXEC + jae resume_userspace + + PAX_EXIT_KERNEL + jmp resume_kernel +#else jb resume_kernel # not returning to v8086 or userspace +#endif ENTRY(resume_userspace) LOCKDEP_SYS_EXIT @@ -364,8 +514,8 @@ ENTRY(resume_userspace) andl $_TIF_WORK_MASK, %ecx # is there any work to be done on # int/exception return? jne work_pending - jmp restore_all -END(ret_from_exception) + jmp restore_all_pax +ENDPROC(ret_from_exception) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) @@ -380,7 +530,7 @@ need_resched: jz restore_all call preempt_schedule_irq jmp need_resched -END(resume_kernel) +ENDPROC(resume_kernel) #endif CFI_ENDPROC @@ -414,25 +564,36 @@ sysenter_past_esp: /*CFI_REL_OFFSET cs, 0*/ /* * Push current_thread_info()->sysenter_return to the stack. - * A tiny bit of offset fixup is necessary - 4*4 means the 4 words - * pushed above; +8 corresponds to copy_thread's esp0 setting. */ - pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) + pushl $0 CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET eip, 0 pushl %eax CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL + GET_THREAD_INFO(%ebp) + movl TI_sysenter_return(%ebp),%ebp + movl %ebp,PT_EIP(%esp) ENABLE_INTERRUPTS(CLBR_NONE) /* * Load the potential sixth argument from user stack. * Careful about security. 
*/ + movl PT_OLDESP(%esp),%ebp + +#ifdef CONFIG_PAX_MEMORY_UDEREF + mov PT_OLDSS(%esp),%ds +1: movl %ds:(%ebp),%ebp + push %ss + pop %ds +#else cmpl $__PAGE_OFFSET-3,%ebp jae syscall_fault 1: movl (%ebp),%ebp +#endif + movl %ebp,PT_EBP(%esp) .section __ex_table,"a" .align 4 @@ -455,12 +616,24 @@ sysenter_do_call: testl $_TIF_ALLWORK_MASK, %ecx jne sysexit_audit sysenter_exit: + +#ifdef CONFIG_PAX_RANDKSTACK + pushl_cfi %eax + movl %esp, %eax + call pax_randomize_kstack + popl_cfi %eax +#endif + + pax_erase_kstack + /* if something modifies registers it must also disable sysexit */ movl PT_EIP(%esp), %edx movl PT_OLDESP(%esp), %ecx xorl %ebp,%ebp TRACE_IRQS_ON 1: mov PT_FS(%esp), %fs +2: mov PT_DS(%esp), %ds +3: mov PT_ES(%esp), %es PTGS_TO_GS ENABLE_INTERRUPTS_SYSEXIT @@ -477,6 +650,9 @@ sysenter_audit: movl %eax,%edx /* 2nd arg: syscall number */ movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ call audit_syscall_entry + + pax_erase_kstack + pushl %ebx CFI_ADJUST_CFA_OFFSET 4 movl PT_EAX(%esp),%eax /* reload syscall number */ @@ -504,11 +680,17 @@ sysexit_audit: CFI_ENDPROC .pushsection .fixup,"ax" -2: movl $0,PT_FS(%esp) +4: movl $0,PT_FS(%esp) + jmp 1b +5: movl $0,PT_DS(%esp) + jmp 1b +6: movl $0,PT_ES(%esp) jmp 1b .section __ex_table,"a" .align 4 - .long 1b,2b + .long 1b,4b + .long 2b,5b + .long 3b,6b .popsection PTGS_TO_GS_EX ENDPROC(ia32_sysenter_target) @@ -538,6 +720,15 @@ syscall_exit: testl $_TIF_ALLWORK_MASK, %ecx # current->work jne syscall_exit_work +restore_all_pax: + +#ifdef CONFIG_PAX_RANDKSTACK + movl %esp, %eax + call pax_randomize_kstack +#endif + + pax_erase_kstack + restore_all: TRACE_IRQS_IRET restore_all_notrace: @@ -602,10 +793,29 @@ ldt_ss: mov PT_OLDESP(%esp), %eax /* load userspace esp */ mov %dx, %ax /* eax: new kernel esp */ sub %eax, %edx /* offset (low word is 0) */ - PER_CPU(gdt_page, %ebx) +#ifdef CONFIG_SMP + movl PER_CPU_VAR(cpu_number), %ebx + shll $PAGE_SHIFT_asm, %ebx + addl $cpu_gdt_table, %ebx +#else + movl 
$cpu_gdt_table, %ebx +#endif shr $16, %edx + +#ifdef CONFIG_PAX_KERNEXEC + mov %cr0, %esi + btr $16, %esi + mov %esi, %cr0 +#endif + mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */ mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */ + +#ifdef CONFIG_PAX_KERNEXEC + bts $16, %esi + mov %esi, %cr0 +#endif + pushl $__ESPFIX_SS CFI_ADJUST_CFA_OFFSET 4 push %eax /* new kernel esp */ @@ -636,36 +846,30 @@ work_resched: movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done other # than syscall tracing? - jz restore_all + jz restore_all_pax testb $_TIF_NEED_RESCHED, %cl jnz work_resched work_notifysig: # deal with pending signals and # notify-resume requests + movl %esp, %eax #ifdef CONFIG_VM86 testl $X86_EFLAGS_VM, PT_EFLAGS(%esp) - movl %esp, %eax - jne work_notifysig_v86 # returning to kernel-space or + jz 1f # returning to kernel-space or # vm86-space - xorl %edx, %edx - call do_notify_resume - jmp resume_userspace_sig - ALIGN -work_notifysig_v86: pushl %ecx # save ti_flags for do_notify_resume CFI_ADJUST_CFA_OFFSET 4 call save_v86_state # %eax contains pt_regs pointer popl %ecx CFI_ADJUST_CFA_OFFSET -4 movl %eax, %esp -#else - movl %esp, %eax +1: #endif xorl %edx, %edx call do_notify_resume jmp resume_userspace_sig -END(work_pending) +ENDPROC(work_pending) # perform syscall exit tracing ALIGN @@ -673,11 +877,14 @@ syscall_trace_entry: movl $-ENOSYS,PT_EAX(%esp) movl %esp, %eax call syscall_trace_enter + + pax_erase_kstack + /* What it returned is what we'll actually use. 
*/ cmpl $(nr_syscalls), %eax jnae syscall_call jmp syscall_exit -END(syscall_trace_entry) +ENDPROC(syscall_trace_entry) # perform syscall exit tracing ALIGN @@ -690,20 +897,24 @@ syscall_exit_work: movl %esp, %eax call syscall_trace_leave jmp resume_userspace -END(syscall_exit_work) +ENDPROC(syscall_exit_work) CFI_ENDPROC RING0_INT_FRAME # can't unwind into user space anyway syscall_fault: +#ifdef CONFIG_PAX_MEMORY_UDEREF + push %ss + pop %ds +#endif GET_THREAD_INFO(%ebp) movl $-EFAULT,PT_EAX(%esp) jmp resume_userspace -END(syscall_fault) +ENDPROC(syscall_fault) syscall_badsys: movl $-ENOSYS,PT_EAX(%esp) jmp resume_userspace -END(syscall_badsys) +ENDPROC(syscall_badsys) CFI_ENDPROC /* @@ -726,6 +937,33 @@ PTREGSCALL(rt_sigreturn) PTREGSCALL(vm86) PTREGSCALL(vm86old) + ALIGN; +ENTRY(kernel_execve) + push %ebp + sub $PT_OLDSS+4,%esp + push %edi + push %ecx + push %eax + lea 3*4(%esp),%edi + mov $PT_OLDSS/4+1,%ecx + xorl %eax,%eax + rep stosl + pop %eax + pop %ecx + pop %edi + movl $X86_EFLAGS_IF,PT_EFLAGS(%esp) + mov %eax,PT_EBX(%esp) + mov %edx,PT_ECX(%esp) + mov %ecx,PT_EDX(%esp) + mov %esp,%eax + call sys_execve + GET_THREAD_INFO(%ebp) + test %eax,%eax + jz syscall_exit + add $PT_OLDSS+4,%esp + pop %ebp + ret + .macro FIXUP_ESPFIX_STACK /* * Switch back for ESPFIX stack to the normal zerobased stack @@ -735,7 +973,13 @@ PTREGSCALL(vm86old) * normal stack and adjusts ESP with the matching offset. 
*/ /* fixup the stack */ - PER_CPU(gdt_page, %ebx) +#ifdef CONFIG_SMP + movl PER_CPU_VAR(cpu_number), %ebx + shll $PAGE_SHIFT_asm, %ebx + addl $cpu_gdt_table, %ebx +#else + movl $cpu_gdt_table, %ebx +#endif mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */ mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */ shl $16, %eax @@ -793,7 +1037,7 @@ vector=vector+1 .endr 2: jmp common_interrupt .endr -END(irq_entries_start) +ENDPROC(irq_entries_start) .previous END(interrupt) @@ -840,7 +1084,7 @@ ENTRY(coprocessor_error) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC -END(coprocessor_error) +ENDPROC(coprocessor_error) ENTRY(simd_coprocessor_error) RING0_INT_FRAME @@ -850,7 +1094,7 @@ ENTRY(simd_coprocessor_error) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC -END(simd_coprocessor_error) +ENDPROC(simd_coprocessor_error) ENTRY(device_not_available) RING0_INT_FRAME @@ -860,7 +1104,7 @@ ENTRY(device_not_available) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC -END(device_not_available) +ENDPROC(device_not_available) #ifdef CONFIG_PARAVIRT ENTRY(native_iret) @@ -869,12 +1113,12 @@ ENTRY(native_iret) .align 4 .long native_iret, iret_exc .previous -END(native_iret) +ENDPROC(native_iret) ENTRY(native_irq_enable_sysexit) sti sysexit -END(native_irq_enable_sysexit) +ENDPROC(native_irq_enable_sysexit) #endif ENTRY(overflow) @@ -885,7 +1129,7 @@ ENTRY(overflow) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC -END(overflow) +ENDPROC(overflow) ENTRY(bounds) RING0_INT_FRAME @@ -895,7 +1139,7 @@ ENTRY(bounds) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC -END(bounds) +ENDPROC(bounds) ENTRY(invalid_op) RING0_INT_FRAME @@ -905,7 +1149,7 @@ ENTRY(invalid_op) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC -END(invalid_op) +ENDPROC(invalid_op) ENTRY(coprocessor_segment_overrun) RING0_INT_FRAME @@ -915,7 +1159,7 @@ ENTRY(coprocessor_segment_overrun) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC -END(coprocessor_segment_overrun) 
+ENDPROC(coprocessor_segment_overrun) ENTRY(invalid_TSS) RING0_EC_FRAME @@ -923,7 +1167,7 @@ ENTRY(invalid_TSS) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC -END(invalid_TSS) +ENDPROC(invalid_TSS) ENTRY(segment_not_present) RING0_EC_FRAME @@ -931,7 +1175,7 @@ ENTRY(segment_not_present) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC -END(segment_not_present) +ENDPROC(segment_not_present) ENTRY(stack_segment) RING0_EC_FRAME @@ -939,7 +1183,7 @@ ENTRY(stack_segment) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC -END(stack_segment) +ENDPROC(stack_segment) ENTRY(alignment_check) RING0_EC_FRAME @@ -947,7 +1191,7 @@ ENTRY(alignment_check) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC -END(alignment_check) +ENDPROC(alignment_check) ENTRY(divide_error) RING0_INT_FRAME @@ -957,7 +1201,7 @@ ENTRY(divide_error) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC -END(divide_error) +ENDPROC(divide_error) #ifdef CONFIG_X86_MCE ENTRY(machine_check) @@ -968,7 +1212,7 @@ ENTRY(machine_check) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC -END(machine_check) +ENDPROC(machine_check) #endif ENTRY(spurious_interrupt_bug) @@ -979,7 +1223,7 @@ ENTRY(spurious_interrupt_bug) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC -END(spurious_interrupt_bug) +ENDPROC(spurious_interrupt_bug) ENTRY(kernel_thread_helper) pushl $0 # fake return address for unwinder @@ -1095,7 +1339,7 @@ ENDPROC(xen_failsafe_callback) ENTRY(mcount) ret -END(mcount) +ENDPROC(mcount) ENTRY(ftrace_caller) cmpl $0, function_trace_stop @@ -1124,7 +1368,7 @@ ftrace_graph_call: .globl ftrace_stub ftrace_stub: ret -END(ftrace_caller) +ENDPROC(ftrace_caller) #else /* ! 
CONFIG_DYNAMIC_FTRACE */ @@ -1160,7 +1404,7 @@ trace: popl %ecx popl %eax jmp ftrace_stub -END(mcount) +ENDPROC(mcount) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ @@ -1181,7 +1425,7 @@ ENTRY(ftrace_graph_caller) popl %ecx popl %eax ret -END(ftrace_graph_caller) +ENDPROC(ftrace_graph_caller) .globl return_to_handler return_to_handler: @@ -1198,7 +1442,6 @@ return_to_handler: ret #endif -.section .rodata,"a" #include "syscall_table_32.S" syscall_table_size=(.-sys_call_table) @@ -1255,15 +1498,18 @@ error_code: movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart REG_TO_PTGS %ecx SET_KERNEL_GS %ecx - movl $(__USER_DS), %ecx + movl $(__KERNEL_DS), %ecx movl %ecx, %ds movl %ecx, %es + + pax_enter_kernel + TRACE_IRQS_OFF movl %esp,%eax # pt_regs pointer call *%edi jmp ret_from_exception CFI_ENDPROC -END(page_fault) +ENDPROC(page_fault) /* * Debug traps and NMI can happen at the one SYSENTER instruction @@ -1309,7 +1555,7 @@ debug_stack_correct: call do_debug jmp ret_from_exception CFI_ENDPROC -END(debug) +ENDPROC(debug) /* * NMI is doubly nasty. 
It can happen _while_ we're handling @@ -1351,6 +1597,9 @@ nmi_stack_correct: xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi + + pax_exit_kernel + jmp restore_all_notrace CFI_ENDPROC @@ -1391,12 +1640,15 @@ nmi_espfix_stack: FIXUP_ESPFIX_STACK # %eax == %esp xorl %edx,%edx # zero error code call do_nmi + + pax_exit_kernel + RESTORE_REGS lss 12+4(%esp), %esp # back to espfix stack CFI_ADJUST_CFA_OFFSET -24 jmp irq_return CFI_ENDPROC -END(nmi) +ENDPROC(nmi) ENTRY(int3) RING0_INT_FRAME @@ -1409,7 +1661,7 @@ ENTRY(int3) call do_int3 jmp ret_from_exception CFI_ENDPROC -END(int3) +ENDPROC(int3) ENTRY(general_protection) RING0_EC_FRAME @@ -1417,7 +1669,7 @@ ENTRY(general_protection) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC -END(general_protection) +ENDPROC(general_protection) /* * End of kprobes section diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 34a56a9..4aa5c8b 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -53,6 +53,8 @@ #include #include #include +#include +#include /* Avoid __ASSEMBLER__'ifying just for this. */ #include @@ -64,8 +66,9 @@ #ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(mcount) + pax_force_retaddr retq -END(mcount) +ENDPROC(mcount) ENTRY(ftrace_caller) cmpl $0, function_trace_stop @@ -88,8 +91,9 @@ GLOBAL(ftrace_graph_call) #endif GLOBAL(ftrace_stub) + pax_force_retaddr retq -END(ftrace_caller) +ENDPROC(ftrace_caller) #else /* ! 
CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) @@ -108,6 +112,7 @@ ENTRY(mcount) #endif GLOBAL(ftrace_stub) + pax_force_retaddr retq trace: @@ -117,12 +122,13 @@ trace: movq 8(%rbp), %rsi subq $MCOUNT_INSN_SIZE, %rdi + pax_force_fptr ftrace_trace_function call *ftrace_trace_function MCOUNT_RESTORE_FRAME jmp ftrace_stub -END(mcount) +ENDPROC(mcount) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ @@ -142,8 +148,9 @@ ENTRY(ftrace_graph_caller) MCOUNT_RESTORE_FRAME + pax_force_retaddr retq -END(ftrace_graph_caller) +ENDPROC(ftrace_graph_caller) GLOBAL(return_to_handler) subq $24, %rsp @@ -159,6 +166,7 @@ GLOBAL(return_to_handler) movq 8(%rsp), %rdx movq (%rsp), %rax addq $16, %rsp + pax_force_retaddr retq #endif @@ -174,6 +182,282 @@ ENTRY(native_usergs_sysret64) ENDPROC(native_usergs_sysret64) #endif /* CONFIG_PARAVIRT */ + .macro ljmpq sel, off +#if defined(CONFIG_MPSC) || defined(CONFIG_MCORE2) || defined (CONFIG_MATOM) + .byte 0x48; ljmp *1234f(%rip) + .pushsection .rodata + .align 16 + 1234: .quad \off; .word \sel + .popsection +#else + pushq $\sel + pushq $\off + lretq +#endif + .endm + + .macro pax_enter_kernel + pax_set_fptr_mask +#ifdef CONFIG_PAX_KERNEXEC + call pax_enter_kernel +#endif + .endm + + .macro pax_exit_kernel +#ifdef CONFIG_PAX_KERNEXEC + call pax_exit_kernel +#endif + .endm + +#ifdef CONFIG_PAX_KERNEXEC +ENTRY(pax_enter_kernel) + pushq %rdi + +#ifdef CONFIG_PARAVIRT + PV_SAVE_REGS(CLBR_RDI) +#endif + + GET_CR0_INTO_RDI + bts $16,%rdi + jnc 3f + mov %cs,%edi + cmp $__KERNEL_CS,%edi + jnz 2f +1: + +#ifdef CONFIG_PARAVIRT + PV_RESTORE_REGS(CLBR_RDI) +#endif + + popq %rdi + pax_force_retaddr + retq + +2: ljmpq __KERNEL_CS,1f +3: ljmpq __KERNEXEC_KERNEL_CS,4f +4: SET_RDI_INTO_CR0 + jmp 1b +ENDPROC(pax_enter_kernel) + +ENTRY(pax_exit_kernel) + pushq %rdi + +#ifdef CONFIG_PARAVIRT + PV_SAVE_REGS(CLBR_RDI) +#endif + + mov %cs,%rdi + cmp $__KERNEXEC_KERNEL_CS,%edi + jz 2f +1: + +#ifdef CONFIG_PARAVIRT + PV_RESTORE_REGS(CLBR_RDI); +#endif + 
+ popq %rdi + pax_force_retaddr + retq + +2: GET_CR0_INTO_RDI + btr $16,%rdi + ljmpq __KERNEL_CS,3f +3: SET_RDI_INTO_CR0 + jmp 1b +#ifdef CONFIG_PARAVIRT + PV_RESTORE_REGS(CLBR_RDI); +#endif + + popq %rdi + pax_force_retaddr + retq +ENDPROC(pax_exit_kernel) +#endif + + .macro pax_enter_kernel_user + pax_set_fptr_mask +#ifdef CONFIG_PAX_MEMORY_UDEREF + call pax_enter_kernel_user +#endif + .endm + + .macro pax_exit_kernel_user +#ifdef CONFIG_PAX_MEMORY_UDEREF + call pax_exit_kernel_user +#endif +#ifdef CONFIG_PAX_RANDKSTACK + pushq %rax + call pax_randomize_kstack + popq %rax +#endif + .endm + +#ifdef CONFIG_PAX_MEMORY_UDEREF +ENTRY(pax_enter_kernel_user) + pushq %rdi + pushq %rbx + +#ifdef CONFIG_PARAVIRT + PV_SAVE_REGS(CLBR_RDI) +#endif + + GET_CR3_INTO_RDI + mov %rdi,%rbx + add $__START_KERNEL_map,%rbx + sub phys_base(%rip),%rbx + +#ifdef CONFIG_PARAVIRT + pushq %rdi + cmpl $0, pv_info+PARAVIRT_enabled + jz 1f + i = 0 + .rept USER_PGD_PTRS + mov i*8(%rbx),%rsi + mov $0,%sil + lea i*8(%rbx),%rdi + call PARA_INDIRECT(pv_mmu_ops+PV_MMU_set_pgd_batched) + i = i + 1 + .endr + jmp 2f +1: +#endif + + i = 0 + .rept USER_PGD_PTRS + movb $0,i*8(%rbx) + i = i + 1 + .endr + +#ifdef CONFIG_PARAVIRT +2: popq %rdi +#endif + SET_RDI_INTO_CR3 + +#ifdef CONFIG_PAX_KERNEXEC + GET_CR0_INTO_RDI + bts $16,%rdi + SET_RDI_INTO_CR0 +#endif + +#ifdef CONFIG_PARAVIRT + PV_RESTORE_REGS(CLBR_RDI) +#endif + + popq %rbx + popq %rdi + pax_force_retaddr + retq +ENDPROC(pax_enter_kernel_user) + +ENTRY(pax_exit_kernel_user) + push %rdi + +#ifdef CONFIG_PARAVIRT + pushq %rbx + PV_SAVE_REGS(CLBR_RDI) +#endif + +#ifdef CONFIG_PAX_KERNEXEC + GET_CR0_INTO_RDI + btr $16,%rdi + SET_RDI_INTO_CR0 +#endif + + GET_CR3_INTO_RDI + add $__START_KERNEL_map,%rdi + sub phys_base(%rip),%rdi + +#ifdef CONFIG_PARAVIRT + cmpl $0, pv_info+PARAVIRT_enabled + jz 1f + mov %rdi,%rbx + i = 0 + .rept USER_PGD_PTRS + mov i*8(%rbx),%rsi + mov $0x67,%sil + lea i*8(%rbx),%rdi + call 
PARA_INDIRECT(pv_mmu_ops+PV_MMU_set_pgd_batched) + i = i + 1 + .endr + jmp 2f +1: +#endif + + i = 0 + .rept USER_PGD_PTRS + movb $0x67,i*8(%rdi) + i = i + 1 + .endr + +#ifdef CONFIG_PARAVIRT +2: PV_RESTORE_REGS(CLBR_RDI) + popq %rbx +#endif + + popq %rdi + pax_force_retaddr + retq +ENDPROC(pax_exit_kernel_user) +#endif + +.macro pax_erase_kstack +#ifdef CONFIG_PAX_MEMORY_STACKLEAK + call pax_erase_kstack +#endif +.endm + +#ifdef CONFIG_PAX_MEMORY_STACKLEAK +/* + * r11: thread_info + * rcx, rdx: can be clobbered + */ +ENTRY(pax_erase_kstack) + pushq %rdi + pushq %rax + pushq %r11 + + GET_THREAD_INFO(%r11) + mov TI_lowest_stack(%r11), %rdi + mov $-0xBEEF, %rax + std + +1: mov %edi, %ecx + and $THREAD_SIZE_asm - 1, %ecx + shr $3, %ecx + repne scasq + jecxz 2f + + cmp $2*8, %ecx + jc 2f + + mov $2*8, %ecx + repe scasq + jecxz 2f + jne 1b + +2: cld + mov %esp, %ecx + sub %edi, %ecx + + cmp $THREAD_SIZE_asm, %rcx + jb 3f + ud2 +3: + + shr $3, %ecx + rep stosq + + mov TI_task_thread_sp0(%r11), %rdi + sub $256, %rdi + mov %rdi, TI_lowest_stack(%r11) + + popq %r11 + popq %rax + popq %rdi + pax_force_retaddr + ret +ENDPROC(pax_erase_kstack) +#endif .macro TRACE_IRQS_IRETQ offset=ARGOFFSET #ifdef CONFIG_TRACE_IRQFLAGS @@ -233,8 +517,8 @@ ENDPROC(native_usergs_sysret64) .endm .macro UNFAKE_STACK_FRAME - addq $8*6, %rsp - CFI_ADJUST_CFA_OFFSET -(6*8) + addq $8*6 + ARG_SKIP, %rsp + CFI_ADJUST_CFA_OFFSET -(6*8 + ARG_SKIP) .endm /* @@ -317,7 +601,7 @@ ENTRY(save_args) leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */ movq_cfi rbp, 8 /* push %rbp */ leaq 8(%rsp), %rbp /* mov %rsp, %ebp */ - testl $3, CS(%rdi) + testb $3, CS(%rdi) je 1f SWAPGS /* @@ -337,9 +621,10 @@ ENTRY(save_args) * We entered an interrupt context - irqs are off: */ 2: TRACE_IRQS_OFF + pax_force_retaddr ret CFI_ENDPROC -END(save_args) +ENDPROC(save_args) ENTRY(save_rest) PARTIAL_FRAME 1 REST_SKIP+8 @@ -352,9 +637,10 @@ ENTRY(save_rest) movq_cfi r15, R15+16 movq %r11, 8(%rsp) /* return address */ 
FIXUP_TOP_OF_STACK %r11, 16 + pax_force_retaddr ret CFI_ENDPROC -END(save_rest) +ENDPROC(save_rest) /* save complete stack frame */ .pushsection .kprobes.text, "ax" @@ -383,9 +669,10 @@ ENTRY(save_paranoid) js 1f /* negative -> in kernel */ SWAPGS xorl %ebx,%ebx -1: ret +1: pax_force_retaddr_bts + ret CFI_ENDPROC -END(save_paranoid) +ENDPROC(save_paranoid) .popsection /* @@ -409,7 +696,7 @@ ENTRY(ret_from_fork) RESTORE_REST - testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? + testb $3, CS-ARGOFFSET(%rsp) # from kernel_thread? je int_ret_from_sys_call testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET @@ -419,7 +706,7 @@ ENTRY(ret_from_fork) jmp ret_from_sys_call # go to the SYSRET fastpath CFI_ENDPROC -END(ret_from_fork) +ENDPROC(ret_from_fork) /* * System call entry. Upto 6 arguments in registers are supported. @@ -455,7 +742,7 @@ END(ret_from_fork) ENTRY(system_call) CFI_STARTPROC simple CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET + CFI_DEF_CFA rsp,0 CFI_REGISTER rip,rcx /*CFI_REGISTER rflags,r11*/ SWAPGS_UNSAFE_STACK @@ -468,12 +755,13 @@ ENTRY(system_call_after_swapgs) movq %rsp,PER_CPU_VAR(old_rsp) movq PER_CPU_VAR(kernel_stack),%rsp + SAVE_ARGS 8*6,1 + pax_enter_kernel_user /* * No need to follow this irqs off/on section - it's straight * and short: */ ENABLE_INTERRUPTS(CLBR_NONE) - SAVE_ARGS 8,1 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) movq %rcx,RIP-ARGOFFSET(%rsp) CFI_REL_OFFSET rip,RIP-ARGOFFSET @@ -483,7 +771,7 @@ ENTRY(system_call_after_swapgs) system_call_fastpath: cmpq $__NR_syscall_max,%rax ja badsys - movq %r10,%rcx + movq R10-ARGOFFSET(%rsp),%rcx call *sys_call_table(,%rax,8) # XXX: rip relative movq %rax,RAX-ARGOFFSET(%rsp) /* @@ -502,6 +790,8 @@ sysret_check: andl %edi,%edx jnz sysret_careful CFI_REMEMBER_STATE + pax_exit_kernel_user + pax_erase_kstack /* * sysretq will re-enable interrupts: */ @@ -555,14 +845,18 @@ badsys: * jump back to the normal fast path. 
*/ auditsys: - movq %r10,%r9 /* 6th arg: 4th syscall arg */ + movq R10-ARGOFFSET(%rsp),%r9 /* 6th arg: 4th syscall arg */ movq %rdx,%r8 /* 5th arg: 3rd syscall arg */ movq %rsi,%rcx /* 4th arg: 2nd syscall arg */ movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ movq %rax,%rsi /* 2nd arg: syscall number */ movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ call audit_syscall_entry + + pax_erase_kstack + LOAD_ARGS 0 /* reload call-clobbered registers */ + pax_set_fptr_mask jmp system_call_fastpath /* @@ -592,16 +886,20 @@ tracesys: FIXUP_TOP_OF_STACK %rdi movq %rsp,%rdi call syscall_trace_enter + + pax_erase_kstack + /* * Reload arg registers from stack in case ptrace changed them. * We don't reload %rax because syscall_trace_enter() returned * the value it wants us to use in the table lookup. */ LOAD_ARGS ARGOFFSET, 1 + pax_set_fptr_mask RESTORE_REST cmpq $__NR_syscall_max,%rax ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ - movq %r10,%rcx /* fixup for C */ + movq R10-ARGOFFSET(%rsp),%rcx /* fixup for C */ call *sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) /* Use IRET because user could have changed frame */ @@ -613,7 +911,7 @@ tracesys: GLOBAL(int_ret_from_sys_call) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl $3,CS-ARGOFFSET(%rsp) + testb $3,CS-ARGOFFSET(%rsp) je retint_restore_args movl $_TIF_ALLWORK_MASK,%edi /* edi: mask to check */ @@ -674,7 +972,7 @@ int_restore_rest: TRACE_IRQS_OFF jmp int_with_check CFI_ENDPROC -END(system_call) +ENDPROC(system_call) /* * Certain special system calls that need to save a complete full stack frame. 
@@ -690,7 +988,7 @@ ENTRY(\label) call \func jmp ptregscall_common CFI_ENDPROC -END(\label) +ENDPROC(\label) .endm PTREGSCALL stub_clone, sys_clone, %r8 @@ -708,9 +1006,10 @@ ENTRY(ptregscall_common) movq_cfi_restore R12+8, r12 movq_cfi_restore RBP+8, rbp movq_cfi_restore RBX+8, rbx + pax_force_retaddr ret $REST_SKIP /* pop extended registers */ CFI_ENDPROC -END(ptregscall_common) +ENDPROC(ptregscall_common) ENTRY(stub_execve) CFI_STARTPROC @@ -726,7 +1025,7 @@ ENTRY(stub_execve) RESTORE_REST jmp int_ret_from_sys_call CFI_ENDPROC -END(stub_execve) +ENDPROC(stub_execve) /* * sigreturn is special because it needs to restore all registers on return. @@ -744,7 +1043,7 @@ ENTRY(stub_rt_sigreturn) RESTORE_REST jmp int_ret_from_sys_call CFI_ENDPROC -END(stub_rt_sigreturn) +ENDPROC(stub_rt_sigreturn) /* * Build the entry stubs and pointer table with some assembler magic. @@ -780,7 +1079,7 @@ vector=vector+1 2: jmp common_interrupt .endr CFI_ENDPROC -END(irq_entries_start) +ENDPROC(irq_entries_start) .previous END(interrupt) @@ -800,6 +1099,16 @@ END(interrupt) CFI_ADJUST_CFA_OFFSET 10*8 call save_args PARTIAL_FRAME 0 +#ifdef CONFIG_PAX_MEMORY_UDEREF + testb $3, CS(%rdi) + jnz 1f + pax_enter_kernel + jmp 2f +1: pax_enter_kernel_user +2: +#else + pax_enter_kernel +#endif call \func .endm @@ -822,7 +1131,7 @@ ret_from_intr: CFI_ADJUST_CFA_OFFSET -8 exit_intr: GET_THREAD_INFO(%rcx) - testl $3,CS-ARGOFFSET(%rsp) + testb $3,CS-ARGOFFSET(%rsp) je retint_kernel /* Interrupt came from user space */ @@ -844,12 +1153,16 @@ retint_swapgs: /* return to user-space */ * The iretq could re-enable interrupts: */ DISABLE_INTERRUPTS(CLBR_ANY) + pax_exit_kernel_user + pax_erase_kstack TRACE_IRQS_IRETQ SWAPGS jmp restore_args retint_restore_args: /* return to kernel space */ DISABLE_INTERRUPTS(CLBR_ANY) + pax_exit_kernel + pax_force_retaddr RIP-ARGOFFSET /* * The iretq could re-enable interrupts: */ @@ -940,7 +1253,7 @@ ENTRY(retint_kernel) #endif CFI_ENDPROC -END(common_interrupt) 
+ENDPROC(common_interrupt) /* * APIC interrupts. @@ -953,7 +1266,7 @@ ENTRY(\sym) interrupt \do_sym jmp ret_from_intr CFI_ENDPROC -END(\sym) +ENDPROC(\sym) .endm #ifdef CONFIG_SMP @@ -1032,12 +1345,22 @@ ENTRY(\sym) CFI_ADJUST_CFA_OFFSET 15*8 call error_entry DEFAULT_FRAME 0 +#ifdef CONFIG_PAX_MEMORY_UDEREF + testb $3, CS(%rsp) + jnz 1f + pax_enter_kernel + jmp 2f +1: pax_enter_kernel_user +2: +#else + pax_enter_kernel +#endif movq %rsp,%rdi /* pt_regs pointer */ xorl %esi,%esi /* no error code */ call \do_sym jmp error_exit /* %ebx: no swapgs flag */ CFI_ENDPROC -END(\sym) +ENDPROC(\sym) .endm .macro paranoidzeroentry sym do_sym @@ -1049,12 +1372,22 @@ ENTRY(\sym) subq $15*8, %rsp call save_paranoid TRACE_IRQS_OFF +#ifdef CONFIG_PAX_MEMORY_UDEREF + testb $3, CS(%rsp) + jnz 1f + pax_enter_kernel + jmp 2f +1: pax_enter_kernel_user +2: +#else + pax_enter_kernel +#endif movq %rsp,%rdi /* pt_regs pointer */ xorl %esi,%esi /* no error code */ call \do_sym jmp paranoid_exit /* %ebx: no swapgs flag */ CFI_ENDPROC -END(\sym) +ENDPROC(\sym) .endm .macro paranoidzeroentry_ist sym do_sym ist @@ -1066,15 +1399,30 @@ ENTRY(\sym) subq $15*8, %rsp call save_paranoid TRACE_IRQS_OFF +#ifdef CONFIG_PAX_MEMORY_UDEREF + testb $3, CS(%rsp) + jnz 1f + pax_enter_kernel + jmp 2f +1: pax_enter_kernel_user +2: +#else + pax_enter_kernel +#endif movq %rsp,%rdi /* pt_regs pointer */ xorl %esi,%esi /* no error code */ - PER_CPU(init_tss, %rbp) +#ifdef CONFIG_SMP + imul $TSS_size, PER_CPU_VAR(cpu_number), %ebp + lea init_tss(%rbp), %rbp +#else + lea init_tss(%rip), %rbp +#endif subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) call \do_sym addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) jmp paranoid_exit /* %ebx: no swapgs flag */ CFI_ENDPROC -END(\sym) +ENDPROC(\sym) .endm .macro errorentry sym do_sym @@ -1085,13 +1433,23 @@ ENTRY(\sym) CFI_ADJUST_CFA_OFFSET 15*8 call error_entry DEFAULT_FRAME 0 +#ifdef CONFIG_PAX_MEMORY_UDEREF + testb $3, CS(%rsp) + jnz 1f + pax_enter_kernel + jmp 2f 
+1: pax_enter_kernel_user +2: +#else + pax_enter_kernel +#endif movq %rsp,%rdi /* pt_regs pointer */ movq ORIG_RAX(%rsp),%rsi /* get error code */ movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ call \do_sym jmp error_exit /* %ebx: no swapgs flag */ CFI_ENDPROC -END(\sym) +ENDPROC(\sym) .endm /* error code is on the stack already */ @@ -1104,13 +1462,23 @@ ENTRY(\sym) call save_paranoid DEFAULT_FRAME 0 TRACE_IRQS_OFF +#ifdef CONFIG_PAX_MEMORY_UDEREF + testb $3, CS(%rsp) + jnz 1f + pax_enter_kernel + jmp 2f +1: pax_enter_kernel_user +2: +#else + pax_enter_kernel +#endif movq %rsp,%rdi /* pt_regs pointer */ movq ORIG_RAX(%rsp),%rsi /* get error code */ movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ call \do_sym jmp paranoid_exit /* %ebx: no swapgs flag */ CFI_ENDPROC -END(\sym) +ENDPROC(\sym) .endm zeroentry divide_error do_divide_error @@ -1141,9 +1509,10 @@ gs_change: SWAPGS popf CFI_ADJUST_CFA_OFFSET -8 + pax_force_retaddr ret CFI_ENDPROC -END(native_load_gs_index) +ENDPROC(native_load_gs_index) .section __ex_table,"a" .align 8 @@ -1193,11 +1562,12 @@ ENTRY(kernel_thread) * of hacks for example to fork off the per-CPU idle tasks. * [Hopefully no generic code relies on the reschedule -AK] */ - RESTORE_ALL + RESTORE_REST UNFAKE_STACK_FRAME + pax_force_retaddr ret CFI_ENDPROC -END(kernel_thread) +ENDPROC(kernel_thread) ENTRY(child_rip) pushq $0 # fake return address @@ -1208,13 +1578,14 @@ ENTRY(child_rip) */ movq %rdi, %rax movq %rsi, %rdi + pax_force_fptr %rax call *%rax # exit mov %eax, %edi call do_exit ud2 # padding for call trace CFI_ENDPROC -END(child_rip) +ENDPROC(child_rip) /* * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. @@ -1241,11 +1612,11 @@ ENTRY(kernel_execve) RESTORE_REST testq %rax,%rax je int_ret_from_sys_call - RESTORE_ARGS UNFAKE_STACK_FRAME + pax_force_retaddr ret CFI_ENDPROC -END(kernel_execve) +ENDPROC(kernel_execve) /* Call softirq on interrupt stack. Interrupts are off. 
*/ ENTRY(call_softirq) @@ -1263,9 +1634,10 @@ ENTRY(call_softirq) CFI_DEF_CFA_REGISTER rsp CFI_ADJUST_CFA_OFFSET -8 decl PER_CPU_VAR(irq_count) + pax_force_retaddr ret CFI_ENDPROC -END(call_softirq) +ENDPROC(call_softirq) #ifdef CONFIG_XEN zeroentry xen_hypervisor_callback xen_do_hypervisor_callback @@ -1303,7 +1675,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) decl PER_CPU_VAR(irq_count) jmp error_exit CFI_ENDPROC -END(xen_do_hypervisor_callback) +ENDPROC(xen_do_hypervisor_callback) /* * Hypervisor uses this for application faults while it executes. @@ -1362,7 +1734,7 @@ ENTRY(xen_failsafe_callback) SAVE_ALL jmp error_exit CFI_ENDPROC -END(xen_failsafe_callback) +ENDPROC(xen_failsafe_callback) #endif /* CONFIG_XEN */ @@ -1405,16 +1777,31 @@ ENTRY(paranoid_exit) TRACE_IRQS_OFF testl %ebx,%ebx /* swapgs needed? */ jnz paranoid_restore - testl $3,CS(%rsp) + testb $3,CS(%rsp) jnz paranoid_userspace +#ifdef CONFIG_PAX_MEMORY_UDEREF + pax_exit_kernel + TRACE_IRQS_IRETQ 0 + SWAPGS_UNSAFE_STACK + RESTORE_ALL 8 + pax_force_retaddr_bts + jmp irq_return +#endif paranoid_swapgs: +#ifdef CONFIG_PAX_MEMORY_UDEREF + pax_exit_kernel_user +#else + pax_exit_kernel +#endif TRACE_IRQS_IRETQ 0 SWAPGS_UNSAFE_STACK RESTORE_ALL 8 jmp irq_return paranoid_restore: + pax_exit_kernel TRACE_IRQS_IRETQ 0 RESTORE_ALL 8 + pax_force_retaddr_bts jmp irq_return paranoid_userspace: GET_THREAD_INFO(%rcx) @@ -1443,7 +1830,7 @@ paranoid_schedule: TRACE_IRQS_OFF jmp paranoid_userspace CFI_ENDPROC -END(paranoid_exit) +ENDPROC(paranoid_exit) /* * Exception entry point. This expects an error code/orig_rax on the stack. 
@@ -1470,12 +1857,13 @@ ENTRY(error_entry) movq_cfi r14, R14+8 movq_cfi r15, R15+8 xorl %ebx,%ebx - testl $3,CS+8(%rsp) + testb $3,CS+8(%rsp) je error_kernelspace error_swapgs: SWAPGS error_sti: TRACE_IRQS_OFF + pax_force_retaddr_bts ret CFI_ENDPROC @@ -1497,7 +1885,7 @@ error_kernelspace: cmpq $gs_change,RIP+8(%rsp) je error_swapgs jmp error_sti -END(error_entry) +ENDPROC(error_entry) /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ @@ -1517,7 +1905,7 @@ ENTRY(error_exit) jnz retint_careful jmp retint_swapgs CFI_ENDPROC -END(error_exit) +ENDPROC(error_exit) /* runs on exception stack */ @@ -1529,6 +1917,16 @@ ENTRY(nmi) CFI_ADJUST_CFA_OFFSET 15*8 call save_paranoid DEFAULT_FRAME 0 +#ifdef CONFIG_PAX_MEMORY_UDEREF + testb $3, CS(%rsp) + jnz 1f + pax_enter_kernel + jmp 2f +1: pax_enter_kernel_user +2: +#else + pax_enter_kernel +#endif /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ movq %rsp,%rdi movq $-1,%rsi @@ -1539,12 +1937,28 @@ ENTRY(nmi) DISABLE_INTERRUPTS(CLBR_NONE) testl %ebx,%ebx /* swapgs needed? 
*/ jnz nmi_restore - testl $3,CS(%rsp) + testb $3,CS(%rsp) jnz nmi_userspace +#ifdef CONFIG_PAX_MEMORY_UDEREF + pax_exit_kernel + SWAPGS_UNSAFE_STACK + RESTORE_ALL 8 + pax_force_retaddr_bts + jmp irq_return +#endif nmi_swapgs: +#ifdef CONFIG_PAX_MEMORY_UDEREF + pax_exit_kernel_user +#else + pax_exit_kernel +#endif SWAPGS_UNSAFE_STACK + RESTORE_ALL 8 + jmp irq_return nmi_restore: + pax_exit_kernel RESTORE_ALL 8 + pax_force_retaddr_bts jmp irq_return nmi_userspace: GET_THREAD_INFO(%rcx) @@ -1573,14 +1987,14 @@ nmi_schedule: jmp paranoid_exit CFI_ENDPROC #endif -END(nmi) +ENDPROC(nmi) ENTRY(ignore_sysret) CFI_STARTPROC mov $-ENOSYS,%eax sysret CFI_ENDPROC -END(ignore_sysret) +ENDPROC(ignore_sysret) /* * End of kprobes section diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 9dbb527..7b3615a 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -103,7 +103,7 @@ static void *mod_code_ip; /* holds the IP to write to */ static void *mod_code_newcode; /* holds the text to write to the IP */ static unsigned nmi_wait_count; -static atomic_t nmi_update_count = ATOMIC_INIT(0); +static atomic_unchecked_t nmi_update_count = ATOMIC_INIT(0); int ftrace_arch_read_dyn_info(char *buf, int size) { @@ -111,7 +111,7 @@ int ftrace_arch_read_dyn_info(char *buf, int size) r = snprintf(buf, size, "%u %u", nmi_wait_count, - atomic_read(&nmi_update_count)); + atomic_read_unchecked(&nmi_update_count)); return r; } @@ -149,8 +149,10 @@ void ftrace_nmi_enter(void) { if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { smp_rmb(); + pax_open_kernel(); ftrace_mod_code(); - atomic_inc(&nmi_update_count); + pax_close_kernel(); + atomic_inc_unchecked(&nmi_update_count); } /* Must have previous changes seen before executions */ smp_mb(); @@ -215,7 +217,7 @@ do_ftrace_mod_code(unsigned long ip, void *new_code) -static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; +static unsigned char ftrace_nop[MCOUNT_INSN_SIZE] __read_only; static unsigned char 
*ftrace_nop_replace(void) { @@ -228,6 +230,8 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, { unsigned char replaced[MCOUNT_INSN_SIZE]; + ip = ktla_ktva(ip); + /* * Note: Due to modules and __init, code can * disappear and change, we need to protect against faulting @@ -284,7 +288,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) unsigned char old[MCOUNT_INSN_SIZE], *new; int ret; - memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); + memcpy(old, (void *)ktla_ktva((unsigned long)ftrace_call), MCOUNT_INSN_SIZE); new = ftrace_call_replace(ip, (unsigned long)func); ret = ftrace_modify_code(ip, old, new); @@ -337,15 +341,15 @@ int __init ftrace_dyn_arch_init(void *data) switch (faulted) { case 0: pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n"); - memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE); + memcpy(ftrace_nop, ktla_ktva(ftrace_test_p6nop), MCOUNT_INSN_SIZE); break; case 1: pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n"); - memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE); + memcpy(ftrace_nop, ktla_ktva(ftrace_test_nop5), MCOUNT_INSN_SIZE); break; case 2: pr_info("ftrace: converting mcount calls to jmp . 
+ 5\n"); - memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE); + memcpy(ftrace_nop, ktla_ktva(ftrace_test_jmp), MCOUNT_INSN_SIZE); break; } @@ -366,6 +370,8 @@ static int ftrace_mod_jmp(unsigned long ip, { unsigned char code[MCOUNT_INSN_SIZE]; + ip = ktla_ktva(ip); + if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 4f8e250..df24706 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -16,6 +16,7 @@ #include #include #include +#include static void __init i386_default_early_setup(void) { @@ -31,7 +32,7 @@ void __init i386_start_kernel(void) { reserve_trampoline_memory(); - reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); + reserve_early(LOAD_PHYSICAL_ADDR, __pa_symbol(&__bss_stop), "TEXT DATA BSS"); #ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 34c3308..6fc4e76 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -19,10 +19,17 @@ #include #include #include +#include /* Physical address */ #define pa(X) ((X) - __PAGE_OFFSET) +#ifdef CONFIG_PAX_KERNEXEC +#define ta(X) (X) +#else +#define ta(X) ((X) - __PAGE_OFFSET) +#endif + /* * References to members of the new_cpu_data structure. 
*/ @@ -52,11 +59,7 @@ * and small than max_low_pfn, otherwise will waste some page table entries */ -#if PTRS_PER_PMD > 1 -#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD) -#else -#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) -#endif +#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PTE) /* Enough space to fit pagetables for the low memory linear map */ MAPPING_BEYOND_END = \ @@ -73,6 +76,12 @@ INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm RESERVE_BRK(pagetables, INIT_MAP_SIZE) /* + * Real beginning of normal "text" segment + */ +ENTRY(stext) +ENTRY(_stext) + +/* * 32-bit kernel entrypoint; only used by the boot CPU. On entry, * %esi points to the real-mode code as a 32-bit pointer. * CS and DS must be 4 GB flat segments, but we don't depend on @@ -80,7 +89,16 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE) * can. */ __HEAD + +#ifdef CONFIG_PAX_KERNEXEC + jmp startup_32 +/* PaX: fill first page in .text with int3 to catch NULL derefs in kernel mode */ +.fill PAGE_SIZE-5,1,0xcc +#endif + ENTRY(startup_32) + movl pa(stack_start),%ecx + /* test KEEP_SEGMENTS flag to see if the bootloader is asking us to not reload segments */ testb $(1<<6), BP_loadflags(%esi) @@ -95,7 +113,60 @@ ENTRY(startup_32) movl %eax,%es movl %eax,%fs movl %eax,%gs + movl %eax,%ss 2: + leal -__PAGE_OFFSET(%ecx),%esp + +#ifdef CONFIG_SMP + movl $pa(cpu_gdt_table),%edi + movl $__per_cpu_load,%eax + movw %ax,__KERNEL_PERCPU + 2(%edi) + rorl $16,%eax + movb %al,__KERNEL_PERCPU + 4(%edi) + movb %ah,__KERNEL_PERCPU + 7(%edi) + movl $__per_cpu_end - 1,%eax + subl $__per_cpu_start,%eax + movw %ax,__KERNEL_PERCPU + 0(%edi) +#endif + +#ifdef CONFIG_PAX_MEMORY_UDEREF + movl $NR_CPUS,%ecx + movl $pa(cpu_gdt_table),%edi +1: + movl $((((__PAGE_OFFSET-1) & 0xf0000000) >> 12) | 0x00c09700),GDT_ENTRY_KERNEL_DS * 8 + 4(%edi) + movl $((((__PAGE_OFFSET-1) & 0xf0000000) >> 12) | 0x00c0fb00),GDT_ENTRY_DEFAULT_USER_CS * 8 + 4(%edi) + movl $((((__PAGE_OFFSET-1) & 
0xf0000000) >> 12) | 0x00c0f300),GDT_ENTRY_DEFAULT_USER_DS * 8 + 4(%edi) + addl $PAGE_SIZE_asm,%edi + loop 1b +#endif + +#ifdef CONFIG_PAX_KERNEXEC + movl $pa(boot_gdt),%edi + movl $__LOAD_PHYSICAL_ADDR,%eax + movw %ax,__BOOT_CS + 2(%edi) + rorl $16,%eax + movb %al,__BOOT_CS + 4(%edi) + movb %ah,__BOOT_CS + 7(%edi) + rorl $16,%eax + + ljmp $(__BOOT_CS),$1f +1: + + movl $NR_CPUS,%ecx + movl $pa(cpu_gdt_table),%edi + addl $__PAGE_OFFSET,%eax +1: + movw %ax,__KERNEL_CS + 2(%edi) + movw %ax,__KERNEXEC_KERNEL_CS + 2(%edi) + rorl $16,%eax + movb %al,__KERNEL_CS + 4(%edi) + movb %al,__KERNEXEC_KERNEL_CS + 4(%edi) + movb %ah,__KERNEL_CS + 7(%edi) + movb %ah,__KERNEXEC_KERNEL_CS + 7(%edi) + rorl $16,%eax + addl $PAGE_SIZE_asm,%edi + loop 1b +#endif /* * Clear BSS first so that there are no surprises... @@ -140,9 +211,7 @@ ENTRY(startup_32) cmpl $num_subarch_entries, %eax jae bad_subarch - movl pa(subarch_entries)(,%eax,4), %eax - subl $__PAGE_OFFSET, %eax - jmp *%eax + jmp *pa(subarch_entries)(,%eax,4) bad_subarch: WEAK(lguest_entry) @@ -154,10 +223,10 @@ WEAK(xen_entry) __INITDATA subarch_entries: - .long default_entry /* normal x86/PC */ - .long lguest_entry /* lguest hypervisor */ - .long xen_entry /* Xen hypervisor */ - .long default_entry /* Moorestown MID */ + .long ta(default_entry) /* normal x86/PC */ + .long ta(lguest_entry) /* lguest hypervisor */ + .long ta(xen_entry) /* Xen hypervisor */ + .long ta(default_entry) /* Moorestown MID */ num_subarch_entries = (. 
- subarch_entries) / 4 .previous #endif /* CONFIG_PARAVIRT */ @@ -218,8 +287,11 @@ default_entry: movl %eax, pa(max_pfn_mapped) /* Do early initialization of the fixmap area */ - movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax - movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) +#ifdef CONFIG_COMPAT_VDSO + movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR+_PAGE_USER,pa(swapper_pg_pmd+0x1000*KPMDS-8) +#else + movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,pa(swapper_pg_pmd+0x1000*KPMDS-8) +#endif #else /* Not PAE */ page_pde_offset = (__PAGE_OFFSET >> 20); @@ -249,8 +321,11 @@ page_pde_offset = (__PAGE_OFFSET >> 20); movl %eax, pa(max_pfn_mapped) /* Do early initialization of the fixmap area */ - movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax - movl %eax,pa(swapper_pg_dir+0xffc) +#ifdef CONFIG_COMPAT_VDSO + movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR+_PAGE_USER,pa(swapper_pg_dir+0xffc) +#else + movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,pa(swapper_pg_dir+0xffc) +#endif #endif jmp 3f /* @@ -272,6 +347,9 @@ ENTRY(startup_32_smp) movl %eax,%es movl %eax,%fs movl %eax,%gs + movl pa(stack_start),%ecx + movl %eax,%ss + leal -__PAGE_OFFSET(%ecx),%esp #endif /* CONFIG_SMP */ 3: @@ -297,6 +375,7 @@ ENTRY(startup_32_smp) orl %edx,%eax movl %eax,%cr4 +#ifdef CONFIG_X86_PAE btl $5, %eax # check if PAE is enabled jnc 6f @@ -305,6 +384,10 @@ ENTRY(startup_32_smp) cpuid cmpl $0x80000000, %eax jbe 6f + + /* Clear bogus XD_DISABLE bits */ + call verify_cpu + mov $0x80000001, %eax cpuid /* Execute Disable bit supported? 
*/ @@ -312,13 +395,17 @@ ENTRY(startup_32_smp) jnc 6f /* Setup EFER (Extended Feature Enable Register) */ - movl $0xc0000080, %ecx + movl $MSR_EFER, %ecx rdmsr btsl $11, %eax /* Make changes effective */ wrmsr + btsl $_PAGE_BIT_NX-32,pa(__supported_pte_mask+4) + movl $1,pa(nx_enabled) +#endif + 6: /* @@ -331,8 +418,8 @@ ENTRY(startup_32_smp) movl %eax,%cr0 /* ..and set paging (PG) bit */ ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */ 1: - /* Set up the stack pointer */ - lss stack_start,%esp + /* Shift the stack pointer to a virtual address */ + addl $__PAGE_OFFSET, %esp /* * Initialize eflags. Some BIOS's leave bits like NT set. This would @@ -344,9 +431,7 @@ ENTRY(startup_32_smp) #ifdef CONFIG_SMP cmpb $0, ready - jz 1f /* Initial CPU cleans BSS */ - jmp checkCPUtype -1: + jnz checkCPUtype #endif /* CONFIG_SMP */ /* @@ -424,7 +509,7 @@ is386: movl $2,%ecx # set MP 1: movl $(__KERNEL_DS),%eax # reload all the segment registers movl %eax,%ss # after changing gdt. - movl $(__USER_DS),%eax # DS/ES contains default USER segment +# movl $(__KERNEL_DS),%eax # DS/ES contains default KERNEL segment movl %eax,%ds movl %eax,%es @@ -438,15 +523,22 @@ is386: movl $2,%ecx # set MP */ cmpb $0,ready jne 1f - movl $per_cpu__gdt_page,%eax + movl $cpu_gdt_table,%eax movl $per_cpu__stack_canary,%ecx +#ifdef CONFIG_SMP + addl $__per_cpu_load,%ecx +#endif movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) shrl $16, %ecx movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax) 1: -#endif movl $(__KERNEL_STACK_CANARY),%eax +#elif defined(CONFIG_PAX_MEMORY_UDEREF) + movl $(__USER_DS),%eax +#else + xorl %eax,%eax +#endif movl %eax,%gs xorl %eax,%eax # Clear LDT @@ -454,14 +546,7 @@ is386: movl $2,%ecx # set MP cld # gcc2 wants the direction flag cleared at all times pushl $0 # fake return address for unwinder -#ifdef CONFIG_SMP - movb ready, %cl movb $1, ready - cmpb $0,%cl # the first CPU calls start_kernel - je 1f - movl (stack_start), %esp 
-1: -#endif /* CONFIG_SMP */ jmp *(initial_code) /* @@ -546,22 +631,22 @@ early_page_fault: jmp early_fault early_fault: - cld #ifdef CONFIG_PRINTK + cmpl $1,%ss:early_recursion_flag + je hlt_loop + incl %ss:early_recursion_flag + cld pusha movl $(__KERNEL_DS),%eax movl %eax,%ds movl %eax,%es - cmpl $2,early_recursion_flag - je hlt_loop - incl early_recursion_flag movl %cr2,%eax pushl %eax pushl %edx /* trapno */ pushl $fault_msg call printk +; call dump_stack #endif - call dump_stack hlt_loop: hlt jmp hlt_loop @@ -569,8 +654,11 @@ hlt_loop: /* This is the default interrupt "handler" :-) */ ALIGN ignore_int: - cld #ifdef CONFIG_PRINTK + cmpl $2,%ss:early_recursion_flag + je hlt_loop + incl %ss:early_recursion_flag + cld pushl %eax pushl %ecx pushl %edx @@ -579,9 +667,6 @@ ignore_int: movl $(__KERNEL_DS),%eax movl %eax,%ds movl %eax,%es - cmpl $2,early_recursion_flag - je hlt_loop - incl early_recursion_flag pushl 16(%esp) pushl 24(%esp) pushl 32(%esp) @@ -600,6 +685,8 @@ ignore_int: #endif iret +#include "verify_cpu.S" + __REFDATA .align 4 ENTRY(initial_code) @@ -610,31 +697,47 @@ ENTRY(initial_page_table) /* * BSS section */ -__PAGE_ALIGNED_BSS - .align PAGE_SIZE_asm #ifdef CONFIG_X86_PAE +.section .swapper_pg_pmd,"a",@progbits swapper_pg_pmd: .fill 1024*KPMDS,4,0 #else +.section .swapper_pg_dir,"a",@progbits ENTRY(swapper_pg_dir) .fill 1024,4,0 #endif +.section .swapper_pg_fixmap,"a",@progbits swapper_pg_fixmap: .fill 1024,4,0 #ifdef CONFIG_X86_TRAMPOLINE +.section .trampoline_pg_dir,"a",@progbits ENTRY(trampoline_pg_dir) +#ifdef CONFIG_X86_PAE + .fill 4,8,0 +#else .fill 1024,4,0 #endif +#endif + +.section .empty_zero_page,"a",@progbits ENTRY(empty_zero_page) .fill 4096,1,0 /* + * The IDT has to be page-aligned to simplify the Pentium + * F0 0F bug workaround.. We have a special link segment + * for this. + */ +.section .idt,"a",@progbits +ENTRY(idt_table) + .fill 256,8,0 + +/* * This starts the data section. 
*/ #ifdef CONFIG_X86_PAE -__PAGE_ALIGNED_DATA - /* Page-aligned for the benefit of paravirt? */ - .align PAGE_SIZE_asm +.section .swapper_pg_dir,"a",@progbits + ENTRY(swapper_pg_dir) .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ # if KPMDS == 3 @@ -653,15 +756,24 @@ ENTRY(swapper_pg_dir) # error "Kernel PMDs should be 1, 2 or 3" # endif .align PAGE_SIZE_asm /* needs to be page-sized too */ + +#ifdef CONFIG_PAX_PER_CPU_PGD +ENTRY(cpu_pgd) + .rept NR_CPUS + .fill 4,8,0 + .endr +#endif + #endif .data +.balign 4 ENTRY(stack_start) - .long init_thread_union+THREAD_SIZE - .long __BOOT_DS + .long init_thread_union+THREAD_SIZE-8 ready: .byte 0 +.section .rodata,"a",@progbits early_recursion_flag: .long 0 @@ -697,7 +809,7 @@ fault_msg: .word 0 # 32 bit align gdt_desc.address boot_gdt_descr: .word __BOOT_DS+7 - .long boot_gdt - __PAGE_OFFSET + .long pa(boot_gdt) .word 0 # 32-bit align idt_desc.address idt_descr: @@ -708,7 +820,7 @@ idt_descr: .word 0 # 32 bit align gdt_desc.address ENTRY(early_gdt_descr) .word GDT_ENTRIES*8-1 - .long per_cpu__gdt_page /* Overwritten for secondary CPUs */ + .long cpu_gdt_table /* Overwritten for secondary CPUs */ /* * The boot_gdt must mirror the equivalent in setup.S and is @@ -717,5 +829,65 @@ ENTRY(early_gdt_descr) .align L1_CACHE_BYTES ENTRY(boot_gdt) .fill GDT_ENTRY_BOOT_CS,8,0 - .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ + .quad 0x00cf9b000000ffff /* kernel 4GB code at 0x00000000 */ + .quad 0x00cf93000000ffff /* kernel 4GB data at 0x00000000 */ + + .align PAGE_SIZE_asm +ENTRY(cpu_gdt_table) + .rept NR_CPUS + .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0x0000000000000000 /* 0x0b reserved */ + .quad 0x0000000000000000 /* 0x13 reserved */ + .quad 0x0000000000000000 /* 0x1b reserved */ + +#ifdef CONFIG_PAX_KERNEXEC + .quad 0x00cf9b000000ffff /* 0x20 alternate kernel 4GB code at 0x00000000 */ +#else + .quad 0x0000000000000000 /* 
0x20 unused */ +#endif + + .quad 0x0000000000000000 /* 0x28 unused */ + .quad 0x0000000000000000 /* 0x33 TLS entry 1 */ + .quad 0x0000000000000000 /* 0x3b TLS entry 2 */ + .quad 0x0000000000000000 /* 0x43 TLS entry 3 */ + .quad 0x0000000000000000 /* 0x4b reserved */ + .quad 0x0000000000000000 /* 0x53 reserved */ + .quad 0x0000000000000000 /* 0x5b reserved */ + + .quad 0x00cf9b000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ + .quad 0x00cf93000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ + .quad 0x00cffb000000ffff /* 0x73 user 4GB code at 0x00000000 */ + .quad 0x00cff3000000ffff /* 0x7b user 4GB data at 0x00000000 */ + + .quad 0x0000000000000000 /* 0x80 TSS descriptor */ + .quad 0x0000000000000000 /* 0x88 LDT descriptor */ + + /* + * Segments used for calling PnP BIOS have byte granularity. + * The code segments and data segments have fixed 64k limits, + * the transfer segment sizes are set at run time. + */ + .quad 0x00409b000000ffff /* 0x90 32-bit code */ + .quad 0x00009b000000ffff /* 0x98 16-bit code */ + .quad 0x000093000000ffff /* 0xa0 16-bit data */ + .quad 0x0000930000000000 /* 0xa8 16-bit data */ + .quad 0x0000930000000000 /* 0xb0 16-bit data */ + + /* + * The APM segments have byte granularity and their bases + * are set at run time. All have 64k limits. 
+ */ + .quad 0x00409b000000ffff /* 0xb8 APM CS code */ + .quad 0x00009b000000ffff /* 0xc0 APM CS 16 code (16 bit) */ + .quad 0x004093000000ffff /* 0xc8 APM DS data */ + + .quad 0x00c0930000000000 /* 0xd0 - ESPFIX SS */ + .quad 0x0040930000000000 /* 0xd8 - PERCPU */ + .quad 0x0040910000000017 /* 0xe0 - STACK_CANARY */ + .quad 0x0000000000000000 /* 0xe8 - PCIBIOS_CS */ + .quad 0x0000000000000000 /* 0xf0 - PCIBIOS_DS */ + .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ + + /* Be sure this is zeroed to avoid false validations in Xen */ + .fill PAGE_SIZE_asm - GDT_SIZE,1,0 + .endr diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 780cd92..758b2a6 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -19,6 +19,8 @@ #include #include #include +#include +#include #ifdef CONFIG_PARAVIRT #include @@ -38,6 +40,12 @@ L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET) L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET) L4_START_KERNEL = pgd_index(__START_KERNEL_map) L3_START_KERNEL = pud_index(__START_KERNEL_map) +L4_VMALLOC_START = pgd_index(VMALLOC_START) +L3_VMALLOC_START = pud_index(VMALLOC_START) +L4_VMALLOC_END = pgd_index(VMALLOC_END) +L3_VMALLOC_END = pud_index(VMALLOC_END) +L4_VMEMMAP_START = pgd_index(VMEMMAP_START) +L3_VMEMMAP_START = pud_index(VMEMMAP_START) .text __HEAD @@ -85,35 +93,23 @@ startup_64: */ addq %rbp, init_level4_pgt + 0(%rip) addq %rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip) + addq %rbp, init_level4_pgt + (L4_VMALLOC_START*8)(%rip) + addq %rbp, init_level4_pgt + (L4_VMALLOC_END*8)(%rip) + addq %rbp, init_level4_pgt + (L4_VMEMMAP_START*8)(%rip) addq %rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip) addq %rbp, level3_ident_pgt + 0(%rip) +#ifndef CONFIG_XEN + addq %rbp, level3_ident_pgt + 8(%rip) +#endif - addq %rbp, level3_kernel_pgt + (510*8)(%rip) - addq %rbp, level3_kernel_pgt + (511*8)(%rip) + addq %rbp, level3_vmemmap_pgt + (L3_VMEMMAP_START*8)(%rip) + + addq %rbp, level3_kernel_pgt + 
(L3_START_KERNEL*8)(%rip) + addq %rbp, level3_kernel_pgt + (L3_START_KERNEL*8+8)(%rip) addq %rbp, level2_fixmap_pgt + (506*8)(%rip) - - /* Add an Identity mapping if I am above 1G */ - leaq _text(%rip), %rdi - andq $PMD_PAGE_MASK, %rdi - - movq %rdi, %rax - shrq $PUD_SHIFT, %rax - andq $(PTRS_PER_PUD - 1), %rax - jz ident_complete - - leaq (level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx - leaq level3_ident_pgt(%rip), %rbx - movq %rdx, 0(%rbx, %rax, 8) - - movq %rdi, %rax - shrq $PMD_SHIFT, %rax - andq $(PTRS_PER_PMD - 1), %rax - leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx - leaq level2_spare_pgt(%rip), %rbx - movq %rdx, 0(%rbx, %rax, 8) -ident_complete: + addq %rbp, level2_fixmap_pgt + (507*8)(%rip) /* * Fixup the kernel text+data virtual addresses. Note that @@ -161,8 +157,8 @@ ENTRY(secondary_startup_64) * after the boot processor executes this code. */ - /* Enable PAE mode and PGE */ - movl $(X86_CR4_PAE | X86_CR4_PGE), %eax + /* Enable PAE mode and PSE/PGE */ + movl $(X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE), %eax movq %rax, %cr4 /* Setup early boot stage 4 level pagetables. */ @@ -184,9 +180,16 @@ ENTRY(secondary_startup_64) movl $MSR_EFER, %ecx rdmsr btsl $_EFER_SCE, %eax /* Enable System Call */ - btl $20,%edi /* No Execute supported? */ + btl $(X86_FEATURE_NX & 31),%edi /* No Execute supported? */ jnc 1f btsl $_EFER_NX, %eax + leaq init_level4_pgt(%rip), %rdi +#ifndef CONFIG_EFI + btsq $_PAGE_BIT_NX, 8*L4_PAGE_OFFSET(%rdi) +#endif + btsq $_PAGE_BIT_NX, 8*L4_VMALLOC_START(%rdi) + btsq $_PAGE_BIT_NX, 8*L4_VMALLOC_END(%rdi) + btsq $_PAGE_BIT_NX, 8*L4_VMEMMAP_START(%rdi) 1: wrmsr /* Make changes effective */ /* Setup cr0 */ @@ -249,6 +252,7 @@ ENTRY(secondary_startup_64) * jump. In addition we need to ensure %cs is set so we make this * a far return. 
*/ + pax_set_fptr_mask movq initial_code(%rip),%rax pushq $0 # fake return address to stop unwinder pushq $__KERNEL_CS # set correct cs @@ -262,16 +266,16 @@ ENTRY(secondary_startup_64) .quad x86_64_start_kernel ENTRY(initial_gs) .quad INIT_PER_CPU_VAR(irq_stack_union) - __FINITDATA ENTRY(stack_start) .quad init_thread_union+THREAD_SIZE-8 .word 0 + __FINITDATA bad_address: jmp bad_address - .section ".init.text","ax" + __INIT #ifdef CONFIG_EARLY_PRINTK .globl early_idt_handlers early_idt_handlers: @@ -316,18 +320,23 @@ ENTRY(early_idt_handler) #endif /* EARLY_PRINTK */ 1: hlt jmp 1b + .previous #ifdef CONFIG_EARLY_PRINTK + __INITDATA early_recursion_flag: .long 0 + .previous + .section .rodata,"a",@progbits early_idt_msg: .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n" early_idt_ripmsg: .asciz "RIP %s\n" + .previous #endif /* CONFIG_EARLY_PRINTK */ - .previous + .section .rodata,"a",@progbits #define NEXT_PAGE(name) \ .balign PAGE_SIZE; \ ENTRY(name) @@ -350,13 +359,41 @@ NEXT_PAGE(init_level4_pgt) .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE .org init_level4_pgt + L4_PAGE_OFFSET*8, 0 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE + .org init_level4_pgt + L4_VMALLOC_START*8, 0 + .quad level3_vmalloc_start_pgt - __START_KERNEL_map + _KERNPG_TABLE + .org init_level4_pgt + L4_VMALLOC_END*8, 0 + .quad level3_vmalloc_end_pgt - __START_KERNEL_map + _KERNPG_TABLE + .org init_level4_pgt + L4_VMEMMAP_START*8, 0 + .quad level3_vmemmap_pgt - __START_KERNEL_map + _KERNPG_TABLE .org init_level4_pgt + L4_START_KERNEL*8, 0 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE +#ifdef CONFIG_PAX_PER_CPU_PGD +NEXT_PAGE(cpu_pgd) + .rept NR_CPUS + .fill 512,8,0 + .endr +#endif + NEXT_PAGE(level3_ident_pgt) .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE +#ifdef CONFIG_XEN .fill 511,8,0 +#else + .quad level2_ident_pgt + PAGE_SIZE - __START_KERNEL_map + _KERNPG_TABLE + .fill 
510,8,0 +#endif + +NEXT_PAGE(level3_vmalloc_start_pgt) + .fill 512,8,0 + +NEXT_PAGE(level3_vmalloc_end_pgt) + .fill 512,8,0 + +NEXT_PAGE(level3_vmemmap_pgt) + .fill L3_VMEMMAP_START,8,0 + .quad level2_vmemmap_pgt - __START_KERNEL_map + _KERNPG_TABLE NEXT_PAGE(level3_kernel_pgt) .fill L3_START_KERNEL,8,0 @@ -364,20 +401,23 @@ NEXT_PAGE(level3_kernel_pgt) .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE +NEXT_PAGE(level2_vmemmap_pgt) + .fill 512,8,0 + NEXT_PAGE(level2_fixmap_pgt) - .fill 506,8,0 - .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE - /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ - .fill 5,8,0 + .fill 507,8,0 + .quad level1_vsyscall_pgt - __START_KERNEL_map + _PAGE_TABLE + /* 6MB reserved for vsyscalls + a 2MB hole = 3 + 1 entries */ + .fill 4,8,0 -NEXT_PAGE(level1_fixmap_pgt) +NEXT_PAGE(level1_vsyscall_pgt) .fill 512,8,0 -NEXT_PAGE(level2_ident_pgt) - /* Since I easily can, map the first 1G. + /* Since I easily can, map the first 2G. * Don't set NX because code runs from these pages. */ - PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) +NEXT_PAGE(level2_ident_pgt) + PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, 2*PTRS_PER_PMD) NEXT_PAGE(level2_kernel_pgt) /* @@ -390,33 +430,55 @@ NEXT_PAGE(level2_kernel_pgt) * If you want to increase this then increase MODULES_VADDR * too.) 
*/ - PMDS(0, __PAGE_KERNEL_LARGE_EXEC, - KERNEL_IMAGE_SIZE/PMD_SIZE) - -NEXT_PAGE(level2_spare_pgt) - .fill 512, 8, 0 + PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE/PMD_SIZE) #undef PMDS #undef NEXT_PAGE - .data + .align PAGE_SIZE +ENTRY(cpu_gdt_table) + .rept NR_CPUS + .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0x00cf9b000000ffff /* __KERNEL32_CS */ + .quad 0x00af9b000000ffff /* __KERNEL_CS */ + .quad 0x00cf93000000ffff /* __KERNEL_DS */ + .quad 0x00cffb000000ffff /* __USER32_CS */ + .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */ + .quad 0x00affb000000ffff /* __USER_CS */ + +#ifdef CONFIG_PAX_KERNEXEC + .quad 0x00af9b000000ffff /* __KERNEXEC_KERNEL_CS */ +#else + .quad 0x0 /* unused */ +#endif + + .quad 0,0 /* TSS */ + .quad 0,0 /* LDT */ + .quad 0,0,0 /* three TLS descriptors */ + .quad 0x0000f40000000000 /* node/CPU stored in limit */ + /* asm/segment.h:GDT_ENTRIES must match this */ + + /* zero the remaining page */ + .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0 + .endr + .align 16 .globl early_gdt_descr early_gdt_descr: .word GDT_ENTRIES*8-1 early_gdt_descr_base: - .quad INIT_PER_CPU_VAR(gdt_page) + .quad cpu_gdt_table ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ .quad 0x0000000000000000 #include "../../x86/xen/xen-head.S" - - .section .bss, "aw", @nobits + + .section .rodata,"a",@progbits .align L1_CACHE_BYTES ENTRY(idt_table) - .skip IDT_ENTRIES * 16 + .fill 512,8,0 __PAGE_ALIGNED_BSS .align PAGE_SIZE diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 9c3bd4a..e1d9b35 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -20,8 +20,12 @@ extern void cmpxchg8b_emu(void); EXPORT_SYMBOL(cmpxchg8b_emu); #endif +EXPORT_SYMBOL_GPL(cpu_gdt_table); + /* Networking helper routines. 
*/ EXPORT_SYMBOL(csum_partial_copy_generic); +EXPORT_SYMBOL(csum_partial_copy_generic_to_user); +EXPORT_SYMBOL(csum_partial_copy_generic_from_user); EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); @@ -36,3 +40,7 @@ EXPORT_SYMBOL(strstr); EXPORT_SYMBOL(csum_partial); EXPORT_SYMBOL(empty_zero_page); + +#ifdef CONFIG_PAX_KERNEXEC +EXPORT_SYMBOL(__LOAD_PHYSICAL_ADDR); +#endif diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index df89102..a244320 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -208,7 +208,7 @@ spurious_8259A_irq: "spurious 8259A interrupt: IRQ%d.\n", irq); spurious_irq_mask |= irqmask; } - atomic_inc(&irq_err_count); + atomic_inc_unchecked(&irq_err_count); /* * Theoretically we do not have to handle this IRQ, * but in Linux this does not cause problems and is diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index 3a54dcb..1c22348 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c @@ -20,8 +20,7 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union __init_task_data = - { INIT_THREAD_INFO(init_task) }; +union thread_union init_thread_union __init_task_data; /* * Initial task structure. @@ -38,5 +37,5 @@ EXPORT_SYMBOL(init_task); * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ -DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; - +struct tss_struct init_tss[NR_CPUS] ____cacheline_internodealigned_in_smp = { [0 ... 
NR_CPUS-1] = INIT_TSS }; +EXPORT_SYMBOL(init_tss); diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 99c4d30..74c84e9 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,12 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) return -EINVAL; +#ifdef CONFIG_GRKERNSEC_IO + if (turn_on && grsec_disable_privio) { + gr_handle_ioperm(); + return -EPERM; + } +#endif if (turn_on && !capable(CAP_SYS_RAWIO)) return -EPERM; @@ -67,7 +74,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) * because the ->io_bitmap_max value must match the bitmap * contents: */ - tss = &per_cpu(init_tss, get_cpu()); + tss = init_tss + get_cpu(); set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); @@ -111,6 +118,12 @@ static int do_iopl(unsigned int level, struct pt_regs *regs) return -EINVAL; /* Trying to gain more privileges? 
*/ if (level > old) { +#ifdef CONFIG_GRKERNSEC_IO + if (grsec_disable_privio) { + gr_handle_iopl(); + return -EPERM; + } +#endif if (!capable(CAP_SYS_RAWIO)) return -EPERM; } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 04bbd52..83a07d9 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -15,7 +15,7 @@ #include #include -atomic_t irq_err_count; +atomic_unchecked_t irq_err_count; /* Function pointer for generic interrupt vector handling */ void (*generic_interrupt_extension)(void) = NULL; @@ -114,9 +114,9 @@ static int show_other_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", per_cpu(mce_poll_count, j)); seq_printf(p, " Machine check polls\n"); #endif - seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); + seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read_unchecked(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) - seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); + seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read_unchecked(&irq_mis_count)); #endif return 0; } @@ -209,10 +209,10 @@ u64 arch_irq_stat_cpu(unsigned int cpu) u64 arch_irq_stat(void) { - u64 sum = atomic_read(&irq_err_count); + u64 sum = atomic_read_unchecked(&irq_err_count); #ifdef CONFIG_X86_IO_APIC - sum += atomic_read(&irq_mis_count); + sum += atomic_read_unchecked(&irq_mis_count); #endif return sum; } diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 7d35d0f..03f1d52 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -35,7 +35,7 @@ static int check_stack_overflow(void) __asm__ __volatile__("andl %%esp,%0" : "=r" (sp) : "0" (THREAD_SIZE - 1)); - return sp < (sizeof(struct thread_info) + STACK_WARN); + return sp < STACK_WARN; } static void print_stack_overflow(void) @@ -54,9 +54,9 @@ static inline void print_stack_overflow(void) { } * per-CPU IRQ handling contexts (thread information and stack) */ union irq_ctx { - struct thread_info tinfo; - u32 
stack[THREAD_SIZE/sizeof(u32)]; -} __attribute__((aligned(PAGE_SIZE))); + unsigned long previous_esp; + u32 stack[THREAD_SIZE/sizeof(u32)]; +} __attribute__((aligned(THREAD_SIZE))); static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx); @@ -78,10 +78,9 @@ static void call_on_stack(void *func, void *stack) static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { - union irq_ctx *curctx, *irqctx; + union irq_ctx *irqctx; u32 *isp, arg1, arg2; - curctx = (union irq_ctx *) current_thread_info(); irqctx = __get_cpu_var(hardirq_ctx); /* @@ -90,21 +89,16 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) * handler) we can't do that and just have to keep using the * current stack (which is the irq stack already after all) */ - if (unlikely(curctx == irqctx)) + if (unlikely((void *)current_stack_pointer - (void *)irqctx < THREAD_SIZE)) return 0; /* build the stack frame on the IRQ stack */ - isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); - irqctx->tinfo.task = curctx->tinfo.task; - irqctx->tinfo.previous_esp = current_stack_pointer; + isp = (u32 *) ((char *)irqctx + sizeof(*irqctx) - 8); + irqctx->previous_esp = current_stack_pointer; - /* - * Copy the softirq bits in preempt_count so that the - * softirq checks work in the hardirq context. 
- */ - irqctx->tinfo.preempt_count = - (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) | - (curctx->tinfo.preempt_count & SOFTIRQ_MASK); +#ifdef CONFIG_PAX_MEMORY_UDEREF + __set_fs(MAKE_MM_SEG(0)); +#endif if (unlikely(overflow)) call_on_stack(print_stack_overflow, isp); @@ -116,6 +110,11 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) : "0" (irq), "1" (desc), "2" (isp), "D" (desc->handle_irq) : "memory", "cc", "ecx"); + +#ifdef CONFIG_PAX_MEMORY_UDEREF + __set_fs(current_thread_info()->addr_limit); +#endif + return 1; } @@ -124,28 +123,11 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) */ void __cpuinit irq_ctx_init(int cpu) { - union irq_ctx *irqctx; - if (per_cpu(hardirq_ctx, cpu)) return; - irqctx = &per_cpu(hardirq_stack, cpu); - irqctx->tinfo.task = NULL; - irqctx->tinfo.exec_domain = NULL; - irqctx->tinfo.cpu = cpu; - irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); - - per_cpu(hardirq_ctx, cpu) = irqctx; - - irqctx = &per_cpu(softirq_stack, cpu); - irqctx->tinfo.task = NULL; - irqctx->tinfo.exec_domain = NULL; - irqctx->tinfo.cpu = cpu; - irqctx->tinfo.preempt_count = 0; - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); - - per_cpu(softirq_ctx, cpu) = irqctx; + per_cpu(hardirq_ctx, cpu) = &per_cpu(hardirq_stack, cpu); + per_cpu(softirq_ctx, cpu) = &per_cpu(softirq_stack, cpu); printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); @@ -159,7 +141,6 @@ void irq_ctx_exit(int cpu) asmlinkage void do_softirq(void) { unsigned long flags; - struct thread_info *curctx; union irq_ctx *irqctx; u32 *isp; @@ -169,15 +150,22 @@ asmlinkage void do_softirq(void) local_irq_save(flags); if (local_softirq_pending()) { - curctx = current_thread_info(); irqctx = __get_cpu_var(softirq_ctx); - irqctx->tinfo.task = curctx->task; - irqctx->tinfo.previous_esp = current_stack_pointer; + irqctx->previous_esp = current_stack_pointer; /* build the 
stack frame on the softirq stack */ - isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); + isp = (u32 *) ((char *)irqctx + sizeof(*irqctx) - 8); + +#ifdef CONFIG_PAX_MEMORY_UDEREF + __set_fs(MAKE_MM_SEG(0)); +#endif call_on_stack(__do_softirq, isp); + +#ifdef CONFIG_PAX_MEMORY_UDEREF + __set_fs(current_thread_info()->addr_limit); +#endif + /* * Shouldnt happen, we returned above if in_interrupt(): */ diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8d82a77..0baf312 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -390,13 +390,13 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, /* clear the trace bit */ linux_regs->flags &= ~X86_EFLAGS_TF; - atomic_set(&kgdb_cpu_doing_single_step, -1); + atomic_set_unchecked(&kgdb_cpu_doing_single_step, -1); /* set the trace bit if we're stepping */ if (remcomInBuffer[0] == 's') { linux_regs->flags |= X86_EFLAGS_TF; kgdb_single_step = 1; - atomic_set(&kgdb_cpu_doing_single_step, + atomic_set_unchecked(&kgdb_cpu_doing_single_step, raw_smp_processor_id()); } @@ -476,7 +476,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) break; case DIE_DEBUG: - if (atomic_read(&kgdb_cpu_doing_single_step) == + if (atomic_read_unchecked(&kgdb_cpu_doing_single_step) == raw_smp_processor_id()) { if (user_mode(regs)) return single_step_cont(regs, args); @@ -573,7 +573,7 @@ unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs) return instruction_pointer(regs); } -struct kgdb_arch arch_kgdb_ops = { +const struct kgdb_arch arch_kgdb_ops = { /* Breakpoint instruction: */ .gdb_bpt_instr = { 0xcc }, .flags = KGDB_HW_BREAKPOINT, diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7a67820..8d15b75 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -168,9 +168,13 @@ static void __kprobes set_jmp_op(void *from, void *to) char op; s32 raddr; } __attribute__((packed)) * jop; - jop = (struct __arch_jmp_op *)from; + + jop = (struct 
__arch_jmp_op *)(ktla_ktva(from)); + + pax_open_kernel(); jop->raddr = (s32)((long)(to) - ((long)(from) + 5)); jop->op = RELATIVEJUMP_INSTRUCTION; + pax_close_kernel(); } /* @@ -195,7 +199,7 @@ static int __kprobes can_boost(kprobe_opcode_t *opcodes) kprobe_opcode_t opcode; kprobe_opcode_t *orig_opcodes = opcodes; - if (search_exception_tables((unsigned long)opcodes)) + if (search_exception_tables(ktva_ktla((unsigned long)opcodes))) return 0; /* Page fault may occur on this address. */ retry: @@ -339,7 +343,9 @@ static void __kprobes fix_riprel(struct kprobe *p) disp = (u8 *) p->addr + *((s32 *) insn) - (u8 *) p->ainsn.insn; BUG_ON((s64) (s32) disp != disp); /* Sanity check. */ + pax_open_kernel(); *(s32 *)insn = (s32) disp; + pax_close_kernel(); } } #endif @@ -347,16 +353,18 @@ static void __kprobes fix_riprel(struct kprobe *p) static void __kprobes arch_copy_kprobe(struct kprobe *p) { - memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + pax_open_kernel(); + memcpy(p->ainsn.insn, ktla_ktva(p->addr), MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + pax_close_kernel(); fix_riprel(p); - if (can_boost(p->addr)) + if (can_boost(ktla_ktva(p->addr))) p->ainsn.boostable = 0; else p->ainsn.boostable = -1; - p->opcode = *p->addr; + p->opcode = *(ktla_ktva(p->addr)); } int __kprobes arch_prepare_kprobe(struct kprobe *p) @@ -434,7 +442,7 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) if (p->opcode == BREAKPOINT_INSTRUCTION) regs->ip = (unsigned long)p->addr; else - regs->ip = (unsigned long)p->ainsn.insn; + regs->ip = ktva_ktla((unsigned long)p->ainsn.insn); } void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, @@ -455,7 +463,7 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, if (p->ainsn.boostable == 1 && !p->post_handler) { /* Boost up -- we can execute copied instructions directly */ reset_current_kprobe(); - regs->ip = (unsigned long)p->ainsn.insn; + regs->ip = 
ktva_ktla((unsigned long)p->ainsn.insn); preempt_enable_no_resched(); return; } @@ -525,7 +533,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) struct kprobe_ctlblk *kcb; addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); - if (*addr != BREAKPOINT_INSTRUCTION) { + if (*(kprobe_opcode_t *)ktla_ktva((unsigned long)addr) != BREAKPOINT_INSTRUCTION) { /* * The breakpoint instruction was removed right * after we hit it. Another cpu has removed @@ -637,6 +645,9 @@ static void __used __kprobes kretprobe_trampoline_holder(void) /* Skip orig_ax, ip, cs */ " addq $24, %rsp\n" " popfq\n" +#ifdef CONFIG_PAX_KERNEXEC_PLUGIN + " btsq $63,(%rsp)\n" +#endif #else " pushf\n" /* @@ -777,7 +788,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { unsigned long *tos = stack_addr(regs); - unsigned long copy_ip = (unsigned long)p->ainsn.insn; + unsigned long copy_ip = ktva_ktla((unsigned long)p->ainsn.insn); unsigned long orig_ip = (unsigned long)p->addr; kprobe_opcode_t *insn = p->ainsn.insn; @@ -960,7 +971,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = data; int ret = NOTIFY_DONE; - if (args->regs && user_mode_vm(args->regs)) + if (args->regs && user_mode(args->regs)) return ret; switch (val) { diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 63b0ec8..6d92227 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -216,6 +216,7 @@ static void __init paravirt_ops_setup(void) pv_mmu_ops.set_pud = kvm_set_pud; #if PAGETABLE_LEVELS == 4 pv_mmu_ops.set_pgd = kvm_set_pgd; + pv_mmu_ops.set_pgd_batched = kvm_set_pgd; #endif #endif pv_mmu_ops.flush_tlb_user = kvm_flush_tlb; diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index ec6ef60..ab2c824 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -66,13 +66,13 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) if (reload) { #ifdef CONFIG_SMP 
preempt_disable(); - load_LDT(pc); + load_LDT_nolock(pc); if (!cpumask_equal(mm_cpumask(current->mm), cpumask_of(smp_processor_id()))) smp_call_function(flush_ldt, current->mm, 1); preempt_enable(); #else - load_LDT(pc); + load_LDT_nolock(pc); #endif } if (oldsize) { @@ -94,7 +94,7 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old) return err; for (i = 0; i < old->size; i++) - write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); + write_ldt_entry(new->ldt, i, old->ldt + i); return 0; } @@ -115,6 +115,24 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) retval = copy_ldt(&mm->context, &old_mm->context); mutex_unlock(&old_mm->context.lock); } + + if (tsk == current) { + mm->context.vdso = 0; + +#ifdef CONFIG_X86_32 +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + mm->context.user_cs_base = 0UL; + mm->context.user_cs_limit = ~0UL; + +#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_SMP) + cpus_clear(mm->context.cpu_user_cs_mask); +#endif + +#endif +#endif + + } + return retval; } @@ -229,6 +247,13 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) } } +#ifdef CONFIG_PAX_SEGMEXEC + if ((mm->pax_flags & MF_PAX_SEGMEXEC) && (ldt_info.contents & MODIFY_LDT_CONTENTS_CODE)) { + error = -EINVAL; + goto out_unlock; + } +#endif + fill_ldt(&ldt, &ldt_info); if (oldmode) ldt.avl = 0; diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index c1c429d..f02eaf9 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -26,7 +26,7 @@ #include #include -static void set_idt(void *newidt, __u16 limit) +static void set_idt(struct desc_struct *newidt, __u16 limit) { struct desc_ptr curidt; @@ -38,7 +38,7 @@ static void set_idt(void *newidt, __u16 limit) } -static void set_gdt(void *newgdt, __u16 limit) +static void set_gdt(struct desc_struct *newgdt, __u16 limit) { struct desc_ptr curgdt; @@ -217,7 +217,7 @@ void 
machine_kexec(struct kimage *image) } control_page = page_address(image->control_code_page); - memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); + memcpy(control_page, (void *)ktla_ktva((unsigned long)relocate_kernel), KEXEC_CONTROL_CODE_MAX_SIZE); relocate_kernel_ptr = control_page; page_list[PA_CONTROL_PAGE] = __pa(control_page); diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 1e47679..e73449d 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -364,7 +364,7 @@ static void microcode_fini_cpu_amd(int cpu) uci->mc = NULL; } -static struct microcode_ops microcode_amd_ops = { +static const struct microcode_ops microcode_amd_ops = { .request_microcode_user = request_microcode_user, .request_microcode_fw = request_microcode_fw, .collect_cpu_info = collect_cpu_info_amd, @@ -372,7 +372,7 @@ static struct microcode_ops microcode_amd_ops = { .microcode_fini_cpu = microcode_fini_cpu_amd, }; -struct microcode_ops * __init init_amd_microcode(void) +const struct microcode_ops * __init init_amd_microcode(void) { return &microcode_amd_ops; } diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 378e9a8..b5a6ea9 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -90,7 +90,7 @@ MODULE_LICENSE("GPL"); #define MICROCODE_VERSION "2.00" -static struct microcode_ops *microcode_ops; +static const struct microcode_ops *microcode_ops; /* * Synchronization. 
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 0d334dd..14cedaf 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -443,13 +443,13 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) static int get_ucode_user(void *to, const void *from, size_t n) { - return copy_from_user(to, from, n); + return copy_from_user(to, (const void __force_user *)from, n); } static enum ucode_state request_microcode_user(int cpu, const void __user *buf, size_t size) { - return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); + return generic_load_microcode(cpu, (__force_kernel void *)buf, size, &get_ucode_user); } static void microcode_fini_cpu(int cpu) @@ -460,7 +460,7 @@ static void microcode_fini_cpu(int cpu) uci->mc = NULL; } -static struct microcode_ops microcode_intel_ops = { +static const struct microcode_ops microcode_intel_ops = { .request_microcode_user = request_microcode_user, .request_microcode_fw = request_microcode_fw, .collect_cpu_info = collect_cpu_info, @@ -468,7 +468,7 @@ static struct microcode_ops microcode_intel_ops = { .microcode_fini_cpu = microcode_fini_cpu, }; -struct microcode_ops * __init init_intel_microcode(void) +const struct microcode_ops * __init init_intel_microcode(void) { return &microcode_intel_ops; } diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 89f386f..9028f51 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -34,7 +34,7 @@ #define DEBUGP(fmt...) 
#endif -void *module_alloc(unsigned long size) +static void *__module_alloc(unsigned long size, pgprot_t prot) { struct vm_struct *area; @@ -48,8 +48,18 @@ void *module_alloc(unsigned long size) if (!area) return NULL; - return __vmalloc_area(area, GFP_KERNEL | __GFP_HIGHMEM, - PAGE_KERNEL_EXEC); + return __vmalloc_area(area, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, prot); +} + +void *module_alloc(unsigned long size) +{ + +#ifdef CONFIG_PAX_KERNEXEC + return __module_alloc(size, PAGE_KERNEL); +#else + return __module_alloc(size, PAGE_KERNEL_EXEC); +#endif + } /* Free memory returned from module_alloc */ @@ -58,6 +68,40 @@ void module_free(struct module *mod, void *module_region) vfree(module_region); } +#ifdef CONFIG_PAX_KERNEXEC +#ifdef CONFIG_X86_32 +void *module_alloc_exec(unsigned long size) +{ + struct vm_struct *area; + + if (size == 0) + return NULL; + + area = __get_vm_area(size, VM_ALLOC, (unsigned long)&MODULES_EXEC_VADDR, (unsigned long)&MODULES_EXEC_END); + return area ? area->addr : NULL; +} +EXPORT_SYMBOL(module_alloc_exec); + +void module_free_exec(struct module *mod, void *module_region) +{ + vunmap(module_region); +} +EXPORT_SYMBOL(module_free_exec); +#else +void module_free_exec(struct module *mod, void *module_region) +{ + module_free(mod, module_region); +} +EXPORT_SYMBOL(module_free_exec); + +void *module_alloc_exec(unsigned long size) +{ + return __module_alloc(size, PAGE_KERNEL_RX); +} +EXPORT_SYMBOL(module_alloc_exec); +#endif +#endif + /* We don't need anything special. 
*/ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, @@ -77,14 +121,16 @@ int apply_relocate(Elf32_Shdr *sechdrs, unsigned int i; Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr; Elf32_Sym *sym; - uint32_t *location; + uint32_t *plocation, location; DEBUGP("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { /* This is where to make the change */ - location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr - + rel[i].r_offset; + plocation = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[i].r_offset; + location = (uint32_t)plocation; + if (sechdrs[sechdrs[relsec].sh_info].sh_flags & SHF_EXECINSTR) + plocation = ktla_ktva((void *)plocation); /* This is the symbol it is referring to. Note that all undefined symbols have been resolved. */ sym = (Elf32_Sym *)sechdrs[symindex].sh_addr @@ -93,11 +139,15 @@ int apply_relocate(Elf32_Shdr *sechdrs, switch (ELF32_R_TYPE(rel[i].r_info)) { case R_386_32: /* We add the value into the location given */ - *location += sym->st_value; + pax_open_kernel(); + *plocation += sym->st_value; + pax_close_kernel(); break; case R_386_PC32: /* Add the value, subtract its postition */ - *location += sym->st_value - (uint32_t)location; + pax_open_kernel(); + *plocation += sym->st_value - location; + pax_close_kernel(); break; default: printk(KERN_ERR "module %s: Unknown relocation: %u\n", @@ -153,21 +203,30 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_X86_64_NONE: break; case R_X86_64_64: + pax_open_kernel(); *(u64 *)loc = val; + pax_close_kernel(); break; case R_X86_64_32: + pax_open_kernel(); *(u32 *)loc = val; + pax_close_kernel(); if (val != *(u32 *)loc) goto overflow; break; case R_X86_64_32S: + pax_open_kernel(); *(s32 *)loc = val; + pax_close_kernel(); if ((s64)val != *(s32 *)loc) goto overflow; break; case R_X86_64_PC32: val -= (u64)loc; + pax_open_kernel(); *(u32 *)loc = val; + pax_close_kernel(); + #if 0 if ((s64)val != 
*(s32 *)loc) goto overflow; diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 3a7c5a4..9191528 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -13,7 +13,7 @@ default_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) __raw_spin_lock(lock); } -struct pv_lock_ops pv_lock_ops = { +struct pv_lock_ops pv_lock_ops __read_only = { #ifdef CONFIG_SMP .spin_is_locked = __ticket_spin_is_locked, .spin_is_contended = __ticket_spin_is_contended, diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 1b1739d..dea6077 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -53,6 +53,9 @@ u64 _paravirt_ident_64(u64 x) { return x; } +#if defined(CONFIG_X86_32) && defined(CONFIG_X86_PAE) +PV_CALLEE_SAVE_REGS_THUNK(_paravirt_ident_64); +#endif void __init default_banner(void) { @@ -122,7 +125,7 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target, * corresponding structure. 
*/ static void *get_call_destination(u8 type) { - struct paravirt_patch_template tmpl = { + const struct paravirt_patch_template tmpl = { .pv_init_ops = pv_init_ops, .pv_time_ops = pv_time_ops, .pv_cpu_ops = pv_cpu_ops, @@ -133,6 +136,8 @@ static void *get_call_destination(u8 type) .pv_lock_ops = pv_lock_ops, #endif }; + + pax_track_stack(); return *((void **)&tmpl + type); } @@ -145,15 +150,19 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, if (opfunc == NULL) /* If there's no function, patch it with a ud2a (BUG) */ ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); - else if (opfunc == _paravirt_nop) + else if (opfunc == (void *)_paravirt_nop) /* If the operation is a nop, then nop the callsite */ ret = paravirt_patch_nop(); /* identity functions just return their single argument */ - else if (opfunc == _paravirt_ident_32) + else if (opfunc == (void *)_paravirt_ident_32) ret = paravirt_patch_ident_32(insnbuf, len); - else if (opfunc == _paravirt_ident_64) + else if (opfunc == (void *)_paravirt_ident_64) ret = paravirt_patch_ident_64(insnbuf, len); +#if defined(CONFIG_X86_32) && defined(CONFIG_X86_PAE) + else if (opfunc == (void *)__raw_callee_save__paravirt_ident_64) + ret = paravirt_patch_ident_64(insnbuf, len); +#endif else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || @@ -178,7 +187,7 @@ unsigned paravirt_patch_insns(void *insnbuf, unsigned len, if (insn_len > len || start == NULL) insn_len = len; else - memcpy(insnbuf, start, insn_len); + memcpy(insnbuf, ktla_ktva(start), insn_len); return insn_len; } @@ -294,22 +303,22 @@ void arch_flush_lazy_mmu_mode(void) preempt_enable(); } -struct pv_info pv_info = { +struct pv_info pv_info __read_only = { .name = "bare hardware", .paravirt_enabled = 0, .kernel_rpl = 0, .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ }; -struct pv_init_ops pv_init_ops = { +struct pv_init_ops pv_init_ops __read_only = { 
.patch = native_patch, }; -struct pv_time_ops pv_time_ops = { +struct pv_time_ops pv_time_ops __read_only = { .sched_clock = native_sched_clock, }; -struct pv_irq_ops pv_irq_ops = { +struct pv_irq_ops pv_irq_ops __read_only = { .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), @@ -321,7 +330,7 @@ struct pv_irq_ops pv_irq_ops = { #endif }; -struct pv_cpu_ops pv_cpu_ops = { +struct pv_cpu_ops pv_cpu_ops __read_only = { .cpuid = native_cpuid, .get_debugreg = native_get_debugreg, .set_debugreg = native_set_debugreg, @@ -382,21 +391,26 @@ struct pv_cpu_ops pv_cpu_ops = { .end_context_switch = paravirt_nop, }; -struct pv_apic_ops pv_apic_ops = { +struct pv_apic_ops pv_apic_ops __read_only = { #ifdef CONFIG_X86_LOCAL_APIC .startup_ipi_hook = paravirt_nop, #endif }; -#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) +#ifdef CONFIG_X86_32 +#ifdef CONFIG_X86_PAE +/* 64-bit pagetable entries */ +#define PTE_IDENT PV_CALLEE_SAVE(_paravirt_ident_64) +#else /* 32-bit pagetable entries */ #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32) +#endif #else /* 64-bit pagetable entries */ #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) #endif -struct pv_mmu_ops pv_mmu_ops = { +struct pv_mmu_ops pv_mmu_ops __read_only = { .read_cr2 = native_read_cr2, .write_cr2 = native_write_cr2, @@ -448,6 +462,7 @@ struct pv_mmu_ops pv_mmu_ops = { .make_pud = PTE_IDENT, .set_pgd = native_set_pgd, + .set_pgd_batched = native_set_pgd_batched, #endif #endif /* PAGETABLE_LEVELS >= 3 */ @@ -467,6 +482,12 @@ struct pv_mmu_ops pv_mmu_ops = { }, .set_fixmap = native_set_fixmap, + +#ifdef CONFIG_PAX_KERNEXEC + .pax_open_kernel = native_pax_open_kernel, + .pax_close_kernel = native_pax_close_kernel, +#endif + }; EXPORT_SYMBOL_GPL(pv_time_ops); diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 1a2d4b1..6a0dd55 100644 --- 
a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -477,7 +477,7 @@ static void calgary_free_coherent(struct device *dev, size_t size, free_pages((unsigned long)vaddr, get_order(size)); } -static struct dma_map_ops calgary_dma_ops = { +static const struct dma_map_ops calgary_dma_ops = { .alloc_coherent = calgary_alloc_coherent, .free_coherent = calgary_free_coherent, .map_sg = calgary_map_sg, diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 6ac3931..42b4414 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -14,7 +14,7 @@ static int forbid_dac __read_mostly; -struct dma_map_ops *dma_ops; +const struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; @@ -243,7 +243,7 @@ early_param("iommu", iommu_setup); int dma_supported(struct device *dev, u64 mask) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); #ifdef CONFIG_PCI if (mask > 0xffffffff && forbid_dac > 0) { diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 1c76691..e3632db 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -682,7 +682,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) return -1; } -static struct dma_map_ops gart_dma_ops = { +static const struct dma_map_ops gart_dma_ops = { .map_sg = gart_map_sg, .unmap_sg = gart_unmap_sg, .map_page = gart_map_page, diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index a3933d4..c898869 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -94,7 +94,7 @@ static void nommu_sync_sg_for_device(struct device *dev, flush_write_buffers(); } -struct dma_map_ops nommu_dma_ops = { +const struct dma_map_ops nommu_dma_ops = { .alloc_coherent = dma_generic_alloc_coherent, .free_coherent = nommu_free_coherent, .map_sg = nommu_map_sg, diff --git a/arch/x86/kernel/pci-swiotlb.c 
b/arch/x86/kernel/pci-swiotlb.c index aaa6b78..4de1881 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -25,7 +25,7 @@ static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags); } -static struct dma_map_ops swiotlb_dma_ops = { +static const struct dma_map_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, .alloc_coherent = x86_swiotlb_alloc_coherent, .free_coherent = swiotlb_free_coherent, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index fc6c84d..0312ca2 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -51,16 +51,33 @@ void free_thread_xstate(struct task_struct *tsk) void free_thread_info(struct thread_info *ti) { - free_thread_xstate(ti->task); free_pages((unsigned long)ti, get_order(THREAD_SIZE)); } +static struct kmem_cache *task_struct_cachep; + void arch_task_cache_init(void) { - task_xstate_cachep = - kmem_cache_create("task_xstate", xstate_size, + /* create a slab on which task_structs can be allocated */ + task_struct_cachep = + kmem_cache_create("task_struct", sizeof(struct task_struct), + ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL); + + task_xstate_cachep = + kmem_cache_create("task_xstate", xstate_size, __alignof__(union thread_xstate), - SLAB_PANIC | SLAB_NOTRACK, NULL); + SLAB_PANIC | SLAB_NOTRACK | SLAB_USERCOPY, NULL); +} + +struct task_struct *alloc_task_struct(void) +{ + return kmem_cache_alloc(task_struct_cachep, GFP_KERNEL); +} + +void free_task_struct(struct task_struct *task) +{ + free_thread_xstate(task); + kmem_cache_free(task_struct_cachep, task); } /* @@ -73,7 +90,7 @@ void exit_thread(void) unsigned long *bp = t->io_bitmap_ptr; if (bp) { - struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); + struct tss_struct *tss = init_tss + get_cpu(); t->io_bitmap_ptr = NULL; clear_thread_flag(TIF_IO_BITMAP); @@ -93,6 +110,9 @@ void flush_thread(void) 
clear_tsk_thread_flag(tsk, TIF_DEBUG); +#if defined(CONFIG_X86_32) && !defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_PAX_MEMORY_UDEREF) + loadsegment(gs, 0); +#endif tsk->thread.debugreg0 = 0; tsk->thread.debugreg1 = 0; tsk->thread.debugreg2 = 0; @@ -307,7 +327,7 @@ void default_idle(void) EXPORT_SYMBOL(default_idle); #endif -void stop_this_cpu(void *dummy) +__noreturn void stop_this_cpu(void *dummy) { local_irq_disable(); /* @@ -568,16 +588,38 @@ static int __init idle_setup(char *str) } early_param("idle", idle_setup); -unsigned long arch_align_stack(unsigned long sp) +#ifdef CONFIG_PAX_RANDKSTACK +void pax_randomize_kstack(struct pt_regs *regs) { - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() % 8192; - return sp & ~0xf; -} + struct thread_struct *thread = &current->thread; + unsigned long time; -unsigned long arch_randomize_brk(struct mm_struct *mm) -{ - unsigned long range_end = mm->brk + 0x02000000; - return randomize_range(mm->brk, range_end, 0) ? 
: mm->brk; + if (!randomize_va_space) + return; + + if (v8086_mode(regs)) + return; + + rdtscl(time); + + /* P4 seems to return a 0 LSB, ignore it */ +#ifdef CONFIG_MPENTIUM4 + time &= 0x3EUL; + time <<= 2; +#elif defined(CONFIG_X86_64) + time &= 0xFUL; + time <<= 4; +#else + time &= 0x1FUL; + time <<= 3; +#endif + + thread->sp0 ^= time; + load_sp0(init_tss + smp_processor_id(), thread); + +#ifdef CONFIG_X86_64 + percpu_write(kernel_stack, thread->sp0); +#endif } +#endif diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index c40c432..6e1df72 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -67,6 +67,7 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); unsigned long thread_saved_pc(struct task_struct *tsk) { return ((unsigned long *)tsk->thread.sp)[3]; +//XXX return tsk->thread.eip; } #ifndef CONFIG_SMP @@ -129,15 +130,14 @@ void __show_regs(struct pt_regs *regs, int all) unsigned short ss, gs; const char *board; - if (user_mode_vm(regs)) { + if (user_mode(regs)) { sp = regs->sp; ss = regs->ss & 0xffff; - gs = get_user_gs(regs); } else { sp = (unsigned long) (&regs->sp); savesegment(ss, ss); - savesegment(gs, gs); } + gs = get_user_gs(regs); printk("\n"); @@ -210,10 +210,10 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) regs.bx = (unsigned long) fn; regs.dx = (unsigned long) arg; - regs.ds = __USER_DS; - regs.es = __USER_DS; + regs.ds = __KERNEL_DS; + regs.es = __KERNEL_DS; regs.fs = __KERNEL_PERCPU; - regs.gs = __KERNEL_STACK_CANARY; + savesegment(gs, regs.gs); regs.orig_ax = -1; regs.ip = (unsigned long) kernel_thread_helper; regs.cs = __KERNEL_CS | get_kernel_rpl(); @@ -247,13 +247,14 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, struct task_struct *tsk; int err; - childregs = task_pt_regs(p); + childregs = task_stack_page(p) + THREAD_SIZE - sizeof(struct pt_regs) - 8; *childregs = *regs; childregs->ax = 0; childregs->sp = sp; p->thread.sp = (unsigned 
long) childregs; p->thread.sp0 = (unsigned long) (childregs+1); + p->tinfo.lowest_stack = (unsigned long)task_stack_page(p); p->thread.ip = (unsigned long) ret_from_fork; @@ -345,7 +346,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = init_tss + cpu; bool preload_fpu; /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ @@ -380,6 +381,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ lazy_save_gs(prev->gs); +#ifdef CONFIG_PAX_MEMORY_UDEREF + __set_fs(task_thread_info(next_p)->addr_limit); +#endif + /* * Load the per-thread Thread-Local Storage descriptor. */ @@ -415,6 +420,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ arch_end_context_switch(next_p); + percpu_write(current_task, next_p); + percpu_write(current_tinfo, &next_p->tinfo); + if (preload_fpu) __math_state_restore(); @@ -424,8 +432,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (prev->gs | next->gs) lazy_load_gs(next->gs); - percpu_write(current_task, next_p); - return prev_p; } @@ -495,4 +501,3 @@ unsigned long get_wchan(struct task_struct *p) } while (count++ < 16); return 0; } - diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 39493bc..196816d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -91,7 +91,7 @@ static void __exit_idle(void) void exit_idle(void) { /* idle loop has pid 0 */ - if (current->pid) + if (task_pid_nr(current)) return; __exit_idle(); } @@ -170,7 +170,7 @@ void __show_regs(struct pt_regs *regs, int all) if (!board) board = ""; printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n", - current->pid, current->comm, print_tainted(), + task_pid_nr(current), current->comm, print_tainted(), init_utsname()->release, 
(int)strcspn(init_utsname()->version, " "), init_utsname()->version, board); @@ -280,8 +280,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, struct pt_regs *childregs; struct task_struct *me = current; - childregs = ((struct pt_regs *) - (THREAD_SIZE + task_stack_page(p))) - 1; + childregs = task_stack_page(p) + THREAD_SIZE - sizeof(struct pt_regs) - 16; *childregs = *regs; childregs->ax = 0; @@ -292,6 +291,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.sp = (unsigned long) childregs; p->thread.sp0 = (unsigned long) (childregs+1); p->thread.usersp = me->thread.usersp; + p->tinfo.lowest_stack = (unsigned long)task_stack_page(p); set_tsk_thread_flag(p, TIF_FORK); @@ -379,7 +379,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct thread_struct *prev = &prev_p->thread; struct thread_struct *next = &next_p->thread; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = init_tss + cpu; unsigned fsindex, gsindex; bool preload_fpu; @@ -475,10 +475,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) prev->usersp = percpu_read(old_rsp); percpu_write(old_rsp, next->usersp); percpu_write(current_task, next_p); + percpu_write(current_tinfo, &next_p->tinfo); - percpu_write(kernel_stack, - (unsigned long)task_stack_page(next_p) + - THREAD_SIZE - KERNEL_STACK_OFFSET); + percpu_write(kernel_stack, next->sp0); /* * Now maybe reload the debug registers and handle I/O bitmaps @@ -559,12 +558,11 @@ unsigned long get_wchan(struct task_struct *p) if (!p || p == current || p->state == TASK_RUNNING) return 0; stack = (unsigned long)task_stack_page(p); - if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE) + if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE-16-sizeof(u64)) return 0; fp = *(u64 *)(p->thread.sp); do { - if (fp < (unsigned long)stack || - fp >= (unsigned long)stack+THREAD_SIZE) + if (fp < stack || fp > 
stack+THREAD_SIZE-16-sizeof(u64)) return 0; ip = *(u64 *)(fp+8); if (!in_sched_functions(ip)) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index c06acdd..3f5fff5 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -925,7 +925,7 @@ static const struct user_regset_view user_x86_32_view; /* Initialized below. */ long arch_ptrace(struct task_struct *child, long request, long addr, long data) { int ret; - unsigned long __user *datap = (unsigned long __user *)data; + unsigned long __user *datap = (__force unsigned long __user *)data; switch (request) { /* read the word at location addr in the USER area. */ @@ -1012,14 +1012,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) if (addr < 0) return -EIO; ret = do_get_thread_area(child, addr, - (struct user_desc __user *) data); + (__force struct user_desc __user *) data); break; case PTRACE_SET_THREAD_AREA: if (addr < 0) return -EIO; ret = do_set_thread_area(child, addr, - (struct user_desc __user *) data, 0); + (__force struct user_desc __user *) data, 0); break; #endif @@ -1038,12 +1038,12 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) #ifdef CONFIG_X86_PTRACE_BTS case PTRACE_BTS_CONFIG: ret = ptrace_bts_config - (child, data, (struct ptrace_bts_config __user *)addr); + (child, data, (__force struct ptrace_bts_config __user *)addr); break; case PTRACE_BTS_STATUS: ret = ptrace_bts_status - (child, data, (struct ptrace_bts_config __user *)addr); + (child, data, (__force struct ptrace_bts_config __user *)addr); break; case PTRACE_BTS_SIZE: @@ -1052,7 +1052,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) case PTRACE_BTS_GET: ret = ptrace_bts_read_record - (child, data, (struct bts_struct __user *) addr); + (child, data, (__force struct bts_struct __user *) addr); break; case PTRACE_BTS_CLEAR: @@ -1061,7 +1061,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, 
long data) case PTRACE_BTS_DRAIN: ret = ptrace_bts_drain - (child, data, (struct bts_struct __user *) addr); + (child, data, (__force struct bts_struct __user *) addr); break; #endif /* CONFIG_X86_PTRACE_BTS */ @@ -1450,7 +1450,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, info.si_code = si_code; /* User-mode ip? */ - info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL; + info.si_addr = user_mode(regs) ? (__force void __user *) regs->ip : NULL; /* Send us the fake SIGTRAP */ force_sig_info(SIGTRAP, &info, tsk); @@ -1469,7 +1469,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, * We must return the syscall number to actually look up in the table. * This can be -1L to skip running any syscall at all. */ -asmregparm long syscall_trace_enter(struct pt_regs *regs) +long syscall_trace_enter(struct pt_regs *regs) { long ret = 0; @@ -1514,7 +1514,7 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) return ret ?: regs->orig_ax; } -asmregparm void syscall_trace_leave(struct pt_regs *regs) +void syscall_trace_leave(struct pt_regs *regs) { if (unlikely(current->audit_context)) audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index cf98100..e76e03d 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -33,7 +33,7 @@ void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); static const struct desc_ptr no_idt = {}; -static int reboot_mode; +static unsigned short reboot_mode; enum reboot_type reboot_type = BOOT_KBD; int reboot_force; @@ -292,12 +292,12 @@ core_initcall(reboot_init); controller to pulse the CPU reset line, which is more thorough, but doesn't work with at least one type of 486 motherboard. It is easy to stop this code working; hence the copious comments. 
*/ -static const unsigned long long -real_mode_gdt_entries [3] = +static struct desc_struct +real_mode_gdt_entries [3] __read_only = { - 0x0000000000000000ULL, /* Null descriptor */ - 0x00009b000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ - 0x000093000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ + GDT_ENTRY_INIT(0, 0, 0), /* Null descriptor */ + GDT_ENTRY_INIT(0x9b, 0, 0xffff), /* 16-bit real-mode 64k code at 0x00000000 */ + GDT_ENTRY_INIT(0x93, 0x100, 0xffff) /* 16-bit real-mode 64k data at 0x00000100 */ }; static const struct desc_ptr @@ -346,7 +346,7 @@ static const unsigned char jump_to_bios [] = * specified by the code and length parameters. * We assume that length will aways be less that 100! */ -void machine_real_restart(const unsigned char *code, int length) +__noreturn void machine_real_restart(const unsigned char *code, unsigned int length) { local_irq_disable(); @@ -366,8 +366,8 @@ void machine_real_restart(const unsigned char *code, int length) /* Remap the kernel at virtual address zero, as well as offset zero from the kernel segment. This assumes the kernel segment starts at virtual address PAGE_OFFSET. */ - memcpy(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, - sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS); + clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, + min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); /* * Use `swapper_pg_dir' as our page directory. @@ -379,16 +379,15 @@ void machine_real_restart(const unsigned char *code, int length) boot)". This seems like a fairly standard thing that gets set by REBOOT.COM programs, and the previous reset routine did this too. */ - *((unsigned short *)0x472) = reboot_mode; + *(unsigned short *)(__va(0x472)) = reboot_mode; /* For the switch to real mode, copy some code to low memory. It has to be in the first 64k because it is running in 16-bit mode, and it has to have the same physical and virtual address, because it turns off paging. 
Copy it near the end of the first page, out of the way of BIOS variables. */ - memcpy((void *)(0x1000 - sizeof(real_mode_switch) - 100), - real_mode_switch, sizeof (real_mode_switch)); - memcpy((void *)(0x1000 - 100), code, length); + memcpy(__va(0x1000 - sizeof (real_mode_switch) - 100), real_mode_switch, sizeof (real_mode_switch)); + memcpy(__va(0x1000 - 100), code, length); /* Set up the IDT for real mode. */ load_idt(&real_mode_idt); @@ -416,6 +415,7 @@ void machine_real_restart(const unsigned char *code, int length) __asm__ __volatile__ ("ljmp $0x0008,%0" : : "i" ((void *)(0x1000 - sizeof (real_mode_switch) - 100))); + do { } while (1); } #ifdef CONFIG_APM_MODULE EXPORT_SYMBOL(machine_real_restart); @@ -544,7 +544,7 @@ void __attribute__((weak)) mach_reboot_fixups(void) { } -static void native_machine_emergency_restart(void) +__noreturn static void native_machine_emergency_restart(void) { int i; @@ -659,13 +659,13 @@ void native_machine_shutdown(void) #endif } -static void __machine_emergency_restart(int emergency) +static __noreturn void __machine_emergency_restart(int emergency) { reboot_emergency = emergency; machine_ops.emergency_restart(); } -static void native_machine_restart(char *__unused) +static __noreturn void native_machine_restart(char *__unused) { printk("machine restart\n"); @@ -674,7 +674,7 @@ static void native_machine_restart(char *__unused) __machine_emergency_restart(0); } -static void native_machine_halt(void) +static __noreturn void native_machine_halt(void) { /* stop other cpus and apics */ machine_shutdown(); @@ -685,7 +685,7 @@ static void native_machine_halt(void) stop_this_cpu(NULL); } -static void native_machine_power_off(void) +__noreturn static void native_machine_power_off(void) { if (pm_power_off) { if (!reboot_force) @@ -694,6 +694,7 @@ static void native_machine_power_off(void) } /* a fallback in case there is no PM info available */ tboot_shutdown(TB_SHUTDOWN_HALT); + do { } while (1); } struct machine_ops machine_ops = { 
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 7a6f3b3..976a959 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -11,6 +11,7 @@ #include #include #include +#include /* * Must be relocatable PIC code callable as a C function @@ -167,6 +168,7 @@ identity_mapped: xorq %r14, %r14 xorq %r15, %r15 + pax_force_retaddr 0, 1 ret 1: diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 5449a26..0b6c759 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -783,14 +783,14 @@ void __init setup_arch(char **cmdline_p) if (!boot_params.hdr.root_flags) root_mountflags &= ~MS_RDONLY; - init_mm.start_code = (unsigned long) _text; - init_mm.end_code = (unsigned long) _etext; + init_mm.start_code = ktla_ktva((unsigned long) _text); + init_mm.end_code = ktla_ktva((unsigned long) _etext); init_mm.end_data = (unsigned long) _edata; init_mm.brk = _brk_end; - code_resource.start = virt_to_phys(_text); - code_resource.end = virt_to_phys(_etext)-1; - data_resource.start = virt_to_phys(_etext); + code_resource.start = virt_to_phys(ktla_ktva(_text)); + code_resource.end = virt_to_phys(ktla_ktva(_etext))-1; + data_resource.start = virt_to_phys(_sdata); data_resource.end = virt_to_phys(_edata)-1; bss_resource.start = virt_to_phys(&__bss_start); bss_resource.end = virt_to_phys(&__bss_stop)-1; diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index d559af9..524c6ad 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -25,19 +25,17 @@ # define DBG(x...) 
#endif -DEFINE_PER_CPU(int, cpu_number); +#ifdef CONFIG_SMP +DEFINE_PER_CPU(unsigned int, cpu_number); EXPORT_PER_CPU_SYMBOL(cpu_number); +#endif -#ifdef CONFIG_X86_64 #define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load) -#else -#define BOOT_PERCPU_OFFSET 0 -#endif DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; EXPORT_PER_CPU_SYMBOL(this_cpu_off); -unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { +unsigned long __per_cpu_offset[NR_CPUS] __read_only = { [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET, }; EXPORT_SYMBOL(__per_cpu_offset); @@ -159,10 +157,10 @@ static inline void setup_percpu_segment(int cpu) { #ifdef CONFIG_X86_32 struct desc_struct gdt; + unsigned long base = per_cpu_offset(cpu); - pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF, - 0x2 | DESCTYPE_S, 0x8); - gdt.s = 1; + pack_descriptor(&gdt, base, (VMALLOC_END - base - 1) >> PAGE_SHIFT, + 0x83 | DESCTYPE_S, 0xC); write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); #endif @@ -212,6 +210,11 @@ void __init setup_per_cpu_areas(void) /* alrighty, percpu areas up and running */ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { +#ifdef CONFIG_CC_STACKPROTECTOR +#ifdef CONFIG_X86_32 + unsigned long canary = per_cpu(stack_canary.canary, cpu); +#endif +#endif per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; @@ -239,6 +242,12 @@ void __init setup_per_cpu_areas(void) early_per_cpu_map(x86_cpu_to_node_map, cpu); #endif #endif +#ifdef CONFIG_CC_STACKPROTECTOR +#ifdef CONFIG_X86_32 + if (!cpu) + per_cpu(stack_canary.canary, cpu) = canary; +#endif +#endif /* * Up to this point, the boot CPU has been using .data.init * area. Reload any changed state for the boot CPU. 
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 6a44a76..a9287a1 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -197,7 +197,7 @@ static unsigned long align_sigframe(unsigned long sp) * Align the stack pointer according to the i386 ABI, * i.e. so that on function entry ((sp + 4) & 15) == 0. */ - sp = ((sp + 4) & -16ul) - 4; + sp = ((sp - 12) & -16ul) - 4; #else /* !CONFIG_X86_32 */ sp = round_down(sp, 16) - 8; #endif @@ -248,11 +248,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, * Return an always-bogus address instead so we will die with SIGSEGV. */ if (onsigstack && !likely(on_sig_stack(sp))) - return (void __user *)-1L; + return (__force void __user *)-1L; /* save i387 state */ if (used_math() && save_i387_xstate(*fpstate) < 0) - return (void __user *)-1L; + return (__force void __user *)-1L; return (void __user *)sp; } @@ -307,9 +307,9 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, } if (current->mm->context.vdso) - restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn); + restorer = (__force void __user *)VDSO32_SYMBOL(current->mm->context.vdso, sigreturn); else - restorer = &frame->retcode; + restorer = (void __user *)&frame->retcode; if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; @@ -323,7 +323,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, * reasons and because gdb uses it as a signature to notice * signal handler stack frames. */ - err |= __put_user(*((u64 *)&retcode), (u64 *)frame->retcode); + err |= __put_user(*((u64 *)&retcode), (u64 __user *)frame->retcode); if (err) return -EFAULT; @@ -377,7 +377,10 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); /* Set up to return from userspace. 
*/ - restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); + if (current->mm->context.vdso) + restorer = (__force void __user *)VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); + else + restorer = (void __user *)&frame->retcode; if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; put_user_ex(restorer, &frame->pretcode); @@ -389,7 +392,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, * reasons and because gdb uses it as a signature to notice * signal handler stack frames. */ - put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); + put_user_ex(*((u64 *)&rt_retcode), (u64 __user *)frame->retcode); } put_user_catch(err); if (err) @@ -782,6 +785,8 @@ static void do_signal(struct pt_regs *regs) int signr; sigset_t *oldset; + pax_track_stack(); + /* * We want the common case to go fast, which is why we may in certain * cases get here from kernel mode. Just return without doing anything @@ -789,7 +794,7 @@ static void do_signal(struct pt_regs *regs) * X86_32: vm86 regs switched out by assembly code before reaching * here, so testing against kernel CS suffices. 
*/ - if (!user_mode(regs)) + if (!user_mode_novm(regs)) return; if (current_thread_info()->status & TS_RESTORE_SIGMASK) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7e8e905..64d5c32 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -94,14 +94,14 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); */ static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex); -void cpu_hotplug_driver_lock() +void cpu_hotplug_driver_lock(void) { - mutex_lock(&x86_cpu_hotplug_driver_mutex); + mutex_lock(&x86_cpu_hotplug_driver_mutex); } -void cpu_hotplug_driver_unlock() +void cpu_hotplug_driver_unlock(void) { - mutex_unlock(&x86_cpu_hotplug_driver_mutex); + mutex_unlock(&x86_cpu_hotplug_driver_mutex); } ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; } @@ -625,7 +625,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) * target processor state. */ startup_ipi_hook(phys_apicid, (unsigned long) start_secondary, - (unsigned long)stack_start.sp); + stack_start); /* * Run STARTUP IPI loop. 
@@ -743,6 +743,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) set_idle_for_cpu(cpu, c_idle.idle); do_rest: per_cpu(current_task, cpu) = c_idle.idle; + per_cpu(current_tinfo, cpu) = &c_idle.idle->tinfo; #ifdef CONFIG_X86_32 /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); @@ -750,13 +751,15 @@ do_rest: #else clear_tsk_thread_flag(c_idle.idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); - per_cpu(kernel_stack, cpu) = - (unsigned long)task_stack_page(c_idle.idle) - - KERNEL_STACK_OFFSET + THREAD_SIZE; + per_cpu(kernel_stack, cpu) = (unsigned long)task_stack_page(c_idle.idle) - 16 + THREAD_SIZE; #endif + + pax_open_kernel(); early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); + pax_close_kernel(); + initial_code = (unsigned long)start_secondary; - stack_start.sp = (void *) c_idle.idle->thread.sp; + stack_start = c_idle.idle->thread.sp; /* start_ip had better be page-aligned! */ start_ip = setup_trampoline(); @@ -891,6 +894,12 @@ int __cpuinit native_cpu_up(unsigned int cpu) per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; +#ifdef CONFIG_PAX_PER_CPU_PGD + clone_pgd_range(get_cpu_pgd(cpu) + KERNEL_PGD_BOUNDARY, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + KERNEL_PGD_PTRS); +#endif + err = do_boot_cpu(apicid, cpu); if (err) { diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 3149032..14f1053 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -27,10 +27,10 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re struct desc_struct *desc; unsigned long base; - seg &= ~7UL; + seg >>= 3; mutex_lock(&child->mm->context.lock); - if (unlikely((seg >> 3) >= child->mm->context.size)) + if (unlikely(seg >= child->mm->context.size)) addr = -1L; /* bogus selector, access would fault */ else { desc = child->mm->context.ldt + seg; @@ -42,7 +42,8 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re addr += base; } 
mutex_unlock(&child->mm->context.lock); - } + } else if (seg == __KERNEL_CS || seg == __KERNEXEC_KERNEL_CS) + addr = ktla_ktva(addr); return addr; } @@ -53,6 +54,9 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) unsigned char opcode[15]; unsigned long addr = convert_ip_to_linear(child, regs); + if (addr == -EINVAL) + return 0; + copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); for (i = 0; i < copied; i++) { switch (opcode[i]) { @@ -74,7 +78,7 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) #ifdef CONFIG_X86_64 case 0x40 ... 0x4f: - if (regs->cs != __USER_CS) + if ((regs->cs & 0xffff) != __USER_CS) /* 32-bit mode: register increment */ return 0; /* 64-bit mode: REX prefix */ diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index dee1ff7..a397f7f 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -24,6 +24,21 @@ #include +int i386_mmap_check(unsigned long addr, unsigned long len, unsigned long flags) +{ + unsigned long pax_task_size = TASK_SIZE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (current->mm->pax_flags & MF_PAX_SEGMEXEC) + pax_task_size = SEGMEXEC_TASK_SIZE; +#endif + + if (len > pax_task_size || addr > pax_task_size - len) + return -EINVAL; + + return 0; +} + /* * Perform the select(nd, in, out, ex, tv) and mmap() system * calls. 
Linux/i386 didn't use to be able to handle more than @@ -58,6 +73,212 @@ out: return err; } +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long start_addr, pax_task_size = TASK_SIZE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + pax_task_size = SEGMEXEC_TASK_SIZE; +#endif + + pax_task_size -= PAGE_SIZE; + + if (len > pax_task_size) + return -ENOMEM; + + if (flags & MAP_FIXED) + return addr; + +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + + if (addr) { + addr = PAGE_ALIGN(addr); + if (pax_task_size - len >= addr) { + vma = find_vma(mm, addr); + if (check_heap_stack_gap(vma, addr, len)) + return addr; + } + } + if (len > mm->cached_hole_size) { + start_addr = addr = mm->free_area_cache; + } else { + start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; + } + +#ifdef CONFIG_PAX_PAGEEXEC + if (!nx_enabled && (mm->pax_flags & MF_PAX_PAGEEXEC) && (flags & MAP_EXECUTABLE) && start_addr >= mm->mmap_base) { + start_addr = 0x00110000UL; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + start_addr += mm->delta_mmap & 0x03FFF000UL; +#endif + + if (mm->start_brk <= start_addr && start_addr < mm->mmap_base) + start_addr = addr = mm->mmap_base; + else + addr = start_addr; + } +#endif + +full_search: + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). */ + if (pax_task_size - len < addr) { + /* + * Start a new search - just in case we missed + * some holes. 
+ */ + if (start_addr != mm->mmap_base) { + start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; + goto full_search; + } + return -ENOMEM; + } + if (check_heap_stack_gap(vma, addr, len)) + break; + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + addr = vma->vm_end; + if (mm->start_brk <= addr && addr < mm->mmap_base) { + start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; + goto full_search; + } + } + + /* + * Remember the place where we stopped the search: + */ + mm->free_area_cache = addr + len; + return addr; +} + +unsigned long +arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + unsigned long base = mm->mmap_base, addr = addr0, pax_task_size = TASK_SIZE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + pax_task_size = SEGMEXEC_TASK_SIZE; +#endif + + pax_task_size -= PAGE_SIZE; + + /* requested length too big for entire address space */ + if (len > pax_task_size) + return -ENOMEM; + + if (flags & MAP_FIXED) + return addr; + +#ifdef CONFIG_PAX_PAGEEXEC + if (!nx_enabled && (mm->pax_flags & MF_PAX_PAGEEXEC) && (flags & MAP_EXECUTABLE)) + goto bottomup; +#endif + +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + + /* requesting a specific address */ + if (addr) { + addr = PAGE_ALIGN(addr); + if (pax_task_size - len >= addr) { + vma = find_vma(mm, addr); + if (check_heap_stack_gap(vma, addr, len)) + return addr; + } + } + + /* check if free_area_cache is useful for us */ + if (len <= mm->cached_hole_size) { + mm->cached_hole_size = 0; + mm->free_area_cache = mm->mmap_base; + } + + /* either no address requested or can't fit in requested address hole */ + addr = mm->free_area_cache; + + /* make sure it can fit in the remaining address space */ + if (addr > len) { + 
vma = find_vma(mm, addr-len); + if (check_heap_stack_gap(vma, addr - len, len)) + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr-len); + } + + if (mm->mmap_base < len) + goto bottomup; + + addr = mm->mmap_base-len; + + do { + /* + * Lookup failure means no vma is above this address, + * else if new region fits below vma->vm_start, + * return with success: + */ + vma = find_vma(mm, addr); + if (check_heap_stack_gap(vma, addr, len)) + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr); + + /* remember the largest hole we saw so far */ + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + + /* try just below the current vma->vm_start */ + addr = skip_heap_stack_gap(vma, len); + } while (!IS_ERR_VALUE(addr)); + +bottomup: + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. 
+ */ + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + mm->mmap_base = SEGMEXEC_TASK_UNMAPPED_BASE; + else +#endif + + mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + + mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + /* + * Restore the topdown base: + */ + mm->mmap_base = base; + mm->free_area_cache = base; + mm->cached_hole_size = ~0UL; + + return addr; +} struct sel_arg_struct { unsigned long n; @@ -93,7 +314,7 @@ asmlinkage int sys_ipc(uint call, int first, int second, return sys_semtimedop(first, (struct sembuf __user *)ptr, second, NULL); case SEMTIMEDOP: return sys_semtimedop(first, (struct sembuf __user *)ptr, second, - (const struct timespec __user *)fifth); + (__force const struct timespec __user *)fifth); case SEMGET: return sys_semget(first, second, third); @@ -140,7 +361,7 @@ asmlinkage int sys_ipc(uint call, int first, int second, ret = do_shmat(first, (char __user *) ptr, second, &raddr); if (ret) return ret; - return put_user(raddr, (ulong __user *) third); + return put_user(raddr, (__force ulong __user *) third); } case 1: /* iBCS2 emulator entry point */ if (!segment_eq(get_fs(), get_ds())) @@ -207,17 +428,3 @@ asmlinkage int sys_olduname(struct oldold_utsname __user *name) return error; } - - -/* - * Do a system call from kernel instead of calling sys_execve so we - * end up with proper pt_regs. 
- */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) -{ - long __res; - asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" - : "=a" (__res) - : "0" (__NR_execve), "ri" (filename), "c" (argv), "d" (envp) : "memory"); - return __res; -} diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 8aa2057..b604bc1 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -32,8 +32,8 @@ out: return error; } -static void find_start_end(unsigned long flags, unsigned long *begin, - unsigned long *end) +static void find_start_end(struct mm_struct *mm, unsigned long flags, + unsigned long *begin, unsigned long *end) { if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) { unsigned long new_begin; @@ -52,7 +52,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin, *begin = new_begin; } } else { - *begin = TASK_UNMAPPED_BASE; + *begin = mm->mmap_base; *end = TASK_SIZE; } } @@ -69,16 +69,19 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, if (flags & MAP_FIXED) return addr; - find_start_end(flags, &begin, &end); + find_start_end(mm, flags, &begin, &end); if (len > end) return -ENOMEM; +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); - if (end - len >= addr && - (!vma || addr + len <= vma->vm_start)) + if (end - len >= addr && check_heap_stack_gap(vma, addr, len)) return addr; } if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32)) @@ -106,7 +109,7 @@ full_search: } return -ENOMEM; } - if (!vma || addr + len <= vma->vm_start) { + if (check_heap_stack_gap(vma, addr, len)) { /* * Remember the place where we stopped the search: */ @@ -128,7 +131,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, { struct vm_area_struct *vma; struct mm_struct *mm = current->mm; - unsigned long addr = addr0; + unsigned long base = mm->mmap_base, 
addr = addr0; /* requested length too big for entire address space */ if (len > TASK_SIZE) @@ -141,13 +144,18 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) goto bottomup; +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + /* requesting a specific address */ if (addr) { addr = PAGE_ALIGN(addr); - vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) - return addr; + if (TASK_SIZE - len >= addr) { + vma = find_vma(mm, addr); + if (check_heap_stack_gap(vma, addr, len)) + return addr; + } } /* check if free_area_cache is useful for us */ @@ -162,7 +170,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, /* make sure it can fit in the remaining address space */ if (addr > len) { vma = find_vma(mm, addr-len); - if (!vma || addr <= vma->vm_start) + if (check_heap_stack_gap(vma, addr - len, len)) /* remember the address as a hint for next time */ return mm->free_area_cache = addr-len; } @@ -179,7 +187,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, * return with success: */ vma = find_vma(mm, addr); - if (!vma || addr+len <= vma->vm_start) + if (check_heap_stack_gap(vma, addr, len)) /* remember the address as a hint for next time */ return mm->free_area_cache = addr; @@ -188,8 +196,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, mm->cached_hole_size = vma->vm_start - addr; /* try just below the current vma->vm_start */ - addr = vma->vm_start-len; - } while (len < vma->vm_start); + addr = skip_heap_stack_gap(vma, len); + } while (!IS_ERR_VALUE(addr)); bottomup: /* @@ -198,13 +206,21 @@ bottomup: * can happen with large stack limits and large mmap() * allocations. 
*/ + mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + + mm->free_area_cache = mm->mmap_base; mm->cached_hole_size = ~0UL; - mm->free_area_cache = TASK_UNMAPPED_BASE; addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); /* * Restore the topdown base: */ - mm->free_area_cache = mm->mmap_base; + mm->mmap_base = base; + mm->free_area_cache = base; mm->cached_hole_size = ~0UL; return addr; diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 76d70a4..4c94a44 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -1,3 +1,4 @@ +.section .rodata,"a",@progbits ENTRY(sys_call_table) .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ .long sys_exit diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 46b8277..3349d55 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -216,7 +216,7 @@ static int tboot_setup_sleep(void) void tboot_shutdown(u32 shutdown_type) { - void (*shutdown)(void); + void (* __noreturn shutdown)(void); if (!tboot_enabled()) return; @@ -238,7 +238,7 @@ void tboot_shutdown(u32 shutdown_type) switch_to_tboot_pt(); - shutdown = (void(*)(void))(unsigned long)tboot->shutdown_entry; + shutdown = (void *)tboot->shutdown_entry; shutdown(); /* should not reach here */ @@ -295,7 +295,7 @@ void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control) tboot_shutdown(acpi_shutdown_map[sleep_state]); } -static atomic_t ap_wfs_count; +static atomic_unchecked_t ap_wfs_count; static int tboot_wait_for_aps(int num_aps) { @@ -319,9 +319,9 @@ static int __cpuinit tboot_cpu_callback(struct notifier_block *nfb, { switch (action) { case CPU_DYING: - atomic_inc(&ap_wfs_count); + atomic_inc_unchecked(&ap_wfs_count); if (num_online_cpus() == 1) - if (tboot_wait_for_aps(atomic_read(&ap_wfs_count))) + if 
(tboot_wait_for_aps(atomic_read_unchecked(&ap_wfs_count))) return NOTIFY_BAD; break; } @@ -340,7 +340,7 @@ static __init int tboot_late_init(void) tboot_create_trampoline(); - atomic_set(&ap_wfs_count, 0); + atomic_set_unchecked(&ap_wfs_count, 0); register_hotcpu_notifier(&tboot_cpu_notifier); return 0; } diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index be25734..87fe232 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -26,17 +26,13 @@ int timer_ack; #endif -#ifdef CONFIG_X86_64 -volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; -#endif - unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); - if (!user_mode_vm(regs) && in_lock_functions(pc)) { + if (!user_mode(regs) && in_lock_functions(pc)) { #ifdef CONFIG_FRAME_POINTER - return *(unsigned long *)(regs->bp + sizeof(long)); + return ktla_ktva(*(unsigned long *)(regs->bp + sizeof(long))); #else unsigned long *sp = (unsigned long *)kernel_stack_pointer(regs); @@ -45,11 +41,17 @@ unsigned long profile_pc(struct pt_regs *regs) * or above a saved flags. Eflags has bits 22-31 zero, * kernel addresses don't. 
*/ + +#ifdef CONFIG_PAX_KERNEXEC + return ktla_ktva(sp[0]); +#else if (sp[0] >> 22) return sp[0]; if (sp[1] >> 22) return sp[1]; #endif + +#endif } return pc; } diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 6bb7b85..dd853e1 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -85,6 +85,11 @@ int do_set_thread_area(struct task_struct *p, int idx, if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) return -EINVAL; +#ifdef CONFIG_PAX_SEGMEXEC + if ((p->mm->pax_flags & MF_PAX_SEGMEXEC) && (info.contents & MODIFY_LDT_CONTENTS_CODE)) + return -EINVAL; +#endif + set_tls_desc(p, idx, &info, 1); return 0; diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S index 8508237..229b664 100644 --- a/arch/x86/kernel/trampoline_32.S +++ b/arch/x86/kernel/trampoline_32.S @@ -32,6 +32,12 @@ #include #include +#ifdef CONFIG_PAX_KERNEXEC +#define ta(X) (X) +#else +#define ta(X) ((X) - __PAGE_OFFSET) +#endif + /* We can free up trampoline after bootup if cpu hotplug is not supported. */ __CPUINITRODATA .code16 @@ -60,7 +66,7 @@ r_base = . 
inc %ax # protected mode (PE) bit lmsw %ax # into protected mode # flush prefetch and jump to startup_32_smp in arch/i386/kernel/head.S - ljmpl $__BOOT_CS, $(startup_32_smp-__PAGE_OFFSET) + ljmpl $__BOOT_CS, $ta(startup_32_smp) # These need to be in the same 64K segment as the above; # hence we don't use the boot_gdt_descr defined in head.S diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 3af2dff..ba8aa49 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S @@ -91,7 +91,7 @@ startup_32: movl $__KERNEL_DS, %eax # Initialize the %ds segment register movl %eax, %ds - movl $X86_CR4_PAE, %eax + movl $(X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE), %eax movl %eax, %cr4 # Enable PAE mode # Setup trampoline 4 level pagetables @@ -127,7 +127,7 @@ startup_64: no_longmode: hlt jmp no_longmode -#include "verify_cpu_64.S" +#include "verify_cpu.S" # Careful these need to be in the same 64K segment as the above; tidt: @@ -138,7 +138,7 @@ tidt: # so the kernel can live anywhere .balign 4 tgdt: - .short tgdt_end - tgdt # gdt limit + .short tgdt_end - tgdt - 1 # gdt limit .long tgdt - r_base .short 0 .quad 0x00cf9b000000ffff # __KERNEL32_CS diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7e37dce..ec3f8e5 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -69,12 +69,6 @@ asmlinkage int system_call(void); /* Do we ignore FPU interrupts ? */ char ignore_fpu_irq; - -/* - * The IDT has to be page-aligned to simplify the Pentium - * F0 0F bug workaround. 
- */ -gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, }; #endif DECLARE_BITMAP(used_vectors, NR_VECTORS); @@ -112,19 +106,19 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) static inline void die_if_kernel(const char *str, struct pt_regs *regs, long err) { - if (!user_mode_vm(regs)) + if (!user_mode(regs)) die(str, regs, err); } #endif static void __kprobes -do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, +do_trap(int trapnr, int signr, const char *str, struct pt_regs *regs, long error_code, siginfo_t *info) { struct task_struct *tsk = current; #ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) { + if (v8086_mode(regs)) { /* * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. * On nmi (interrupt 2), do_trap should not be called. @@ -135,7 +129,7 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, } #endif - if (!user_mode(regs)) + if (!user_mode_novm(regs)) goto kernel_trap; #ifdef CONFIG_X86_32 @@ -158,7 +152,7 @@ trap_signal: printk_ratelimit()) { printk(KERN_INFO "%s[%d] trap %s ip:%lx sp:%lx error:%lx", - tsk->comm, tsk->pid, str, + tsk->comm, task_pid_nr(tsk), str, regs->ip, regs->sp, error_code); print_vma_addr(" in ", regs->ip); printk("\n"); @@ -175,8 +169,20 @@ kernel_trap: if (!fixup_exception(regs)) { tsk->thread.error_code = error_code; tsk->thread.trap_no = trapnr; + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) + if (trapnr == 12 && ((regs->cs & 0xFFFF) == __KERNEL_CS || (regs->cs & 0xFFFF) == __KERNEXEC_KERNEL_CS)) + str = "PAX: suspicious stack segment fault"; +#endif + die(str, regs, error_code); } + +#ifdef CONFIG_PAX_REFCOUNT + if (trapnr == 4) + pax_report_refcount_overflow(regs); +#endif + return; #ifdef CONFIG_X86_32 @@ -265,14 +271,30 @@ do_general_protection(struct pt_regs *regs, long error_code) conditional_sti(regs); #ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) + if (v8086_mode(regs)) goto gp_in_vm86; #endif tsk = current; - if 
(!user_mode(regs)) + if (!user_mode_novm(regs)) goto gp_in_kernel; +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_PAGEEXEC) + if (!nx_enabled && tsk->mm && (tsk->mm->pax_flags & MF_PAX_PAGEEXEC)) { + struct mm_struct *mm = tsk->mm; + unsigned long limit; + + down_write(&mm->mmap_sem); + limit = mm->context.user_cs_limit; + if (limit < TASK_SIZE) { + track_exec_limit(mm, limit, TASK_SIZE, VM_EXEC); + up_write(&mm->mmap_sem); + return; + } + up_write(&mm->mmap_sem); + } +#endif + tsk->thread.error_code = error_code; tsk->thread.trap_no = 13; @@ -305,6 +327,13 @@ gp_in_kernel: if (notify_die(DIE_GPF, "general protection fault", regs, error_code, 13, SIGSEGV) == NOTIFY_STOP) return; + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) + if ((regs->cs & 0xFFFF) == __KERNEL_CS || (regs->cs & 0xFFFF) == __KERNEXEC_KERNEL_CS) + die("PAX: suspicious general protection fault", regs, error_code); + else +#endif + die("general protection fault", regs, error_code); } @@ -435,6 +464,17 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) dotraplinkage notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) { + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) + if (!user_mode(regs)) { + unsigned long cs = regs->cs & 0xFFFF; + unsigned long ip = ktva_ktla(regs->ip); + + if ((cs == __KERNEL_CS || cs == __KERNEXEC_KERNEL_CS) && ip <= (unsigned long)_etext) + regs->ip = ip; + } +#endif + nmi_enter(); inc_irq_stat(__nmi_count); @@ -558,7 +598,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) } #ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) + if (v8086_mode(regs)) goto debug_vm86; #endif @@ -570,7 +610,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) * kernel space (but re-enable TF when returning to user mode). 
*/ if (condition & DR_STEP) { - if (!user_mode(regs)) + if (!user_mode_novm(regs)) goto clear_TF_reenable; } @@ -757,7 +797,7 @@ do_simd_coprocessor_error(struct pt_regs *regs, long error_code) * Handle strange cache flush from user space exception * in all other cases. This is undocumented behaviour. */ - if (regs->flags & X86_VM_MASK) { + if (v8086_mode(regs)) { handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code); return; } @@ -798,7 +838,7 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void) void __math_state_restore(void) { struct thread_info *thread = current_thread_info(); - struct task_struct *tsk = thread->task; + struct task_struct *tsk = current; /* * Paranoid restore. send a SIGSEGV if we fail to restore the state. @@ -825,8 +865,7 @@ void __math_state_restore(void) */ asmlinkage void math_state_restore(void) { - struct thread_info *thread = current_thread_info(); - struct task_struct *tsk = thread->task; + struct task_struct *tsk = current; if (!tsk_used_math(tsk)) { local_irq_enable(); diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S new file mode 100644 index 0000000..50c5edd --- /dev/null +++ b/arch/x86/kernel/verify_cpu.S @@ -0,0 +1,140 @@ +/* + * + * verify_cpu.S - Code for cpu long mode and SSE verification. This + * code has been borrowed from boot/setup.S and was introduced by + * Andi Kleen. + * + * Copyright (c) 2007 Andi Kleen (ak@suse.de) + * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com) + * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com) + * Copyright (c) 2010 Kees Cook (kees.cook@canonical.com) + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + * + * This is a common code for verification whether CPU supports + * long mode and SSE or not. It is not called directly instead this + * file is included at various places and compiled in that context. + * This file is expected to run in 32bit code. 
Currently: + * + * arch/x86/boot/compressed/head_64.S: Boot cpu verification + * arch/x86/kernel/trampoline_64.S: secondary processor verification + * arch/x86/kernel/head_32.S: processor startup + * arch/x86/kernel/acpi/realmode/wakeup.S: 32bit processor resume + * + * verify_cpu, returns the status of longmode and SSE in register %eax. + * 0: Success 1: Failure + * + * On Intel, the XD_DISABLE flag will be cleared as a side-effect. + * + * The caller needs to check for the error code and take the action + * appropriately. Either display a message or halt. + */ + +#include +#include + +verify_cpu: + pushfl # Save caller passed flags + pushl $0 # Kill any dangerous flags + popfl + + pushfl # standard way to check for cpuid + popl %eax + movl %eax,%ebx + xorl $0x200000,%eax + pushl %eax + popfl + pushfl + popl %eax + cmpl %eax,%ebx + jz verify_cpu_no_longmode # cpu has no cpuid + + movl $0x0,%eax # See if cpuid 1 is implemented + cpuid + cmpl $0x1,%eax + jb verify_cpu_no_longmode # no cpuid 1 + + xor %di,%di + cmpl $0x68747541,%ebx # AuthenticAMD + jnz verify_cpu_noamd + cmpl $0x69746e65,%edx + jnz verify_cpu_noamd + cmpl $0x444d4163,%ecx + jnz verify_cpu_noamd + mov $1,%di # cpu is from AMD + jmp verify_cpu_check + +verify_cpu_noamd: + cmpl $0x756e6547,%ebx # GenuineIntel? 
+ jnz verify_cpu_check + cmpl $0x49656e69,%edx + jnz verify_cpu_check + cmpl $0x6c65746e,%ecx + jnz verify_cpu_check + + # only call IA32_MISC_ENABLE when: + # family > 6 || (family == 6 && model >= 0xd) + movl $0x1, %eax # check CPU family and model + cpuid + movl %eax, %ecx + + andl $0x0ff00f00, %eax # mask family and extended family + shrl $8, %eax + cmpl $6, %eax + ja verify_cpu_clear_xd # family > 6, ok + jb verify_cpu_check # family < 6, skip + + andl $0x000f00f0, %ecx # mask model and extended model + shrl $4, %ecx + cmpl $0xd, %ecx + jb verify_cpu_check # family == 6, model < 0xd, skip + +verify_cpu_clear_xd: + movl $MSR_IA32_MISC_ENABLE, %ecx + rdmsr + btrl $2, %edx # clear MSR_IA32_MISC_ENABLE_XD_DISABLE + jnc verify_cpu_check # only write MSR if bit was changed + wrmsr + +verify_cpu_check: + movl $0x1,%eax # Does the cpu have what it takes + cpuid + andl $REQUIRED_MASK0,%edx + xorl $REQUIRED_MASK0,%edx + jnz verify_cpu_no_longmode + + movl $0x80000000,%eax # See if extended cpuid is implemented + cpuid + cmpl $0x80000001,%eax + jb verify_cpu_no_longmode # no extended cpuid + + movl $0x80000001,%eax # Does the cpu have what it takes + cpuid + andl $REQUIRED_MASK1,%edx + xorl $REQUIRED_MASK1,%edx + jnz verify_cpu_no_longmode + +verify_cpu_sse_test: + movl $1,%eax + cpuid + andl $SSE_MASK,%edx + cmpl $SSE_MASK,%edx + je verify_cpu_sse_ok + test %di,%di + jz verify_cpu_no_longmode # only try to force SSE on AMD + movl $MSR_K7_HWCR,%ecx + rdmsr + btr $15,%eax # enable SSE + wrmsr + xor %di,%di # don't loop + jmp verify_cpu_sse_test # try again + +verify_cpu_no_longmode: + popfl # Restore caller passed flags + movl $1,%eax + ret +verify_cpu_sse_ok: + popfl # Restore caller passed flags + xorl %eax, %eax + ret diff --git a/arch/x86/kernel/verify_cpu_64.S b/arch/x86/kernel/verify_cpu_64.S deleted file mode 100644 index 45b6f8a..0000000 --- a/arch/x86/kernel/verify_cpu_64.S +++ /dev/null @@ -1,105 +0,0 @@ -/* - * - * verify_cpu.S - Code for cpu long mode and SSE 
verification. This - * code has been borrowed from boot/setup.S and was introduced by - * Andi Kleen. - * - * Copyright (c) 2007 Andi Kleen (ak@suse.de) - * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com) - * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com) - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. - * - * This is a common code for verification whether CPU supports - * long mode and SSE or not. It is not called directly instead this - * file is included at various places and compiled in that context. - * Following are the current usage. - * - * This file is included by both 16bit and 32bit code. - * - * arch/x86_64/boot/setup.S : Boot cpu verification (16bit) - * arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit) - * arch/x86_64/kernel/trampoline.S: secondary processor verfication (16bit) - * arch/x86_64/kernel/acpi/wakeup.S:Verfication at resume (16bit) - * - * verify_cpu, returns the status of cpu check in register %eax. - * 0: Success 1: Failure - * - * The caller needs to check for the error code and take the action - * appropriately. Either display a message or halt. 
- */ - -#include - -verify_cpu: - pushfl # Save caller passed flags - pushl $0 # Kill any dangerous flags - popfl - - pushfl # standard way to check for cpuid - popl %eax - movl %eax,%ebx - xorl $0x200000,%eax - pushl %eax - popfl - pushfl - popl %eax - cmpl %eax,%ebx - jz verify_cpu_no_longmode # cpu has no cpuid - - movl $0x0,%eax # See if cpuid 1 is implemented - cpuid - cmpl $0x1,%eax - jb verify_cpu_no_longmode # no cpuid 1 - - xor %di,%di - cmpl $0x68747541,%ebx # AuthenticAMD - jnz verify_cpu_noamd - cmpl $0x69746e65,%edx - jnz verify_cpu_noamd - cmpl $0x444d4163,%ecx - jnz verify_cpu_noamd - mov $1,%di # cpu is from AMD - -verify_cpu_noamd: - movl $0x1,%eax # Does the cpu have what it takes - cpuid - andl $REQUIRED_MASK0,%edx - xorl $REQUIRED_MASK0,%edx - jnz verify_cpu_no_longmode - - movl $0x80000000,%eax # See if extended cpuid is implemented - cpuid - cmpl $0x80000001,%eax - jb verify_cpu_no_longmode # no extended cpuid - - movl $0x80000001,%eax # Does the cpu have what it takes - cpuid - andl $REQUIRED_MASK1,%edx - xorl $REQUIRED_MASK1,%edx - jnz verify_cpu_no_longmode - -verify_cpu_sse_test: - movl $1,%eax - cpuid - andl $SSE_MASK,%edx - cmpl $SSE_MASK,%edx - je verify_cpu_sse_ok - test %di,%di - jz verify_cpu_no_longmode # only try to force SSE on AMD - movl $0xc0010015,%ecx # HWCR - rdmsr - btr $15,%eax # enable SSE - wrmsr - xor %di,%di # don't loop - jmp verify_cpu_sse_test # try again - -verify_cpu_no_longmode: - popfl # Restore caller passed flags - movl $1,%eax - ret -verify_cpu_sse_ok: - popfl # Restore caller passed flags - xorl %eax, %eax - ret diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 9c4e625..c992817 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -148,7 +149,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) do_exit(SIGSEGV); } - tss = &per_cpu(init_tss, get_cpu()); + tss = init_tss + get_cpu(); 
current->thread.sp0 = current->thread.saved_sp0; current->thread.sysenter_cs = __KERNEL_CS; load_sp0(tss, &current->thread); @@ -208,6 +209,13 @@ int sys_vm86old(struct pt_regs *regs) struct task_struct *tsk; int tmp, ret = -EPERM; +#ifdef CONFIG_GRKERNSEC_VM86 + if (!capable(CAP_SYS_RAWIO)) { + gr_handle_vm86(); + goto out; + } +#endif + tsk = current; if (tsk->thread.saved_sp0) goto out; @@ -238,6 +246,14 @@ int sys_vm86(struct pt_regs *regs) int tmp, ret; struct vm86plus_struct __user *v86; +#ifdef CONFIG_GRKERNSEC_VM86 + if (!capable(CAP_SYS_RAWIO)) { + gr_handle_vm86(); + ret = -EPERM; + goto out; + } +#endif + tsk = current; switch (regs->bx) { case VM86_REQUEST_IRQ: @@ -324,7 +340,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk tsk->thread.saved_fs = info->regs32->fs; tsk->thread.saved_gs = get_user_gs(info->regs32); - tss = &per_cpu(init_tss, get_cpu()); + tss = init_tss + get_cpu(); tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; if (cpu_has_sep) tsk->thread.sysenter_cs = 0; @@ -529,7 +545,7 @@ static void do_int(struct kernel_vm86_regs *regs, int i, goto cannot_handle; if (i == 0x21 && is_revectored(AH(regs), &KVM86->int21_revectored)) goto cannot_handle; - intr_ptr = (unsigned long __user *) (i << 2); + intr_ptr = (__force unsigned long __user *) (i << 2); if (get_user(segoffs, intr_ptr)) goto cannot_handle; if ((segoffs >> 16) == BIOSSEG) diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index d430e4c..831f817 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -44,12 +44,17 @@ typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void); typedef u64 __attribute__((regparm(2))) (VROMLONGFUNC)(int); #define call_vrom_func(rom,func) \ - (((VROMFUNC *)(rom->func))()) + (((VROMFUNC *)(ktva_ktla(rom.func)))()) #define call_vrom_long_func(rom,func,arg) \ - (((VROMLONGFUNC *)(rom->func)) (arg)) +({\ + u64 __reloc = ((VROMLONGFUNC *)(ktva_ktla(rom.func))) (arg);\ + struct vmi_relocation_info 
*const __rel = (struct vmi_relocation_info *)&__reloc;\ + __rel->eip = (unsigned char *)ktva_ktla((unsigned long)__rel->eip);\ + __reloc;\ +}) -static struct vrom_header *vmi_rom; +static struct vrom_header vmi_rom __attribute((__section__(".vmi.rom"), __aligned__(PAGE_SIZE))); static int disable_pge; static int disable_pse; static int disable_sep; @@ -76,10 +81,10 @@ static struct { void (*set_initial_ap_state)(int, int); void (*halt)(void); void (*set_lazy_mode)(int mode); -} vmi_ops; +} __no_const vmi_ops __read_only; /* Cached VMI operations */ -struct vmi_timer_ops vmi_timer_ops; +struct vmi_timer_ops vmi_timer_ops __read_only; /* * VMI patching routines. @@ -94,7 +99,7 @@ struct vmi_timer_ops vmi_timer_ops; static inline void patch_offset(void *insnbuf, unsigned long ip, unsigned long dest) { - *(unsigned long *)(insnbuf+1) = dest-ip-5; + *(unsigned long *)(insnbuf+1) = dest-ip-5; } static unsigned patch_internal(int call, unsigned len, void *insnbuf, @@ -102,6 +107,7 @@ static unsigned patch_internal(int call, unsigned len, void *insnbuf, { u64 reloc; struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc; + reloc = call_vrom_long_func(vmi_rom, get_reloc, call); switch(rel->type) { case VMI_RELOCATION_CALL_REL: @@ -404,13 +410,13 @@ static void vmi_set_pud(pud_t *pudp, pud_t pudval) static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - const pte_t pte = { .pte = 0 }; + const pte_t pte = __pte(0ULL); vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); } static void vmi_pmd_clear(pmd_t *pmd) { - const pte_t pte = { .pte = 0 }; + const pte_t pte = __pte(0ULL); vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); } #endif @@ -438,10 +444,10 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, ap.ss = __KERNEL_DS; ap.esp = (unsigned long) start_esp; - ap.ds = __USER_DS; - ap.es = __USER_DS; + ap.ds = __KERNEL_DS; + ap.es = __KERNEL_DS; ap.fs = __KERNEL_PERCPU; - ap.gs = 
__KERNEL_STACK_CANARY; + savesegment(gs, ap.gs); ap.eflags = 0; @@ -486,6 +492,18 @@ static void vmi_leave_lazy_mmu(void) paravirt_leave_lazy_mmu(); } +#ifdef CONFIG_PAX_KERNEXEC +static unsigned long vmi_pax_open_kernel(void) +{ + return 0; +} + +static unsigned long vmi_pax_close_kernel(void) +{ + return 0; +} +#endif + static inline int __init check_vmi_rom(struct vrom_header *rom) { struct pci_header *pci; @@ -498,6 +516,10 @@ static inline int __init check_vmi_rom(struct vrom_header *rom) return 0; if (rom->vrom_signature != VMI_SIGNATURE) return 0; + if (rom->rom_length * 512 > sizeof(*rom)) { + printk(KERN_WARNING "PAX: VMI: ROM size too big: %x\n", rom->rom_length * 512); + return 0; + } if (rom->api_version_maj != VMI_API_REV_MAJOR || rom->api_version_min+1 < VMI_API_REV_MINOR+1) { printk(KERN_WARNING "VMI: Found mismatched rom version %d.%d\n", @@ -562,7 +584,7 @@ static inline int __init probe_vmi_rom(void) struct vrom_header *romstart; romstart = (struct vrom_header *)isa_bus_to_virt(base); if (check_vmi_rom(romstart)) { - vmi_rom = romstart; + vmi_rom = *romstart; return 1; } } @@ -836,6 +858,11 @@ static inline int __init activate_vmi(void) para_fill(pv_irq_ops.safe_halt, Halt); +#ifdef CONFIG_PAX_KERNEXEC + pv_mmu_ops.pax_open_kernel = vmi_pax_open_kernel; + pv_mmu_ops.pax_close_kernel = vmi_pax_close_kernel; +#endif + /* * Alternative instruction rewriting doesn't happen soon enough * to convert VMI_IRET to a call instead of a jump; so we have @@ -853,16 +880,16 @@ static inline int __init activate_vmi(void) void __init vmi_init(void) { - if (!vmi_rom) + if (!vmi_rom.rom_signature) probe_vmi_rom(); else - check_vmi_rom(vmi_rom); + check_vmi_rom(&vmi_rom); /* In case probing for or validating the ROM failed, basil */ - if (!vmi_rom) + if (!vmi_rom.rom_signature) return; - reserve_top_address(-vmi_rom->virtual_top); + reserve_top_address(-vmi_rom.virtual_top); #ifdef CONFIG_X86_IO_APIC /* This is virtual hardware; timer routing is wired correctly */ 
@@ -874,7 +901,7 @@ void __init vmi_activate(void) { unsigned long flags; - if (!vmi_rom) + if (!vmi_rom.rom_signature) return; local_irq_save(flags); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 3c68fe2..12c8280 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -26,6 +26,13 @@ #include #include #include +#include + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) +#define __KERNEL_TEXT_OFFSET (LOAD_OFFSET + ____LOAD_PHYSICAL_ADDR) +#else +#define __KERNEL_TEXT_OFFSET 0 +#endif #undef i386 /* in case the preprocessor is a 32bit one */ @@ -34,40 +41,53 @@ OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) #ifdef CONFIG_X86_32 OUTPUT_ARCH(i386) ENTRY(phys_startup_32) -jiffies = jiffies_64; #else OUTPUT_ARCH(i386:x86-64) ENTRY(phys_startup_64) -jiffies_64 = jiffies; #endif PHDRS { text PT_LOAD FLAGS(5); /* R_E */ - data PT_LOAD FLAGS(7); /* RWE */ +#ifdef CONFIG_X86_32 + module PT_LOAD FLAGS(5); /* R_E */ +#endif +#ifdef CONFIG_XEN + rodata PT_LOAD FLAGS(5); /* R_E */ +#else + rodata PT_LOAD FLAGS(4); /* R__ */ +#endif + data PT_LOAD FLAGS(6); /* RW_ */ #ifdef CONFIG_X86_64 user PT_LOAD FLAGS(5); /* R_E */ +#endif + init.begin PT_LOAD FLAGS(6); /* RW_ */ #ifdef CONFIG_SMP percpu PT_LOAD FLAGS(6); /* RW_ */ #endif + text.init PT_LOAD FLAGS(5); /* R_E */ + text.exit PT_LOAD FLAGS(5); /* R_E */ init PT_LOAD FLAGS(7); /* RWE */ -#endif note PT_NOTE FLAGS(0); /* ___ */ } SECTIONS { #ifdef CONFIG_X86_32 - . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; - phys_startup_32 = startup_32 - LOAD_OFFSET; + . = LOAD_OFFSET + ____LOAD_PHYSICAL_ADDR; #else - . = __START_KERNEL; - phys_startup_64 = startup_64 - LOAD_OFFSET; + . = __START_KERNEL; #endif /* Text and read-only data */ - .text : AT(ADDR(.text) - LOAD_OFFSET) { - _text = .; + .text (. 
- __KERNEL_TEXT_OFFSET): AT(ADDR(.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { /* bootstrapping code */ +#ifdef CONFIG_X86_32 + phys_startup_32 = startup_32 - LOAD_OFFSET + __KERNEL_TEXT_OFFSET; +#else + phys_startup_64 = startup_64 - LOAD_OFFSET + __KERNEL_TEXT_OFFSET; +#endif + __LOAD_PHYSICAL_ADDR = . - LOAD_OFFSET + __KERNEL_TEXT_OFFSET; + _text = .; HEAD_TEXT #ifdef CONFIG_X86_32 . = ALIGN(PAGE_SIZE); @@ -82,28 +102,71 @@ SECTIONS IRQENTRY_TEXT *(.fixup) *(.gnu.warning) - /* End of text section */ - _etext = .; } :text = 0x9090 - NOTES :text :note + . += __KERNEL_TEXT_OFFSET; - EXCEPTION_TABLE(16) :text = 0x9090 +#ifdef CONFIG_X86_32 + . = ALIGN(PAGE_SIZE); + .vmi.rom : AT(ADDR(.vmi.rom) - LOAD_OFFSET) { + *(.vmi.rom) + } :module + + . = ALIGN(PAGE_SIZE); + .module.text : AT(ADDR(.module.text) - LOAD_OFFSET) { + +#if defined(CONFIG_PAX_KERNEXEC) && defined(CONFIG_MODULES) + MODULES_EXEC_VADDR = .; + BYTE(0) + . += (CONFIG_PAX_KERNEXEC_MODULE_TEXT * 1024 * 1024); + . = ALIGN(HPAGE_SIZE); + MODULES_EXEC_END = . - 1; +#endif + + } :module +#endif + + .text.end : AT(ADDR(.text.end) - LOAD_OFFSET) { + /* End of text section */ + _etext = . - __KERNEL_TEXT_OFFSET; + } + +#ifdef CONFIG_X86_32 + . = ALIGN(PAGE_SIZE); + .rodata.page_aligned : AT(ADDR(.rodata.page_aligned) - LOAD_OFFSET) { + *(.idt) + . = ALIGN(PAGE_SIZE); + *(.empty_zero_page) + *(.swapper_pg_fixmap) + *(.swapper_pg_pmd) + *(.swapper_pg_dir) + *(.trampoline_pg_dir) + } :rodata +#endif + + . = ALIGN(PAGE_SIZE); + NOTES :rodata :note + + EXCEPTION_TABLE(16) :rodata RO_DATA(PAGE_SIZE) /* Data */ .data : AT(ADDR(.data) - LOAD_OFFSET) { + +#ifdef CONFIG_PAX_KERNEXEC + . = ALIGN(HPAGE_SIZE); +#else + . 
= ALIGN(PAGE_SIZE); +#endif + /* Start of data section */ _sdata = .; /* init_task */ INIT_TASK_DATA(THREAD_SIZE) -#ifdef CONFIG_X86_32 - /* 32 bit has nosave before _edata */ NOSAVE_DATA -#endif PAGE_ALIGNED_DATA(PAGE_SIZE) @@ -112,6 +175,8 @@ SECTIONS DATA_DATA CONSTRUCTORS + jiffies = jiffies_64; + /* rarely changed data like cpu maps */ READ_MOSTLY_DATA(CONFIG_X86_INTERNODE_CACHE_BYTES) @@ -166,12 +231,6 @@ SECTIONS } vgetcpu_mode = VVIRT(.vgetcpu_mode); - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); - .jiffies : AT(VLOAD(.jiffies)) { - *(.jiffies) - } - jiffies = VVIRT(.jiffies); - .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) { *(.vsyscall_3) } @@ -187,12 +246,19 @@ SECTIONS #endif /* CONFIG_X86_64 */ /* Init code and data - will be freed after init */ - . = ALIGN(PAGE_SIZE); .init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) { + BYTE(0) + +#ifdef CONFIG_PAX_KERNEXEC + . = ALIGN(HPAGE_SIZE); +#else + . = ALIGN(PAGE_SIZE); +#endif + __init_begin = .; /* paired with __init_end */ - } + } :init.begin -#if defined(CONFIG_X86_64) && defined(CONFIG_SMP) +#ifdef CONFIG_SMP /* * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the * output PHDR, so the next output section - .init.text - should @@ -201,12 +267,27 @@ SECTIONS PERCPU_VADDR(0, :percpu) #endif - INIT_TEXT_SECTION(PAGE_SIZE) -#ifdef CONFIG_X86_64 - :init -#endif + . = ALIGN(PAGE_SIZE); + init_begin = .; + .init.text (. - __KERNEL_TEXT_OFFSET): AT(init_begin - LOAD_OFFSET) { + VMLINUX_SYMBOL(_sinittext) = .; + INIT_TEXT + VMLINUX_SYMBOL(_einittext) = .; + . = ALIGN(PAGE_SIZE); + } :text.init - INIT_DATA_SECTION(16) + /* + * .exit.text is discard at runtime, not link time, to deal with + * references from .altinstructions and .eh_frame + */ + .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { + EXIT_TEXT + . = ALIGN(16); + } :text.exit + . = init_begin + SIZEOF(.init.text) + SIZEOF(.exit.text); + + . 
= ALIGN(PAGE_SIZE); + INIT_DATA_SECTION(16) :init .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { __x86_cpu_dev_start = .; @@ -232,19 +313,11 @@ SECTIONS *(.altinstr_replacement) } - /* - * .exit.text is discard at runtime, not link time, to deal with - * references from .altinstructions and .eh_frame - */ - .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { - EXIT_TEXT - } - .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { EXIT_DATA } -#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) +#ifndef CONFIG_SMP PERCPU(PAGE_SIZE) #endif @@ -267,12 +340,6 @@ SECTIONS . = ALIGN(PAGE_SIZE); } -#ifdef CONFIG_X86_64 - .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { - NOSAVE_DATA - } -#endif - /* BSS */ . = ALIGN(PAGE_SIZE); .bss : AT(ADDR(.bss) - LOAD_OFFSET) { @@ -288,6 +355,7 @@ SECTIONS __brk_base = .; . += 64 * 1024; /* 64k alignment slop space */ *(.brk_reservation) /* areas brk users have reserved */ + . = ALIGN(HPAGE_SIZE); __brk_limit = .; } @@ -316,13 +384,12 @@ SECTIONS * for the boot processor. */ #define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load -INIT_PER_CPU(gdt_page); INIT_PER_CPU(irq_stack_union); /* * Build-time check on the image size: */ -. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), +. 
= ASSERT((_end - _text - __KERNEL_TEXT_OFFSET <= KERNEL_IMAGE_SIZE), "kernel image bigger than KERNEL_IMAGE_SIZE"); #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 62f39d7..3bc46a1 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -80,6 +80,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); /* copy vsyscall data */ + strlcpy(vsyscall_gtod_data.clock.name, clock->name, sizeof vsyscall_gtod_data.clock.name); vsyscall_gtod_data.clock.vread = clock->vread; vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; vsyscall_gtod_data.clock.mask = clock->mask; @@ -203,7 +204,7 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) We do this here because otherwise user space would do it on its own in a likely inferior way (no access to jiffies). If you don't like it pass NULL. */ - if (tcache && tcache->blob[0] == (j = __jiffies)) { + if (tcache && tcache->blob[0] == (j = jiffies)) { p = tcache->blob[1]; } else if (__vgetcpu_mode == VGETCPU_RDTSCP) { /* Load per CPU data from RDTSCP */ diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 3909e3b..5433a97 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -30,8 +30,6 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(copy_user_generic); EXPORT_SYMBOL(__copy_user_nocache); -EXPORT_SYMBOL(copy_from_user); -EXPORT_SYMBOL(copy_to_user); EXPORT_SYMBOL(__copy_from_user_inatomic); EXPORT_SYMBOL(copy_page); diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index c5ee17e..d63218f 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -54,7 +54,7 @@ int check_for_xstate(struct i387_fxsave_struct __user *buf, fx_sw_user->xstate_size > fx_sw_user->extended_size) return -1; - err = __get_user(magic2, (__u32 *) (((void *)fpstate) + + err = __get_user(magic2, 
(__u32 __user *) (((void __user *)fpstate) + fx_sw_user->extended_size - FP_XSTATE_MAGIC2_SIZE)); /* @@ -196,7 +196,7 @@ fx_only: * the other extended state. */ xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE); - return fxrstor_checking((__force struct i387_fxsave_struct *)buf); + return fxrstor_checking((struct i387_fxsave_struct __force_kernel *)buf); } /* @@ -228,7 +228,7 @@ int restore_i387_xstate(void __user *buf) if (task_thread_info(tsk)->status & TS_XSAVE) err = restore_user_xstate(buf); else - err = fxrstor_checking((__force struct i387_fxsave_struct *) + err = fxrstor_checking((struct i387_fxsave_struct __user *) buf); if (unlikely(err)) { /* diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 1350e43..a94b011 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -81,8 +81,8 @@ #define Src2CL (1<<29) #define Src2ImmByte (2<<29) #define Src2One (3<<29) -#define Src2Imm16 (4<<29) -#define Src2Mask (7<<29) +#define Src2Imm16 (4U<<29) +#define Src2Mask (7U<<29) enum { Group1_80, Group1_81, Group1_82, Group1_83, @@ -411,6 +411,7 @@ static u32 group2_table[] = { #define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix) \ do { \ + unsigned long _tmp; \ __asm__ __volatile__ ( \ _PRE_EFLAGS("0", "4", "2") \ _op _suffix " %"_x"3,%1; " \ @@ -424,8 +425,6 @@ static u32 group2_table[] = { /* Raw emulation: instruction has two explicit operands. 
*/ #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \ do { \ - unsigned long _tmp; \ - \ switch ((_dst).bytes) { \ case 2: \ ____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \ @@ -441,7 +440,6 @@ static u32 group2_table[] = { #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ do { \ - unsigned long _tmp; \ switch ((_dst).bytes) { \ case 1: \ ____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b"); \ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 8dfeaaa..4daa395 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -52,7 +52,7 @@ #define APIC_BUS_CYCLE_NS 1 /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ -#define apic_debug(fmt, arg...) +#define apic_debug(fmt, arg...) do {} while (0) #define APIC_LVT_NUM 6 /* 14 is the version for Xeon and Pentium 8.4.8*/ diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 3bc2707..dd157e2 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -416,6 +416,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, int level = PT_PAGE_TABLE_LEVEL; unsigned long mmu_seq; + pax_track_stack(); + pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); kvm_mmu_audit(vcpu, "pre page fault"); @@ -461,6 +463,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, kvm_mmu_free_some_pages(vcpu); sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, level, &write_pt, pfn); + (void)sptep; pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, sptep, *sptep, write_pt); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7c6e63e..c5d92c1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2486,7 +2486,11 @@ static void reload_tss(struct kvm_vcpu *vcpu) int cpu = raw_smp_processor_id(); struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); + + pax_open_kernel(); svm_data->tss_desc->type = 9; /* available 32/64-bit TSS */ + pax_close_kernel(); + 
load_TR_desc(); } @@ -2947,7 +2951,7 @@ static bool svm_gb_page_enable(void) return true; } -static struct kvm_x86_ops svm_x86_ops = { +static const struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, .hardware_setup = svm_hardware_setup, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e6d925f..e7a4af8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -570,7 +570,11 @@ static void reload_tss(void) kvm_get_gdt(&gdt); descs = (void *)gdt.base; + + pax_open_kernel(); descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ + pax_close_kernel(); + load_TR_desc(); } @@ -1410,8 +1414,11 @@ static __init int hardware_setup(void) if (!cpu_has_vmx_flexpriority()) flexpriority_enabled = 0; - if (!cpu_has_vmx_tpr_shadow()) - kvm_x86_ops->update_cr8_intercept = NULL; + if (!cpu_has_vmx_tpr_shadow()) { + pax_open_kernel(); + *(void **)&kvm_x86_ops->update_cr8_intercept = NULL; + pax_close_kernel(); + } if (enable_ept && !cpu_has_vmx_ept_2m_page()) kvm_disable_largepages(); @@ -2362,7 +2369,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); - vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ + vmcs_writel(HOST_RIP, ktla_ktva(kvm_vmx_return)); /* 22.2.5 */ vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); @@ -3718,6 +3725,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) "jmp .Lkvm_vmx_return \n\t" ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" ".Lkvm_vmx_return: " + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) + "ljmp %[cs],$.Lkvm_vmx_return2\n\t" + ".Lkvm_vmx_return2: " +#endif + /* Save guest registers, load host registers, keep flags */ "xchg %0, (%%"R"sp) \n\t" "mov %%"R"ax, %c[rax](%0) \n\t" @@ -3764,8 +3777,13 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 
[r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])), #endif [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)) + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) + ,[cs]"i"(__KERNEL_CS) +#endif + : "cc", "memory" - , R"bx", R"di", R"si" + , R"ax", R"bx", R"di", R"si" #ifdef CONFIG_X86_64 , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" #endif @@ -3782,7 +3800,16 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (vmx->rmode.irq.pending) fixup_rmode_irq(vmx); - asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); + asm("mov %0, %%ds; mov %0, %%es; mov %0, %%ss" : : "r"(__KERNEL_DS)); + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) + loadsegment(fs, __KERNEL_PERCPU); +#endif + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_MEMORY_UDEREF) + __set_fs(current_thread_info()->addr_limit); +#endif + vmx->launched = 1; vmx_complete_interrupts(vmx); @@ -3957,7 +3984,7 @@ static bool vmx_gb_page_enable(void) return false; } -static struct kvm_x86_ops vmx_x86_ops = { +static const struct kvm_x86_ops vmx_x86_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, .hardware_setup = hardware_setup, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index df1cefb..5e882ad 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -82,7 +82,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu); static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries); -struct kvm_x86_ops *kvm_x86_ops; +const struct kvm_x86_ops *kvm_x86_ops; EXPORT_SYMBOL_GPL(kvm_x86_ops); int ignore_msrs = 0; @@ -1430,15 +1430,20 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries) { - int r; + int r, i; r = -E2BIG; if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) goto out; r = -EFAULT; - if (copy_from_user(&vcpu->arch.cpuid_entries, entries, - cpuid->nent * 
sizeof(struct kvm_cpuid_entry2))) + if (!access_ok(VERIFY_READ, entries, cpuid->nent * sizeof(struct kvm_cpuid_entry2))) goto out; + for (i = 0; i < cpuid->nent; ++i) { + struct kvm_cpuid_entry2 cpuid_entry; + if (__copy_from_user(&cpuid_entry, entries + i, sizeof(cpuid_entry))) + goto out; + vcpu->arch.cpuid_entries[i] = cpuid_entry; + } vcpu->arch.cpuid_nent = cpuid->nent; kvm_apic_set_version(vcpu); return 0; @@ -1451,16 +1456,20 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries) { - int r; + int r, i; vcpu_load(vcpu); r = -E2BIG; if (cpuid->nent < vcpu->arch.cpuid_nent) goto out; r = -EFAULT; - if (copy_to_user(entries, &vcpu->arch.cpuid_entries, - vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) + if (!access_ok(VERIFY_WRITE, entries, vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) goto out; + for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { + struct kvm_cpuid_entry2 cpuid_entry = vcpu->arch.cpuid_entries[i]; + if (__copy_to_user(entries + i, &cpuid_entry, sizeof(cpuid_entry))) + goto out; + } return 0; out: @@ -1678,7 +1687,7 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { - if (irq->irq < 0 || irq->irq >= 256) + if (irq->irq >= 256) return -EINVAL; if (irqchip_in_kernel(vcpu->kvm)) return -ENXIO; @@ -3260,10 +3269,10 @@ static struct notifier_block kvmclock_cpufreq_notifier_block = { .notifier_call = kvmclock_cpufreq_notifier }; -int kvm_arch_init(void *opaque) +int kvm_arch_init(const void *opaque) { int r, cpu; - struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; + const struct kvm_x86_ops *ops = (const struct kvm_x86_ops *)opaque; if (kvm_x86_ops) { printk(KERN_ERR "kvm: already loaded the other module\n"); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 7e59dc1..b88c98f 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ 
-1172,9 +1172,10 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count) * Rebooting also tells the Host we're finished, but the RESTART flag tells the * Launcher to reboot us. */ -static void lguest_restart(char *reason) +static __noreturn void lguest_restart(char *reason) { kvm_hypercall2(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART); + BUG(); } /*G:050 diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c index 824fa0b..c619e96 100644 --- a/arch/x86/lib/atomic64_32.c +++ b/arch/x86/lib/atomic64_32.c @@ -25,6 +25,12 @@ u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val) } EXPORT_SYMBOL(atomic64_cmpxchg); +u64 atomic64_cmpxchg_unchecked(atomic64_unchecked_t *ptr, u64 old_val, u64 new_val) +{ + return cmpxchg8b(&ptr->counter, old_val, new_val); +} +EXPORT_SYMBOL(atomic64_cmpxchg_unchecked); + /** * atomic64_xchg - xchg atomic64 variable * @ptr: pointer to type atomic64_t @@ -56,6 +62,36 @@ u64 atomic64_xchg(atomic64_t *ptr, u64 new_val) EXPORT_SYMBOL(atomic64_xchg); /** + * atomic64_xchg_unchecked - xchg atomic64 variable + * @ptr: pointer to type atomic64_unchecked_t + * @new_val: value to assign + * + * Atomically xchgs the value of @ptr to @new_val and returns + * the old value. + */ +u64 atomic64_xchg_unchecked(atomic64_unchecked_t *ptr, u64 new_val) +{ + /* + * Try first with a (possibly incorrect) assumption about + * what we have there. 
We'll do two loops most likely, + * but we'll get an ownership MESI transaction straight away + * instead of a read transaction followed by a + * flush-for-ownership transaction: + */ + u64 old_val, real_val = 0; + + do { + old_val = real_val; + + real_val = atomic64_cmpxchg_unchecked(ptr, old_val, new_val); + + } while (real_val != old_val); + + return old_val; +} +EXPORT_SYMBOL(atomic64_xchg_unchecked); + +/** * atomic64_set - set atomic64 variable * @ptr: pointer to type atomic64_t * @new_val: value to assign @@ -69,7 +105,19 @@ void atomic64_set(atomic64_t *ptr, u64 new_val) EXPORT_SYMBOL(atomic64_set); /** -EXPORT_SYMBOL(atomic64_read); + * atomic64_set_unchecked - set atomic64 variable + * @ptr: pointer to type atomic64_unchecked_t + * @new_val: value to assign + * + * Atomically sets the value of @ptr to @new_val. + */ +void atomic64_set_unchecked(atomic64_unchecked_t *ptr, u64 new_val) +{ + atomic64_xchg_unchecked(ptr, new_val); +} +EXPORT_SYMBOL(atomic64_set_unchecked); + +/** * atomic64_add_return - add and return * @delta: integer value to add * @ptr: pointer to type atomic64_t @@ -99,24 +147,72 @@ noinline u64 atomic64_add_return(u64 delta, atomic64_t *ptr) } EXPORT_SYMBOL(atomic64_add_return); +/** + * atomic64_add_return_unchecked - add and return + * @delta: integer value to add + * @ptr: pointer to type atomic64_unchecked_t + * + * Atomically adds @delta to @ptr and returns @delta + *@ptr + */ +noinline u64 atomic64_add_return_unchecked(u64 delta, atomic64_unchecked_t *ptr) +{ + /* + * Try first with a (possibly incorrect) assumption about + * what we have there. 
We'll do two loops most likely, + * but we'll get an ownership MESI transaction straight away + * instead of a read transaction followed by a + * flush-for-ownership transaction: + */ + u64 old_val, new_val, real_val = 0; + + do { + old_val = real_val; + new_val = old_val + delta; + + real_val = atomic64_cmpxchg_unchecked(ptr, old_val, new_val); + + } while (real_val != old_val); + + return new_val; +} +EXPORT_SYMBOL(atomic64_add_return_unchecked); + u64 atomic64_sub_return(u64 delta, atomic64_t *ptr) { return atomic64_add_return(-delta, ptr); } EXPORT_SYMBOL(atomic64_sub_return); +u64 atomic64_sub_return_unchecked(u64 delta, atomic64_unchecked_t *ptr) +{ + return atomic64_add_return_unchecked(-delta, ptr); +} +EXPORT_SYMBOL(atomic64_sub_return_unchecked); + u64 atomic64_inc_return(atomic64_t *ptr) { return atomic64_add_return(1, ptr); } EXPORT_SYMBOL(atomic64_inc_return); +u64 atomic64_inc_return_unchecked(atomic64_unchecked_t *ptr) +{ + return atomic64_add_return_unchecked(1, ptr); +} +EXPORT_SYMBOL(atomic64_inc_return_unchecked); + u64 atomic64_dec_return(atomic64_t *ptr) { return atomic64_sub_return(1, ptr); } EXPORT_SYMBOL(atomic64_dec_return); +u64 atomic64_dec_return_unchecked(atomic64_unchecked_t *ptr) +{ + return atomic64_sub_return_unchecked(1, ptr); +} +EXPORT_SYMBOL(atomic64_dec_return_unchecked); + /** * atomic64_add - add integer to atomic64 variable * @delta: integer value to add @@ -131,6 +227,19 @@ void atomic64_add(u64 delta, atomic64_t *ptr) EXPORT_SYMBOL(atomic64_add); /** + * atomic64_add_unchecked - add integer to atomic64 variable + * @delta: integer value to add + * @ptr: pointer to type atomic64_unchecked_t + * + * Atomically adds @delta to @ptr. 
+ */ +void atomic64_add_unchecked(u64 delta, atomic64_unchecked_t *ptr) +{ + atomic64_add_return_unchecked(delta, ptr); +} +EXPORT_SYMBOL(atomic64_add_unchecked); + +/** * atomic64_sub - subtract the atomic64 variable * @delta: integer value to subtract * @ptr: pointer to type atomic64_t @@ -144,6 +253,19 @@ void atomic64_sub(u64 delta, atomic64_t *ptr) EXPORT_SYMBOL(atomic64_sub); /** + * atomic64_sub_unchecked - subtract the atomic64 variable + * @delta: integer value to subtract + * @ptr: pointer to type atomic64_unchecked_t + * + * Atomically subtracts @delta from @ptr. + */ +void atomic64_sub_unchecked(u64 delta, atomic64_unchecked_t *ptr) +{ + atomic64_add_unchecked(-delta, ptr); +} +EXPORT_SYMBOL(atomic64_sub_unchecked); + +/** * atomic64_sub_and_test - subtract value from variable and test result * @delta: integer value to subtract * @ptr: pointer to type atomic64_t @@ -173,6 +295,18 @@ void atomic64_inc(atomic64_t *ptr) EXPORT_SYMBOL(atomic64_inc); /** + * atomic64_inc_unchecked - increment atomic64 variable + * @ptr: pointer to type atomic64_unchecked_t + * + * Atomically increments @ptr by 1. + */ +void atomic64_inc_unchecked(atomic64_unchecked_t *ptr) +{ + atomic64_add_unchecked(1, ptr); +} +EXPORT_SYMBOL(atomic64_inc_unchecked); + +/** * atomic64_dec - decrement atomic64 variable * @ptr: pointer to type atomic64_t * @@ -185,6 +319,18 @@ void atomic64_dec(atomic64_t *ptr) EXPORT_SYMBOL(atomic64_dec); /** + * atomic64_dec_unchecked - decrement atomic64 variable + * @ptr: pointer to type atomic64_unchecked_t + * + * Atomically decrements @ptr by 1. 
+ */ +void atomic64_dec_unchecked(atomic64_unchecked_t *ptr) +{ + atomic64_sub_unchecked(1, ptr); +} +EXPORT_SYMBOL(atomic64_dec_unchecked); + +/** * atomic64_dec_and_test - decrement and test * @ptr: pointer to type atomic64_t * diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index adbccd0..98f96c8 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -28,7 +28,8 @@ #include #include #include - +#include + /* * computes a partial checksum, e.g. for TCP/UDP fragments */ @@ -304,9 +305,28 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst, #define ARGBASE 16 #define FP 12 - -ENTRY(csum_partial_copy_generic) + +ENTRY(csum_partial_copy_generic_to_user) CFI_STARTPROC + +#ifdef CONFIG_PAX_MEMORY_UDEREF + pushl %gs + CFI_ADJUST_CFA_OFFSET 4 + popl %es + CFI_ADJUST_CFA_OFFSET -4 + jmp csum_partial_copy_generic +#endif + +ENTRY(csum_partial_copy_generic_from_user) + +#ifdef CONFIG_PAX_MEMORY_UDEREF + pushl %gs + CFI_ADJUST_CFA_OFFSET 4 + popl %ds + CFI_ADJUST_CFA_OFFSET -4 +#endif + +ENTRY(csum_partial_copy_generic) subl $4,%esp CFI_ADJUST_CFA_OFFSET 4 pushl %edi @@ -331,7 +351,7 @@ ENTRY(csum_partial_copy_generic) jmp 4f SRC(1: movw (%esi), %bx ) addl $2, %esi -DST( movw %bx, (%edi) ) +DST( movw %bx, %es:(%edi) ) addl $2, %edi addw %bx, %ax adcl $0, %eax @@ -343,30 +363,30 @@ DST( movw %bx, (%edi) ) SRC(1: movl (%esi), %ebx ) SRC( movl 4(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, (%edi) ) +DST( movl %ebx, %es:(%edi) ) adcl %edx, %eax -DST( movl %edx, 4(%edi) ) +DST( movl %edx, %es:4(%edi) ) SRC( movl 8(%esi), %ebx ) SRC( movl 12(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, 8(%edi) ) +DST( movl %ebx, %es:8(%edi) ) adcl %edx, %eax -DST( movl %edx, 12(%edi) ) +DST( movl %edx, %es:12(%edi) ) SRC( movl 16(%esi), %ebx ) SRC( movl 20(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, 16(%edi) ) +DST( movl %ebx, %es:16(%edi) ) adcl %edx, %eax -DST( movl %edx, 20(%edi) ) +DST( movl %edx, %es:20(%edi) ) SRC( movl 
24(%esi), %ebx ) SRC( movl 28(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, 24(%edi) ) +DST( movl %ebx, %es:24(%edi) ) adcl %edx, %eax -DST( movl %edx, 28(%edi) ) +DST( movl %edx, %es:28(%edi) ) lea 32(%esi), %esi lea 32(%edi), %edi @@ -380,7 +400,7 @@ DST( movl %edx, 28(%edi) ) shrl $2, %edx # This clears CF SRC(3: movl (%esi), %ebx ) adcl %ebx, %eax -DST( movl %ebx, (%edi) ) +DST( movl %ebx, %es:(%edi) ) lea 4(%esi), %esi lea 4(%edi), %edi dec %edx @@ -392,12 +412,12 @@ DST( movl %ebx, (%edi) ) jb 5f SRC( movw (%esi), %cx ) leal 2(%esi), %esi -DST( movw %cx, (%edi) ) +DST( movw %cx, %es:(%edi) ) leal 2(%edi), %edi je 6f shll $16,%ecx SRC(5: movb (%esi), %cl ) -DST( movb %cl, (%edi) ) +DST( movb %cl, %es:(%edi) ) 6: addl %ecx, %eax adcl $0, %eax 7: @@ -408,7 +428,7 @@ DST( movb %cl, (%edi) ) 6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr - movl $-EFAULT, (%ebx) + movl $-EFAULT, %ss:(%ebx) # zero the complete destination - computing the rest # is too much work @@ -421,11 +441,19 @@ DST( movb %cl, (%edi) ) 6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr - movl $-EFAULT,(%ebx) + movl $-EFAULT,%ss:(%ebx) jmp 5000b .previous + pushl %ss + CFI_ADJUST_CFA_OFFSET 4 + popl %ds + CFI_ADJUST_CFA_OFFSET -4 + pushl %ss + CFI_ADJUST_CFA_OFFSET 4 + popl %es + CFI_ADJUST_CFA_OFFSET -4 popl %ebx CFI_ADJUST_CFA_OFFSET -4 CFI_RESTORE ebx @@ -439,26 +467,47 @@ DST( movb %cl, (%edi) ) CFI_ADJUST_CFA_OFFSET -4 ret CFI_ENDPROC -ENDPROC(csum_partial_copy_generic) +ENDPROC(csum_partial_copy_generic_to_user) #else /* Version for PentiumII/PPro */ #define ROUND1(x) \ + nop; nop; nop; \ SRC(movl x(%esi), %ebx ) ; \ addl %ebx, %eax ; \ - DST(movl %ebx, x(%edi) ) ; + DST(movl %ebx, %es:x(%edi)) ; #define ROUND(x) \ + nop; nop; nop; \ SRC(movl x(%esi), %ebx ) ; \ adcl %ebx, %eax ; \ - DST(movl %ebx, x(%edi) ) ; + DST(movl %ebx, %es:x(%edi)) ; #define ARGBASE 12 - -ENTRY(csum_partial_copy_generic) + +ENTRY(csum_partial_copy_generic_to_user) CFI_STARTPROC + +#ifdef CONFIG_PAX_MEMORY_UDEREF + 
pushl %gs + CFI_ADJUST_CFA_OFFSET 4 + popl %es + CFI_ADJUST_CFA_OFFSET -4 + jmp csum_partial_copy_generic +#endif + +ENTRY(csum_partial_copy_generic_from_user) + +#ifdef CONFIG_PAX_MEMORY_UDEREF + pushl %gs + CFI_ADJUST_CFA_OFFSET 4 + popl %ds + CFI_ADJUST_CFA_OFFSET -4 +#endif + +ENTRY(csum_partial_copy_generic) pushl %ebx CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET ebx, 0 @@ -482,7 +531,7 @@ ENTRY(csum_partial_copy_generic) subl %ebx, %edi lea -1(%esi),%edx andl $-32,%edx - lea 3f(%ebx,%ebx), %ebx + lea 3f(%ebx,%ebx,2), %ebx testl %esi, %esi jmp *%ebx 1: addl $64,%esi @@ -503,19 +552,19 @@ ENTRY(csum_partial_copy_generic) jb 5f SRC( movw (%esi), %dx ) leal 2(%esi), %esi -DST( movw %dx, (%edi) ) +DST( movw %dx, %es:(%edi) ) leal 2(%edi), %edi je 6f shll $16,%edx 5: SRC( movb (%esi), %dl ) -DST( movb %dl, (%edi) ) +DST( movb %dl, %es:(%edi) ) 6: addl %edx, %eax adcl $0, %eax 7: .section .fixup, "ax" 6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr - movl $-EFAULT, (%ebx) + movl $-EFAULT, %ss:(%ebx) # zero the complete destination (computing the rest is too much work) movl ARGBASE+8(%esp),%edi # dst movl ARGBASE+12(%esp),%ecx # len @@ -523,10 +572,21 @@ DST( movb %dl, (%edi) ) rep; stosb jmp 7b 6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr - movl $-EFAULT, (%ebx) + movl $-EFAULT, %ss:(%ebx) jmp 7b .previous +#ifdef CONFIG_PAX_MEMORY_UDEREF + pushl %ss + CFI_ADJUST_CFA_OFFSET 4 + popl %ds + CFI_ADJUST_CFA_OFFSET -4 + pushl %ss + CFI_ADJUST_CFA_OFFSET 4 + popl %es + CFI_ADJUST_CFA_OFFSET -4 +#endif + popl %esi CFI_ADJUST_CFA_OFFSET -4 CFI_RESTORE esi @@ -538,7 +598,7 @@ DST( movb %dl, (%edi) ) CFI_RESTORE ebx ret CFI_ENDPROC -ENDPROC(csum_partial_copy_generic) +ENDPROC(csum_partial_copy_generic_to_user) #undef ROUND #undef ROUND1 diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index ebeafcc..1e3a402 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -1,5 +1,6 @@ #include #include +#include /* * Zero a page. 
@@ -10,6 +11,7 @@ ENTRY(clear_page_c) movl $4096/8,%ecx xorl %eax,%eax rep stosq + pax_force_retaddr ret CFI_ENDPROC ENDPROC(clear_page_c) @@ -33,6 +35,7 @@ ENTRY(clear_page) leaq 64(%rdi),%rdi jnz .Lloop nop + pax_force_retaddr ret CFI_ENDPROC .Lclear_page_end: @@ -43,7 +46,7 @@ ENDPROC(clear_page) #include - .section .altinstr_replacement,"ax" + .section .altinstr_replacement,"a" 1: .byte 0xeb /* jmp */ .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ 2: diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S index 727a5d4..333818a 100644 --- a/arch/x86/lib/copy_page_64.S +++ b/arch/x86/lib/copy_page_64.S @@ -2,12 +2,14 @@ #include #include +#include ALIGN copy_page_c: CFI_STARTPROC movl $4096/8,%ecx rep movsq + pax_force_retaddr ret CFI_ENDPROC ENDPROC(copy_page_c) @@ -38,7 +40,7 @@ ENTRY(copy_page) movq 16 (%rsi), %rdx movq 24 (%rsi), %r8 movq 32 (%rsi), %r9 - movq 40 (%rsi), %r10 + movq 40 (%rsi), %r13 movq 48 (%rsi), %r11 movq 56 (%rsi), %r12 @@ -49,7 +51,7 @@ ENTRY(copy_page) movq %rdx, 16 (%rdi) movq %r8, 24 (%rdi) movq %r9, 32 (%rdi) - movq %r10, 40 (%rdi) + movq %r13, 40 (%rdi) movq %r11, 48 (%rdi) movq %r12, 56 (%rdi) @@ -68,7 +70,7 @@ ENTRY(copy_page) movq 16 (%rsi), %rdx movq 24 (%rsi), %r8 movq 32 (%rsi), %r9 - movq 40 (%rsi), %r10 + movq 40 (%rsi), %r13 movq 48 (%rsi), %r11 movq 56 (%rsi), %r12 @@ -77,7 +79,7 @@ ENTRY(copy_page) movq %rdx, 16 (%rdi) movq %r8, 24 (%rdi) movq %r9, 32 (%rdi) - movq %r10, 40 (%rdi) + movq %r13, 40 (%rdi) movq %r11, 48 (%rdi) movq %r12, 56 (%rdi) @@ -94,6 +96,7 @@ ENTRY(copy_page) CFI_RESTORE r13 addq $3*8,%rsp CFI_ADJUST_CFA_OFFSET -3*8 + pax_force_retaddr ret .Lcopy_page_end: CFI_ENDPROC @@ -104,7 +107,7 @@ ENDPROC(copy_page) #include - .section .altinstr_replacement,"ax" + .section .altinstr_replacement,"a" 1: .byte 0xeb /* jmp */ .byte (copy_page_c - copy_page) - (2f - 1b) /* offset */ 2: diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index af8debd..40c75f3 100644 --- 
a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -15,13 +15,15 @@ #include #include #include +#include +#include .macro ALTERNATIVE_JUMP feature,orig,alt 0: .byte 0xe9 /* 32bit jump */ .long \orig-1f /* by default jump to orig */ 1: - .section .altinstr_replacement,"ax" + .section .altinstr_replacement,"a" 2: .byte 0xe9 /* near jump with 32bit immediate */ .long \alt-1b /* offset */ /* or alternatively to alt */ .previous @@ -64,55 +66,26 @@ #endif .endm -/* Standard copy_to_user with segment limit checking */ -ENTRY(copy_to_user) - CFI_STARTPROC - GET_THREAD_INFO(%rax) - movq %rdi,%rcx - addq %rdx,%rcx - jc bad_to_user - cmpq TI_addr_limit(%rax),%rcx - ja bad_to_user - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string - CFI_ENDPROC -ENDPROC(copy_to_user) - -/* Standard copy_from_user with segment limit checking */ -ENTRY(copy_from_user) - CFI_STARTPROC - GET_THREAD_INFO(%rax) - movq %rsi,%rcx - addq %rdx,%rcx - jc bad_from_user - cmpq TI_addr_limit(%rax),%rcx - ja bad_from_user - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string - CFI_ENDPROC -ENDPROC(copy_from_user) - ENTRY(copy_user_generic) CFI_STARTPROC ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC ENDPROC(copy_user_generic) -ENTRY(__copy_from_user_inatomic) - CFI_STARTPROC - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string - CFI_ENDPROC -ENDPROC(__copy_from_user_inatomic) - .section .fixup,"ax" /* must zero dest */ ENTRY(bad_from_user) bad_from_user: CFI_STARTPROC + testl %edx,%edx + js bad_to_user movl %edx,%ecx xorl %eax,%eax rep stosb bad_to_user: movl %edx,%eax + pax_force_retaddr ret CFI_ENDPROC ENDPROC(bad_from_user) @@ -142,19 +115,19 @@ ENTRY(copy_user_generic_unrolled) jz 17f 1: movq (%rsi),%r8 2: movq 1*8(%rsi),%r9 -3: movq 2*8(%rsi),%r10 +3: movq 2*8(%rsi),%rax 4: movq 3*8(%rsi),%r11 5: movq %r8,(%rdi) 6: 
movq %r9,1*8(%rdi) -7: movq %r10,2*8(%rdi) +7: movq %rax,2*8(%rdi) 8: movq %r11,3*8(%rdi) 9: movq 4*8(%rsi),%r8 10: movq 5*8(%rsi),%r9 -11: movq 6*8(%rsi),%r10 +11: movq 6*8(%rsi),%rax 12: movq 7*8(%rsi),%r11 13: movq %r8,4*8(%rdi) 14: movq %r9,5*8(%rdi) -15: movq %r10,6*8(%rdi) +15: movq %rax,6*8(%rdi) 16: movq %r11,7*8(%rdi) leaq 64(%rsi),%rsi leaq 64(%rdi),%rdi @@ -180,6 +153,7 @@ ENTRY(copy_user_generic_unrolled) decl %ecx jnz 21b 23: xor %eax,%eax + pax_force_retaddr ret .section .fixup,"ax" @@ -252,6 +226,7 @@ ENTRY(copy_user_generic_string) 3: rep movsb 4: xorl %eax,%eax + pax_force_retaddr ret .section .fixup,"ax" diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S index cb0c112..e3a6895 100644 --- a/arch/x86/lib/copy_user_nocache_64.S +++ b/arch/x86/lib/copy_user_nocache_64.S @@ -8,12 +8,14 @@ #include #include +#include #define FIX_ALIGNMENT 1 #include #include #include +#include .macro ALIGN_DESTINATION #ifdef FIX_ALIGNMENT @@ -50,6 +52,15 @@ */ ENTRY(__copy_user_nocache) CFI_STARTPROC + +#ifdef CONFIG_PAX_MEMORY_UDEREF + mov $PAX_USER_SHADOW_BASE,%rcx + cmp %rcx,%rsi + jae 1f + add %rcx,%rsi +1: +#endif + cmpl $8,%edx jb 20f /* less then 8 bytes, go to byte copy loop */ ALIGN_DESTINATION @@ -59,19 +70,19 @@ ENTRY(__copy_user_nocache) jz 17f 1: movq (%rsi),%r8 2: movq 1*8(%rsi),%r9 -3: movq 2*8(%rsi),%r10 +3: movq 2*8(%rsi),%rax 4: movq 3*8(%rsi),%r11 5: movnti %r8,(%rdi) 6: movnti %r9,1*8(%rdi) -7: movnti %r10,2*8(%rdi) +7: movnti %rax,2*8(%rdi) 8: movnti %r11,3*8(%rdi) 9: movq 4*8(%rsi),%r8 10: movq 5*8(%rsi),%r9 -11: movq 6*8(%rsi),%r10 +11: movq 6*8(%rsi),%rax 12: movq 7*8(%rsi),%r11 13: movnti %r8,4*8(%rdi) 14: movnti %r9,5*8(%rdi) -15: movnti %r10,6*8(%rdi) +15: movnti %rax,6*8(%rdi) 16: movnti %r11,7*8(%rdi) leaq 64(%rsi),%rsi leaq 64(%rdi),%rdi @@ -98,6 +109,7 @@ ENTRY(__copy_user_nocache) jnz 21b 23: xorl %eax,%eax sfence + pax_force_retaddr ret .section .fixup,"ax" diff --git a/arch/x86/lib/csum-copy_64.S 
b/arch/x86/lib/csum-copy_64.S index f0dba36..48cb4d6 100644 --- a/arch/x86/lib/csum-copy_64.S +++ b/arch/x86/lib/csum-copy_64.S @@ -8,6 +8,7 @@ #include #include #include +#include /* * Checksum copy with exception handling. @@ -228,6 +229,7 @@ ENTRY(csum_partial_copy_generic) CFI_RESTORE rbp addq $7*8,%rsp CFI_ADJUST_CFA_OFFSET -7*8 + pax_force_retaddr 0, 1 ret CFI_RESTORE_STATE diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c index 459b58a..9570bc7 100644 --- a/arch/x86/lib/csum-wrappers_64.c +++ b/arch/x86/lib/csum-wrappers_64.c @@ -52,7 +52,13 @@ csum_partial_copy_from_user(const void __user *src, void *dst, len -= 2; } } - isum = csum_partial_copy_generic((__force const void *)src, + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if ((unsigned long)src < PAX_USER_SHADOW_BASE) + src += PAX_USER_SHADOW_BASE; +#endif + + isum = csum_partial_copy_generic((const void __force_kernel *)src, dst, len, isum, errp, NULL); if (unlikely(*errp)) goto out_err; @@ -105,7 +111,13 @@ csum_partial_copy_to_user(const void *src, void __user *dst, } *errp = 0; - return csum_partial_copy_generic(src, (void __force *)dst, + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if ((unsigned long)dst < PAX_USER_SHADOW_BASE) + dst += PAX_USER_SHADOW_BASE; +#endif + + return csum_partial_copy_generic(src, (void __force_kernel *)dst, len, isum, NULL, errp); } EXPORT_SYMBOL(csum_partial_copy_to_user); diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index 51f1504..ddac4c1 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -33,15 +33,38 @@ #include #include #include +#include +#include +#include + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_MEMORY_UDEREF) +#define __copyuser_seg gs; +#else +#define __copyuser_seg +#endif .text ENTRY(__get_user_1) CFI_STARTPROC + +#if !defined(CONFIG_X86_32) || !defined(CONFIG_PAX_MEMORY_UDEREF) GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user -1: movzb (%_ASM_AX),%edx + +#if 
defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + mov $PAX_USER_SHADOW_BASE,%_ASM_DX + cmp %_ASM_DX,%_ASM_AX + jae 1234f + add %_ASM_DX,%_ASM_AX +1234: +#endif + +#endif + +1: __copyuser_seg movzb (%_ASM_AX),%edx xor %eax,%eax + pax_force_retaddr ret CFI_ENDPROC ENDPROC(__get_user_1) @@ -49,12 +72,26 @@ ENDPROC(__get_user_1) ENTRY(__get_user_2) CFI_STARTPROC add $1,%_ASM_AX + +#if !defined(CONFIG_X86_32) || !defined(CONFIG_PAX_MEMORY_UDEREF) jc bad_get_user GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user -2: movzwl -1(%_ASM_AX),%edx + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + mov $PAX_USER_SHADOW_BASE,%_ASM_DX + cmp %_ASM_DX,%_ASM_AX + jae 1234f + add %_ASM_DX,%_ASM_AX +1234: +#endif + +#endif + +2: __copyuser_seg movzwl -1(%_ASM_AX),%edx xor %eax,%eax + pax_force_retaddr ret CFI_ENDPROC ENDPROC(__get_user_2) @@ -62,12 +99,26 @@ ENDPROC(__get_user_2) ENTRY(__get_user_4) CFI_STARTPROC add $3,%_ASM_AX + +#if !defined(CONFIG_X86_32) || !defined(CONFIG_PAX_MEMORY_UDEREF) jc bad_get_user GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user -3: mov -3(%_ASM_AX),%edx + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + mov $PAX_USER_SHADOW_BASE,%_ASM_DX + cmp %_ASM_DX,%_ASM_AX + jae 1234f + add %_ASM_DX,%_ASM_AX +1234: +#endif + +#endif + +3: __copyuser_seg mov -3(%_ASM_AX),%edx xor %eax,%eax + pax_force_retaddr ret CFI_ENDPROC ENDPROC(__get_user_4) @@ -80,8 +131,18 @@ ENTRY(__get_user_8) GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + +#ifdef CONFIG_PAX_MEMORY_UDEREF + mov $PAX_USER_SHADOW_BASE,%_ASM_DX + cmp %_ASM_DX,%_ASM_AX + jae 1234f + add %_ASM_DX,%_ASM_AX +1234: +#endif + 4: movq -7(%_ASM_AX),%_ASM_DX xor %eax,%eax + pax_force_retaddr ret CFI_ENDPROC ENDPROC(__get_user_8) @@ -91,6 +152,7 @@ bad_get_user: CFI_STARTPROC xor %edx,%edx mov $(-EFAULT),%_ASM_AX + pax_force_retaddr ret CFI_ENDPROC END(bad_get_user) diff --git 
a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S index 05a95e7..326f2fa 100644 --- a/arch/x86/lib/iomap_copy_64.S +++ b/arch/x86/lib/iomap_copy_64.S @@ -17,6 +17,7 @@ #include #include +#include /* * override generic version in lib/iomap_copy.c @@ -25,6 +26,7 @@ ENTRY(__iowrite32_copy) CFI_STARTPROC movl %edx,%ecx rep movsd + pax_force_retaddr ret CFI_ENDPROC ENDPROC(__iowrite32_copy) diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index ad5441e..610e351 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -4,6 +4,7 @@ #include #include +#include /* * memcpy - Copy a memory block. @@ -34,6 +35,7 @@ memcpy_c: rep movsq movl %edx, %ecx rep movsb + pax_force_retaddr ret CFI_ENDPROC ENDPROC(memcpy_c) @@ -118,6 +120,7 @@ ENTRY(memcpy) jnz .Lloop_1 .Lend: + pax_force_retaddr 0, 1 ret CFI_ENDPROC ENDPROC(memcpy) @@ -128,7 +131,7 @@ ENDPROC(__memcpy) * It is also a lot simpler. Use this when possible: */ - .section .altinstr_replacement, "ax" + .section .altinstr_replacement, "a" 1: .byte 0xeb /* jmp */ .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */ 2: diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 2c59481..7e9ba4e 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -2,6 +2,7 @@ #include #include +#include /* * ISO C memset - set a memory block to a byte value. 
@@ -28,6 +29,7 @@ memset_c: movl %r8d,%ecx rep stosb movq %r9,%rax + pax_force_retaddr ret CFI_ENDPROC ENDPROC(memset_c) @@ -35,13 +37,13 @@ ENDPROC(memset_c) ENTRY(memset) ENTRY(__memset) CFI_STARTPROC - movq %rdi,%r10 movq %rdx,%r11 /* expand byte value */ movzbl %sil,%ecx movabs $0x0101010101010101,%rax mul %rcx /* with rax, clobbers rdx */ + movq %rdi,%rdx /* align dst */ movl %edi,%r9d @@ -95,7 +97,8 @@ ENTRY(__memset) jnz .Lloop_1 .Lende: - movq %r10,%rax + movq %rdx,%rax + pax_force_retaddr ret CFI_RESTORE_STATE @@ -118,7 +121,7 @@ ENDPROC(__memset) #include - .section .altinstr_replacement,"ax" + .section .altinstr_replacement,"a" 1: .byte 0xeb /* jmp */ .byte (memset_c - memset) - (2f - 1b) /* offset */ 2: diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c index c9f2d9b..e7fd2c0 100644 --- a/arch/x86/lib/mmx_32.c +++ b/arch/x86/lib/mmx_32.c @@ -29,6 +29,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len) { void *p; int i; + unsigned long cr0; if (unlikely(in_interrupt())) return __memcpy(to, from, len); @@ -39,44 +40,72 @@ void *_mmx_memcpy(void *to, const void *from, size_t len) kernel_fpu_begin(); __asm__ __volatile__ ( - "1: prefetch (%0)\n" /* This set is 28 bytes */ - " prefetch 64(%0)\n" - " prefetch 128(%0)\n" - " prefetch 192(%0)\n" - " prefetch 256(%0)\n" + "1: prefetch (%1)\n" /* This set is 28 bytes */ + " prefetch 64(%1)\n" + " prefetch 128(%1)\n" + " prefetch 192(%1)\n" + " prefetch 256(%1)\n" "2: \n" ".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + "3: \n" + +#ifdef CONFIG_PAX_KERNEXEC + " movl %%cr0, %0\n" + " movl %0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " movl %0, %%cr0\n" +#endif + " jmp 2b\n" ".previous\n" _ASM_EXTABLE(1b, 3b) - : : "r" (from)); + : "=&r" (cr0) : "r" (from) : "ax"); for ( ; i > 5; i--) { __asm__ __volatile__ ( - "1: prefetch 320(%0)\n" - "2: movq (%0), 
%%mm0\n" - " movq 8(%0), %%mm1\n" - " movq 16(%0), %%mm2\n" - " movq 24(%0), %%mm3\n" - " movq %%mm0, (%1)\n" - " movq %%mm1, 8(%1)\n" - " movq %%mm2, 16(%1)\n" - " movq %%mm3, 24(%1)\n" - " movq 32(%0), %%mm0\n" - " movq 40(%0), %%mm1\n" - " movq 48(%0), %%mm2\n" - " movq 56(%0), %%mm3\n" - " movq %%mm0, 32(%1)\n" - " movq %%mm1, 40(%1)\n" - " movq %%mm2, 48(%1)\n" - " movq %%mm3, 56(%1)\n" + "1: prefetch 320(%1)\n" + "2: movq (%1), %%mm0\n" + " movq 8(%1), %%mm1\n" + " movq 16(%1), %%mm2\n" + " movq 24(%1), %%mm3\n" + " movq %%mm0, (%2)\n" + " movq %%mm1, 8(%2)\n" + " movq %%mm2, 16(%2)\n" + " movq %%mm3, 24(%2)\n" + " movq 32(%1), %%mm0\n" + " movq 40(%1), %%mm1\n" + " movq 48(%1), %%mm2\n" + " movq 56(%1), %%mm3\n" + " movq %%mm0, 32(%2)\n" + " movq %%mm1, 40(%2)\n" + " movq %%mm2, 48(%2)\n" + " movq %%mm3, 56(%2)\n" ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + "3:\n" + +#ifdef CONFIG_PAX_KERNEXEC + " movl %%cr0, %0\n" + " movl %0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " movl %0, %%cr0\n" +#endif + " jmp 2b\n" ".previous\n" _ASM_EXTABLE(1b, 3b) - : : "r" (from), "r" (to) : "memory"); + : "=&r" (cr0) : "r" (from), "r" (to) : "memory", "ax"); from += 64; to += 64; @@ -158,6 +187,7 @@ static void fast_clear_page(void *page) static void fast_copy_page(void *to, void *from) { int i; + unsigned long cr0; kernel_fpu_begin(); @@ -166,42 +196,70 @@ static void fast_copy_page(void *to, void *from) * but that is for later. 
-AV */ __asm__ __volatile__( - "1: prefetch (%0)\n" - " prefetch 64(%0)\n" - " prefetch 128(%0)\n" - " prefetch 192(%0)\n" - " prefetch 256(%0)\n" + "1: prefetch (%1)\n" + " prefetch 64(%1)\n" + " prefetch 128(%1)\n" + " prefetch 192(%1)\n" + " prefetch 256(%1)\n" "2: \n" ".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + "3: \n" + +#ifdef CONFIG_PAX_KERNEXEC + " movl %%cr0, %0\n" + " movl %0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " movl %0, %%cr0\n" +#endif + " jmp 2b\n" ".previous\n" - _ASM_EXTABLE(1b, 3b) : : "r" (from)); + _ASM_EXTABLE(1b, 3b) : "=&r" (cr0) : "r" (from) : "ax"); for (i = 0; i < (4096-320)/64; i++) { __asm__ __volatile__ ( - "1: prefetch 320(%0)\n" - "2: movq (%0), %%mm0\n" - " movntq %%mm0, (%1)\n" - " movq 8(%0), %%mm1\n" - " movntq %%mm1, 8(%1)\n" - " movq 16(%0), %%mm2\n" - " movntq %%mm2, 16(%1)\n" - " movq 24(%0), %%mm3\n" - " movntq %%mm3, 24(%1)\n" - " movq 32(%0), %%mm4\n" - " movntq %%mm4, 32(%1)\n" - " movq 40(%0), %%mm5\n" - " movntq %%mm5, 40(%1)\n" - " movq 48(%0), %%mm6\n" - " movntq %%mm6, 48(%1)\n" - " movq 56(%0), %%mm7\n" - " movntq %%mm7, 56(%1)\n" + "1: prefetch 320(%1)\n" + "2: movq (%1), %%mm0\n" + " movntq %%mm0, (%2)\n" + " movq 8(%1), %%mm1\n" + " movntq %%mm1, 8(%2)\n" + " movq 16(%1), %%mm2\n" + " movntq %%mm2, 16(%2)\n" + " movq 24(%1), %%mm3\n" + " movntq %%mm3, 24(%2)\n" + " movq 32(%1), %%mm4\n" + " movntq %%mm4, 32(%2)\n" + " movq 40(%1), %%mm5\n" + " movntq %%mm5, 40(%2)\n" + " movq 48(%1), %%mm6\n" + " movntq %%mm6, 48(%2)\n" + " movq 56(%1), %%mm7\n" + " movntq %%mm7, 56(%2)\n" ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + "3:\n" + +#ifdef CONFIG_PAX_KERNEXEC + " movl %%cr0, %0\n" + " movl %0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC 
+ " movl %0, %%cr0\n" +#endif + " jmp 2b\n" ".previous\n" - _ASM_EXTABLE(1b, 3b) : : "r" (from), "r" (to) : "memory"); + _ASM_EXTABLE(1b, 3b) : "=&r" (cr0) : "r" (from), "r" (to) : "memory", "ax"); from += 64; to += 64; @@ -280,47 +338,76 @@ static void fast_clear_page(void *page) static void fast_copy_page(void *to, void *from) { int i; + unsigned long cr0; kernel_fpu_begin(); __asm__ __volatile__ ( - "1: prefetch (%0)\n" - " prefetch 64(%0)\n" - " prefetch 128(%0)\n" - " prefetch 192(%0)\n" - " prefetch 256(%0)\n" + "1: prefetch (%1)\n" + " prefetch 64(%1)\n" + " prefetch 128(%1)\n" + " prefetch 192(%1)\n" + " prefetch 256(%1)\n" "2: \n" ".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + "3: \n" + +#ifdef CONFIG_PAX_KERNEXEC + " movl %%cr0, %0\n" + " movl %0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " movl %0, %%cr0\n" +#endif + " jmp 2b\n" ".previous\n" - _ASM_EXTABLE(1b, 3b) : : "r" (from)); + _ASM_EXTABLE(1b, 3b) : "=&r" (cr0) : "r" (from) : "ax"); for (i = 0; i < 4096/64; i++) { __asm__ __volatile__ ( - "1: prefetch 320(%0)\n" - "2: movq (%0), %%mm0\n" - " movq 8(%0), %%mm1\n" - " movq 16(%0), %%mm2\n" - " movq 24(%0), %%mm3\n" - " movq %%mm0, (%1)\n" - " movq %%mm1, 8(%1)\n" - " movq %%mm2, 16(%1)\n" - " movq %%mm3, 24(%1)\n" - " movq 32(%0), %%mm0\n" - " movq 40(%0), %%mm1\n" - " movq 48(%0), %%mm2\n" - " movq 56(%0), %%mm3\n" - " movq %%mm0, 32(%1)\n" - " movq %%mm1, 40(%1)\n" - " movq %%mm2, 48(%1)\n" - " movq %%mm3, 56(%1)\n" + "1: prefetch 320(%1)\n" + "2: movq (%1), %%mm0\n" + " movq 8(%1), %%mm1\n" + " movq 16(%1), %%mm2\n" + " movq 24(%1), %%mm3\n" + " movq %%mm0, (%2)\n" + " movq %%mm1, 8(%2)\n" + " movq %%mm2, 16(%2)\n" + " movq %%mm3, 24(%2)\n" + " movq 32(%1), %%mm0\n" + " movq 40(%1), %%mm1\n" + " movq 48(%1), %%mm2\n" + " movq 56(%1), %%mm3\n" + " movq %%mm0, 32(%2)\n" + " movq %%mm1, 40(%2)\n" + " movq 
%%mm2, 48(%2)\n" + " movq %%mm3, 56(%2)\n" ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + "3:\n" + +#ifdef CONFIG_PAX_KERNEXEC + " movl %%cr0, %0\n" + " movl %0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " movl %0, %%cr0\n" +#endif + " jmp 2b\n" ".previous\n" _ASM_EXTABLE(1b, 3b) - : : "r" (from), "r" (to) : "memory"); + : "=&r" (cr0) : "r" (from), "r" (to) : "memory", "ax"); from += 64; to += 64; diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index 69fa106..adda88b 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S @@ -3,6 +3,7 @@ #include #include #include +#include #ifdef CONFIG_X86_64 /* @@ -16,7 +17,7 @@ ENTRY(native_\op\()_safe_regs) CFI_STARTPROC pushq_cfi %rbx pushq_cfi %rbp - movq %rdi, %r10 /* Save pointer */ + movq %rdi, %r9 /* Save pointer */ xorl %r11d, %r11d /* Return value */ movl (%rdi), %eax movl 4(%rdi), %ecx @@ -27,16 +28,17 @@ ENTRY(native_\op\()_safe_regs) movl 28(%rdi), %edi CFI_REMEMBER_STATE 1: \op -2: movl %eax, (%r10) +2: movl %eax, (%r9) movl %r11d, %eax /* Return value */ - movl %ecx, 4(%r10) - movl %edx, 8(%r10) - movl %ebx, 12(%r10) - movl %ebp, 20(%r10) - movl %esi, 24(%r10) - movl %edi, 28(%r10) + movl %ecx, 4(%r9) + movl %edx, 8(%r9) + movl %ebx, 12(%r9) + movl %ebp, 20(%r9) + movl %esi, 24(%r9) + movl %edi, 28(%r9) popq_cfi %rbp popq_cfi %rbx + pax_force_retaddr ret 3: CFI_RESTORE_STATE diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index 36b0d15..d381858 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -15,7 +15,9 @@ #include #include #include - +#include +#include +#include /* * __put_user_X @@ -29,52 +31,119 @@ * as they get called from within inline assembly. 
*/ -#define ENTER CFI_STARTPROC ; \ - GET_THREAD_INFO(%_ASM_BX) -#define EXIT ret ; \ +#define ENTER CFI_STARTPROC +#define EXIT pax_force_retaddr; ret ; \ CFI_ENDPROC +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) +#define _DEST %_ASM_CX,%_ASM_BX +#else +#define _DEST %_ASM_CX +#endif + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_MEMORY_UDEREF) +#define __copyuser_seg gs; +#else +#define __copyuser_seg +#endif + .text ENTRY(__put_user_1) ENTER + +#if !defined(CONFIG_X86_32) || !defined(CONFIG_PAX_MEMORY_UDEREF) + GET_THREAD_INFO(%_ASM_BX) cmp TI_addr_limit(%_ASM_BX),%_ASM_CX jae bad_put_user -1: movb %al,(%_ASM_CX) + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + mov $PAX_USER_SHADOW_BASE,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX + jb 1234f + xor %ebx,%ebx +1234: +#endif + +#endif + +1: __copyuser_seg movb %al,(_DEST) xor %eax,%eax EXIT ENDPROC(__put_user_1) ENTRY(__put_user_2) ENTER + +#if !defined(CONFIG_X86_32) || !defined(CONFIG_PAX_MEMORY_UDEREF) + GET_THREAD_INFO(%_ASM_BX) mov TI_addr_limit(%_ASM_BX),%_ASM_BX sub $1,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user -2: movw %ax,(%_ASM_CX) + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + mov $PAX_USER_SHADOW_BASE,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX + jb 1234f + xor %ebx,%ebx +1234: +#endif + +#endif + +2: __copyuser_seg movw %ax,(_DEST) xor %eax,%eax EXIT ENDPROC(__put_user_2) ENTRY(__put_user_4) ENTER + +#if !defined(CONFIG_X86_32) || !defined(CONFIG_PAX_MEMORY_UDEREF) + GET_THREAD_INFO(%_ASM_BX) mov TI_addr_limit(%_ASM_BX),%_ASM_BX sub $3,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user -3: movl %eax,(%_ASM_CX) + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + mov $PAX_USER_SHADOW_BASE,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX + jb 1234f + xor %ebx,%ebx +1234: +#endif + +#endif + +3: __copyuser_seg movl %eax,(_DEST) xor %eax,%eax EXIT ENDPROC(__put_user_4) ENTRY(__put_user_8) ENTER + +#if !defined(CONFIG_X86_32) || !defined(CONFIG_PAX_MEMORY_UDEREF) 
+ GET_THREAD_INFO(%_ASM_BX) mov TI_addr_limit(%_ASM_BX),%_ASM_BX sub $7,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user -4: mov %_ASM_AX,(%_ASM_CX) + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + mov $PAX_USER_SHADOW_BASE,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX + jb 1234f + xor %ebx,%ebx +1234: +#endif + +#endif + +4: __copyuser_seg mov %_ASM_AX,(_DEST) #ifdef CONFIG_X86_32 -5: movl %edx,4(%_ASM_CX) +5: __copyuser_seg movl %edx,4(_DEST) #endif xor %eax,%eax EXIT diff --git a/arch/x86/lib/rwlock_64.S b/arch/x86/lib/rwlock_64.S index 05ea55f..6345b9a 100644 --- a/arch/x86/lib/rwlock_64.S +++ b/arch/x86/lib/rwlock_64.S @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -10,13 +11,34 @@ ENTRY(__write_lock_failed) CFI_STARTPROC LOCK_PREFIX addl $RW_LOCK_BIAS,(%rdi) + +#ifdef CONFIG_PAX_REFCOUNT + jno 1234f + LOCK_PREFIX + subl $RW_LOCK_BIAS,(%rdi) + int $4 +1234: + _ASM_EXTABLE(1234b, 1234b) +#endif + 1: rep nop cmpl $RW_LOCK_BIAS,(%rdi) jne 1b LOCK_PREFIX subl $RW_LOCK_BIAS,(%rdi) + +#ifdef CONFIG_PAX_REFCOUNT + jno 1234f + LOCK_PREFIX + addl $RW_LOCK_BIAS,(%rdi) + int $4 +1234: + _ASM_EXTABLE(1234b, 1234b) +#endif + jnz __write_lock_failed + pax_force_retaddr ret CFI_ENDPROC END(__write_lock_failed) @@ -26,13 +48,34 @@ ENTRY(__read_lock_failed) CFI_STARTPROC LOCK_PREFIX incl (%rdi) + +#ifdef CONFIG_PAX_REFCOUNT + jno 1234f + LOCK_PREFIX + decl (%rdi) + int $4 +1234: + _ASM_EXTABLE(1234b, 1234b) +#endif + 1: rep nop cmpl $1,(%rdi) js 1b LOCK_PREFIX decl (%rdi) + +#ifdef CONFIG_PAX_REFCOUNT + jno 1234f + LOCK_PREFIX + incl (%rdi) + int $4 +1234: + _ASM_EXTABLE(1234b, 1234b) +#endif + js __read_lock_failed + pax_force_retaddr ret CFI_ENDPROC END(__read_lock_failed) diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S index 15acecf..f768b10 100644 --- a/arch/x86/lib/rwsem_64.S +++ b/arch/x86/lib/rwsem_64.S @@ -48,6 +48,7 @@ ENTRY(call_rwsem_down_read_failed) call rwsem_down_read_failed popq %rdx restore_common_regs + pax_force_retaddr 
ret ENDPROC(call_rwsem_down_read_failed) @@ -56,6 +57,7 @@ ENTRY(call_rwsem_down_write_failed) movq %rax,%rdi call rwsem_down_write_failed restore_common_regs + pax_force_retaddr ret ENDPROC(call_rwsem_down_write_failed) @@ -66,7 +68,8 @@ ENTRY(call_rwsem_wake) movq %rax,%rdi call rwsem_wake restore_common_regs -1: ret +1: pax_force_retaddr + ret ENDPROC(call_rwsem_wake) /* Fix up special calling conventions */ @@ -77,5 +80,6 @@ ENTRY(call_rwsem_downgrade_wake) call rwsem_downgrade_wake popq %rdx restore_common_regs + pax_force_retaddr ret ENDPROC(call_rwsem_downgrade_wake) diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index bf9a7d5..fb06ab5 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S @@ -10,7 +10,8 @@ #include #include #include - + #include + /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ .macro thunk name,func .globl \name @@ -70,6 +71,7 @@ SAVE_ARGS restore: RESTORE_ARGS + pax_force_retaddr ret CFI_ENDPROC @@ -77,5 +79,6 @@ restore: SAVE_ARGS restore_norax: RESTORE_ARGS 1 + pax_force_retaddr ret CFI_ENDPROC diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 1f118d4..ec4a953 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -43,7 +43,7 @@ do { \ __asm__ __volatile__( \ " testl %1,%1\n" \ " jz 2f\n" \ - "0: lodsb\n" \ + "0: "__copyuser_seg"lodsb\n" \ " stosb\n" \ " testb %%al,%%al\n" \ " jz 1f\n" \ @@ -128,10 +128,12 @@ do { \ int __d0; \ might_fault(); \ __asm__ __volatile__( \ + __COPYUSER_SET_ES \ "0: rep; stosl\n" \ " movl %2,%0\n" \ "1: rep; stosb\n" \ "2:\n" \ + __COPYUSER_RESTORE_ES \ ".section .fixup,\"ax\"\n" \ "3: lea 0(%2,%0,4),%0\n" \ " jmp 2b\n" \ @@ -200,6 +202,7 @@ long strnlen_user(const char __user *s, long n) might_fault(); __asm__ __volatile__( + __COPYUSER_SET_ES " testl %0, %0\n" " jz 3f\n" " andl %0,%%ecx\n" @@ -208,6 +211,7 @@ long strnlen_user(const char __user *s, long n) " subl %%ecx,%0\n" " addl %0,%%eax\n" "1:\n" + 
__COPYUSER_RESTORE_ES ".section .fixup,\"ax\"\n" "2: xorl %%eax,%%eax\n" " jmp 1b\n" @@ -227,7 +231,7 @@ EXPORT_SYMBOL(strnlen_user); #ifdef CONFIG_X86_INTEL_USERCOPY static unsigned long -__copy_user_intel(void __user *to, const void *from, unsigned long size) +__generic_copy_to_user_intel(void __user *to, const void *from, unsigned long size) { int d0, d1; __asm__ __volatile__( @@ -239,36 +243,36 @@ __copy_user_intel(void __user *to, const void *from, unsigned long size) " .align 2,0x90\n" "3: movl 0(%4), %%eax\n" "4: movl 4(%4), %%edx\n" - "5: movl %%eax, 0(%3)\n" - "6: movl %%edx, 4(%3)\n" + "5: "__copyuser_seg" movl %%eax, 0(%3)\n" + "6: "__copyuser_seg" movl %%edx, 4(%3)\n" "7: movl 8(%4), %%eax\n" "8: movl 12(%4),%%edx\n" - "9: movl %%eax, 8(%3)\n" - "10: movl %%edx, 12(%3)\n" + "9: "__copyuser_seg" movl %%eax, 8(%3)\n" + "10: "__copyuser_seg" movl %%edx, 12(%3)\n" "11: movl 16(%4), %%eax\n" "12: movl 20(%4), %%edx\n" - "13: movl %%eax, 16(%3)\n" - "14: movl %%edx, 20(%3)\n" + "13: "__copyuser_seg" movl %%eax, 16(%3)\n" + "14: "__copyuser_seg" movl %%edx, 20(%3)\n" "15: movl 24(%4), %%eax\n" "16: movl 28(%4), %%edx\n" - "17: movl %%eax, 24(%3)\n" - "18: movl %%edx, 28(%3)\n" + "17: "__copyuser_seg" movl %%eax, 24(%3)\n" + "18: "__copyuser_seg" movl %%edx, 28(%3)\n" "19: movl 32(%4), %%eax\n" "20: movl 36(%4), %%edx\n" - "21: movl %%eax, 32(%3)\n" - "22: movl %%edx, 36(%3)\n" + "21: "__copyuser_seg" movl %%eax, 32(%3)\n" + "22: "__copyuser_seg" movl %%edx, 36(%3)\n" "23: movl 40(%4), %%eax\n" "24: movl 44(%4), %%edx\n" - "25: movl %%eax, 40(%3)\n" - "26: movl %%edx, 44(%3)\n" + "25: "__copyuser_seg" movl %%eax, 40(%3)\n" + "26: "__copyuser_seg" movl %%edx, 44(%3)\n" "27: movl 48(%4), %%eax\n" "28: movl 52(%4), %%edx\n" - "29: movl %%eax, 48(%3)\n" - "30: movl %%edx, 52(%3)\n" + "29: "__copyuser_seg" movl %%eax, 48(%3)\n" + "30: "__copyuser_seg" movl %%edx, 52(%3)\n" "31: movl 56(%4), %%eax\n" "32: movl 60(%4), %%edx\n" - "33: movl %%eax, 56(%3)\n" - "34: movl 
%%edx, 60(%3)\n" + "33: "__copyuser_seg" movl %%eax, 56(%3)\n" + "34: "__copyuser_seg" movl %%edx, 60(%3)\n" " addl $-64, %0\n" " addl $64, %4\n" " addl $64, %3\n" @@ -278,10 +282,119 @@ __copy_user_intel(void __user *to, const void *from, unsigned long size) " shrl $2, %0\n" " andl $3, %%eax\n" " cld\n" + __COPYUSER_SET_ES "99: rep; movsl\n" "36: movl %%eax, %0\n" "37: rep; movsb\n" "100:\n" + __COPYUSER_RESTORE_ES + ".section .fixup,\"ax\"\n" + "101: lea 0(%%eax,%0,4),%0\n" + " jmp 100b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b,100b\n" + " .long 2b,100b\n" + " .long 3b,100b\n" + " .long 4b,100b\n" + " .long 5b,100b\n" + " .long 6b,100b\n" + " .long 7b,100b\n" + " .long 8b,100b\n" + " .long 9b,100b\n" + " .long 10b,100b\n" + " .long 11b,100b\n" + " .long 12b,100b\n" + " .long 13b,100b\n" + " .long 14b,100b\n" + " .long 15b,100b\n" + " .long 16b,100b\n" + " .long 17b,100b\n" + " .long 18b,100b\n" + " .long 19b,100b\n" + " .long 20b,100b\n" + " .long 21b,100b\n" + " .long 22b,100b\n" + " .long 23b,100b\n" + " .long 24b,100b\n" + " .long 25b,100b\n" + " .long 26b,100b\n" + " .long 27b,100b\n" + " .long 28b,100b\n" + " .long 29b,100b\n" + " .long 30b,100b\n" + " .long 31b,100b\n" + " .long 32b,100b\n" + " .long 33b,100b\n" + " .long 34b,100b\n" + " .long 35b,100b\n" + " .long 36b,100b\n" + " .long 37b,100b\n" + " .long 99b,101b\n" + ".previous" + : "=&c"(size), "=&D" (d0), "=&S" (d1) + : "1"(to), "2"(from), "0"(size) + : "eax", "edx", "memory"); + return size; +} + +static unsigned long +__generic_copy_from_user_intel(void *to, const void __user *from, unsigned long size) +{ + int d0, d1; + __asm__ __volatile__( + " .align 2,0x90\n" + "1: "__copyuser_seg" movl 32(%4), %%eax\n" + " cmpl $67, %0\n" + " jbe 3f\n" + "2: "__copyuser_seg" movl 64(%4), %%eax\n" + " .align 2,0x90\n" + "3: "__copyuser_seg" movl 0(%4), %%eax\n" + "4: "__copyuser_seg" movl 4(%4), %%edx\n" + "5: movl %%eax, 0(%3)\n" + "6: movl %%edx, 4(%3)\n" + "7: 
"__copyuser_seg" movl 8(%4), %%eax\n" + "8: "__copyuser_seg" movl 12(%4),%%edx\n" + "9: movl %%eax, 8(%3)\n" + "10: movl %%edx, 12(%3)\n" + "11: "__copyuser_seg" movl 16(%4), %%eax\n" + "12: "__copyuser_seg" movl 20(%4), %%edx\n" + "13: movl %%eax, 16(%3)\n" + "14: movl %%edx, 20(%3)\n" + "15: "__copyuser_seg" movl 24(%4), %%eax\n" + "16: "__copyuser_seg" movl 28(%4), %%edx\n" + "17: movl %%eax, 24(%3)\n" + "18: movl %%edx, 28(%3)\n" + "19: "__copyuser_seg" movl 32(%4), %%eax\n" + "20: "__copyuser_seg" movl 36(%4), %%edx\n" + "21: movl %%eax, 32(%3)\n" + "22: movl %%edx, 36(%3)\n" + "23: "__copyuser_seg" movl 40(%4), %%eax\n" + "24: "__copyuser_seg" movl 44(%4), %%edx\n" + "25: movl %%eax, 40(%3)\n" + "26: movl %%edx, 44(%3)\n" + "27: "__copyuser_seg" movl 48(%4), %%eax\n" + "28: "__copyuser_seg" movl 52(%4), %%edx\n" + "29: movl %%eax, 48(%3)\n" + "30: movl %%edx, 52(%3)\n" + "31: "__copyuser_seg" movl 56(%4), %%eax\n" + "32: "__copyuser_seg" movl 60(%4), %%edx\n" + "33: movl %%eax, 56(%3)\n" + "34: movl %%edx, 60(%3)\n" + " addl $-64, %0\n" + " addl $64, %4\n" + " addl $64, %3\n" + " cmpl $63, %0\n" + " ja 1b\n" + "35: movl %0, %%eax\n" + " shrl $2, %0\n" + " andl $3, %%eax\n" + " cld\n" + "99: rep; "__copyuser_seg" movsl\n" + "36: movl %%eax, %0\n" + "37: rep; "__copyuser_seg" movsb\n" + "100:\n" ".section .fixup,\"ax\"\n" "101: lea 0(%%eax,%0,4),%0\n" " jmp 100b\n" @@ -339,41 +452,41 @@ __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size) int d0, d1; __asm__ __volatile__( " .align 2,0x90\n" - "0: movl 32(%4), %%eax\n" + "0: "__copyuser_seg" movl 32(%4), %%eax\n" " cmpl $67, %0\n" " jbe 2f\n" - "1: movl 64(%4), %%eax\n" + "1: "__copyuser_seg" movl 64(%4), %%eax\n" " .align 2,0x90\n" - "2: movl 0(%4), %%eax\n" - "21: movl 4(%4), %%edx\n" + "2: "__copyuser_seg" movl 0(%4), %%eax\n" + "21: "__copyuser_seg" movl 4(%4), %%edx\n" " movl %%eax, 0(%3)\n" " movl %%edx, 4(%3)\n" - "3: movl 8(%4), %%eax\n" - "31: movl 12(%4),%%edx\n" + "3: 
"__copyuser_seg" movl 8(%4), %%eax\n" + "31: "__copyuser_seg" movl 12(%4),%%edx\n" " movl %%eax, 8(%3)\n" " movl %%edx, 12(%3)\n" - "4: movl 16(%4), %%eax\n" - "41: movl 20(%4), %%edx\n" + "4: "__copyuser_seg" movl 16(%4), %%eax\n" + "41: "__copyuser_seg" movl 20(%4), %%edx\n" " movl %%eax, 16(%3)\n" " movl %%edx, 20(%3)\n" - "10: movl 24(%4), %%eax\n" - "51: movl 28(%4), %%edx\n" + "10: "__copyuser_seg" movl 24(%4), %%eax\n" + "51: "__copyuser_seg" movl 28(%4), %%edx\n" " movl %%eax, 24(%3)\n" " movl %%edx, 28(%3)\n" - "11: movl 32(%4), %%eax\n" - "61: movl 36(%4), %%edx\n" + "11: "__copyuser_seg" movl 32(%4), %%eax\n" + "61: "__copyuser_seg" movl 36(%4), %%edx\n" " movl %%eax, 32(%3)\n" " movl %%edx, 36(%3)\n" - "12: movl 40(%4), %%eax\n" - "71: movl 44(%4), %%edx\n" + "12: "__copyuser_seg" movl 40(%4), %%eax\n" + "71: "__copyuser_seg" movl 44(%4), %%edx\n" " movl %%eax, 40(%3)\n" " movl %%edx, 44(%3)\n" - "13: movl 48(%4), %%eax\n" - "81: movl 52(%4), %%edx\n" + "13: "__copyuser_seg" movl 48(%4), %%eax\n" + "81: "__copyuser_seg" movl 52(%4), %%edx\n" " movl %%eax, 48(%3)\n" " movl %%edx, 52(%3)\n" - "14: movl 56(%4), %%eax\n" - "91: movl 60(%4), %%edx\n" + "14: "__copyuser_seg" movl 56(%4), %%eax\n" + "91: "__copyuser_seg" movl 60(%4), %%edx\n" " movl %%eax, 56(%3)\n" " movl %%edx, 60(%3)\n" " addl $-64, %0\n" @@ -385,9 +498,9 @@ __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size) " shrl $2, %0\n" " andl $3, %%eax\n" " cld\n" - "6: rep; movsl\n" + "6: rep; "__copyuser_seg" movsl\n" " movl %%eax,%0\n" - "7: rep; movsb\n" + "7: rep; "__copyuser_seg" movsb\n" "8:\n" ".section .fixup,\"ax\"\n" "9: lea 0(%%eax,%0,4),%0\n" @@ -440,41 +553,41 @@ static unsigned long __copy_user_zeroing_intel_nocache(void *to, __asm__ __volatile__( " .align 2,0x90\n" - "0: movl 32(%4), %%eax\n" + "0: "__copyuser_seg" movl 32(%4), %%eax\n" " cmpl $67, %0\n" " jbe 2f\n" - "1: movl 64(%4), %%eax\n" + "1: "__copyuser_seg" movl 64(%4), %%eax\n" " .align 2,0x90\n" 
- "2: movl 0(%4), %%eax\n" - "21: movl 4(%4), %%edx\n" + "2: "__copyuser_seg" movl 0(%4), %%eax\n" + "21: "__copyuser_seg" movl 4(%4), %%edx\n" " movnti %%eax, 0(%3)\n" " movnti %%edx, 4(%3)\n" - "3: movl 8(%4), %%eax\n" - "31: movl 12(%4),%%edx\n" + "3: "__copyuser_seg" movl 8(%4), %%eax\n" + "31: "__copyuser_seg" movl 12(%4),%%edx\n" " movnti %%eax, 8(%3)\n" " movnti %%edx, 12(%3)\n" - "4: movl 16(%4), %%eax\n" - "41: movl 20(%4), %%edx\n" + "4: "__copyuser_seg" movl 16(%4), %%eax\n" + "41: "__copyuser_seg" movl 20(%4), %%edx\n" " movnti %%eax, 16(%3)\n" " movnti %%edx, 20(%3)\n" - "10: movl 24(%4), %%eax\n" - "51: movl 28(%4), %%edx\n" + "10: "__copyuser_seg" movl 24(%4), %%eax\n" + "51: "__copyuser_seg" movl 28(%4), %%edx\n" " movnti %%eax, 24(%3)\n" " movnti %%edx, 28(%3)\n" - "11: movl 32(%4), %%eax\n" - "61: movl 36(%4), %%edx\n" + "11: "__copyuser_seg" movl 32(%4), %%eax\n" + "61: "__copyuser_seg" movl 36(%4), %%edx\n" " movnti %%eax, 32(%3)\n" " movnti %%edx, 36(%3)\n" - "12: movl 40(%4), %%eax\n" - "71: movl 44(%4), %%edx\n" + "12: "__copyuser_seg" movl 40(%4), %%eax\n" + "71: "__copyuser_seg" movl 44(%4), %%edx\n" " movnti %%eax, 40(%3)\n" " movnti %%edx, 44(%3)\n" - "13: movl 48(%4), %%eax\n" - "81: movl 52(%4), %%edx\n" + "13: "__copyuser_seg" movl 48(%4), %%eax\n" + "81: "__copyuser_seg" movl 52(%4), %%edx\n" " movnti %%eax, 48(%3)\n" " movnti %%edx, 52(%3)\n" - "14: movl 56(%4), %%eax\n" - "91: movl 60(%4), %%edx\n" + "14: "__copyuser_seg" movl 56(%4), %%eax\n" + "91: "__copyuser_seg" movl 60(%4), %%edx\n" " movnti %%eax, 56(%3)\n" " movnti %%edx, 60(%3)\n" " addl $-64, %0\n" @@ -487,9 +600,9 @@ static unsigned long __copy_user_zeroing_intel_nocache(void *to, " shrl $2, %0\n" " andl $3, %%eax\n" " cld\n" - "6: rep; movsl\n" + "6: rep; "__copyuser_seg" movsl\n" " movl %%eax,%0\n" - "7: rep; movsb\n" + "7: rep; "__copyuser_seg" movsb\n" "8:\n" ".section .fixup,\"ax\"\n" "9: lea 0(%%eax,%0,4),%0\n" @@ -537,41 +650,41 @@ static unsigned long 
__copy_user_intel_nocache(void *to, __asm__ __volatile__( " .align 2,0x90\n" - "0: movl 32(%4), %%eax\n" + "0: "__copyuser_seg" movl 32(%4), %%eax\n" " cmpl $67, %0\n" " jbe 2f\n" - "1: movl 64(%4), %%eax\n" + "1: "__copyuser_seg" movl 64(%4), %%eax\n" " .align 2,0x90\n" - "2: movl 0(%4), %%eax\n" - "21: movl 4(%4), %%edx\n" + "2: "__copyuser_seg" movl 0(%4), %%eax\n" + "21: "__copyuser_seg" movl 4(%4), %%edx\n" " movnti %%eax, 0(%3)\n" " movnti %%edx, 4(%3)\n" - "3: movl 8(%4), %%eax\n" - "31: movl 12(%4),%%edx\n" + "3: "__copyuser_seg" movl 8(%4), %%eax\n" + "31: "__copyuser_seg" movl 12(%4),%%edx\n" " movnti %%eax, 8(%3)\n" " movnti %%edx, 12(%3)\n" - "4: movl 16(%4), %%eax\n" - "41: movl 20(%4), %%edx\n" + "4: "__copyuser_seg" movl 16(%4), %%eax\n" + "41: "__copyuser_seg" movl 20(%4), %%edx\n" " movnti %%eax, 16(%3)\n" " movnti %%edx, 20(%3)\n" - "10: movl 24(%4), %%eax\n" - "51: movl 28(%4), %%edx\n" + "10: "__copyuser_seg" movl 24(%4), %%eax\n" + "51: "__copyuser_seg" movl 28(%4), %%edx\n" " movnti %%eax, 24(%3)\n" " movnti %%edx, 28(%3)\n" - "11: movl 32(%4), %%eax\n" - "61: movl 36(%4), %%edx\n" + "11: "__copyuser_seg" movl 32(%4), %%eax\n" + "61: "__copyuser_seg" movl 36(%4), %%edx\n" " movnti %%eax, 32(%3)\n" " movnti %%edx, 36(%3)\n" - "12: movl 40(%4), %%eax\n" - "71: movl 44(%4), %%edx\n" + "12: "__copyuser_seg" movl 40(%4), %%eax\n" + "71: "__copyuser_seg" movl 44(%4), %%edx\n" " movnti %%eax, 40(%3)\n" " movnti %%edx, 44(%3)\n" - "13: movl 48(%4), %%eax\n" - "81: movl 52(%4), %%edx\n" + "13: "__copyuser_seg" movl 48(%4), %%eax\n" + "81: "__copyuser_seg" movl 52(%4), %%edx\n" " movnti %%eax, 48(%3)\n" " movnti %%edx, 52(%3)\n" - "14: movl 56(%4), %%eax\n" - "91: movl 60(%4), %%edx\n" + "14: "__copyuser_seg" movl 56(%4), %%eax\n" + "91: "__copyuser_seg" movl 60(%4), %%edx\n" " movnti %%eax, 56(%3)\n" " movnti %%edx, 60(%3)\n" " addl $-64, %0\n" @@ -584,9 +697,9 @@ static unsigned long __copy_user_intel_nocache(void *to, " shrl $2, %0\n" " andl $3, 
%%eax\n" " cld\n" - "6: rep; movsl\n" + "6: rep; "__copyuser_seg" movsl\n" " movl %%eax,%0\n" - "7: rep; movsb\n" + "7: rep; "__copyuser_seg" movsb\n" "8:\n" ".section .fixup,\"ax\"\n" "9: lea 0(%%eax,%0,4),%0\n" @@ -629,32 +742,36 @@ static unsigned long __copy_user_intel_nocache(void *to, */ unsigned long __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size); -unsigned long __copy_user_intel(void __user *to, const void *from, +unsigned long __generic_copy_to_user_intel(void __user *to, const void *from, + unsigned long size); +unsigned long __generic_copy_from_user_intel(void *to, const void __user *from, unsigned long size); unsigned long __copy_user_zeroing_intel_nocache(void *to, const void __user *from, unsigned long size); #endif /* CONFIG_X86_INTEL_USERCOPY */ /* Generic arbitrary sized copy. */ -#define __copy_user(to, from, size) \ +#define __copy_user(to, from, size, prefix, set, restore) \ do { \ int __d0, __d1, __d2; \ __asm__ __volatile__( \ + set \ " cmp $7,%0\n" \ " jbe 1f\n" \ " movl %1,%0\n" \ " negl %0\n" \ " andl $7,%0\n" \ " subl %0,%3\n" \ - "4: rep; movsb\n" \ + "4: rep; "prefix"movsb\n" \ " movl %3,%0\n" \ " shrl $2,%0\n" \ " andl $3,%3\n" \ " .align 2,0x90\n" \ - "0: rep; movsl\n" \ + "0: rep; "prefix"movsl\n" \ " movl %3,%0\n" \ - "1: rep; movsb\n" \ + "1: rep; "prefix"movsb\n" \ "2:\n" \ + restore \ ".section .fixup,\"ax\"\n" \ "5: addl %3,%0\n" \ " jmp 2b\n" \ @@ -682,14 +799,14 @@ do { \ " negl %0\n" \ " andl $7,%0\n" \ " subl %0,%3\n" \ - "4: rep; movsb\n" \ + "4: rep; "__copyuser_seg"movsb\n" \ " movl %3,%0\n" \ " shrl $2,%0\n" \ " andl $3,%3\n" \ " .align 2,0x90\n" \ - "0: rep; movsl\n" \ + "0: rep; "__copyuser_seg"movsl\n" \ " movl %3,%0\n" \ - "1: rep; movsb\n" \ + "1: rep; "__copyuser_seg"movsb\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "5: addl %3,%0\n" \ @@ -775,9 +892,9 @@ survive: } #endif if (movsl_is_ok(to, from, n)) - __copy_user(to, from, n); + __copy_user(to, from, n, "", __COPYUSER_SET_ES, 
__COPYUSER_RESTORE_ES); else - n = __copy_user_intel(to, from, n); + n = __generic_copy_to_user_intel(to, from, n); return n; } EXPORT_SYMBOL(__copy_to_user_ll); @@ -797,10 +914,9 @@ unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from, unsigned long n) { if (movsl_is_ok(to, from, n)) - __copy_user(to, from, n); + __copy_user(to, from, n, __copyuser_seg, "", ""); else - n = __copy_user_intel((void __user *)to, - (const void *)from, n); + n = __generic_copy_from_user_intel(to, from, n); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nozero); @@ -827,59 +943,38 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr if (n > 64 && cpu_has_xmm2) n = __copy_user_intel_nocache(to, from, n); else - __copy_user(to, from, n); + __copy_user(to, from, n, __copyuser_seg, "", ""); #else - __copy_user(to, from, n); + __copy_user(to, from, n, __copyuser_seg, "", ""); #endif return n; } EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); -/** - * copy_to_user: - Copy a block of data into user space. - * @to: Destination address, in user space. - * @from: Source address, in kernel space. - * @n: Number of bytes to copy. - * - * Context: User context only. This function may sleep. - * - * Copy data from kernel space to user space. - * - * Returns number of bytes that could not be copied. - * On success, this will be zero. - */ -unsigned long -copy_to_user(void __user *to, const void *from, unsigned long n) +#ifdef CONFIG_PAX_MEMORY_UDEREF +void __set_fs(mm_segment_t x) { - if (access_ok(VERIFY_WRITE, to, n)) - n = __copy_to_user(to, from, n); - return n; + switch (x.seg) { + case 0: + loadsegment(gs, 0); + break; + case TASK_SIZE_MAX: + loadsegment(gs, __USER_DS); + break; + case -1UL: + loadsegment(gs, __KERNEL_DS); + break; + default: + BUG(); + } + return; } -EXPORT_SYMBOL(copy_to_user); +EXPORT_SYMBOL(__set_fs); -/** - * copy_from_user: - Copy a block of data from user space. - * @to: Destination address, in kernel space. 
- * @from: Source address, in user space. - * @n: Number of bytes to copy. - * - * Context: User context only. This function may sleep. - * - * Copy data from user space to kernel space. - * - * Returns number of bytes that could not be copied. - * On success, this will be zero. - * - * If some data could not be copied, this function will pad the copied - * data to the requested size using zero bytes. - */ -unsigned long -copy_from_user(void *to, const void __user *from, unsigned long n) +void set_fs(mm_segment_t x) { - if (access_ok(VERIFY_READ, from, n)) - n = __copy_from_user(to, from, n); - else - memset(to, 0, n); - return n; + current_thread_info()->addr_limit = x; + __set_fs(x); } -EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(set_fs); +#endif diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index b7c2849..8633ad8 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -42,6 +42,12 @@ long __strncpy_from_user(char *dst, const char __user *src, long count) { long res; + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if ((unsigned long)src < PAX_USER_SHADOW_BASE) + src += PAX_USER_SHADOW_BASE; +#endif + __do_strncpy_from_user(dst, src, count, res); return res; } @@ -65,6 +71,12 @@ unsigned long __clear_user(void __user *addr, unsigned long size) { long __d0; might_fault(); + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if ((unsigned long)addr < PAX_USER_SHADOW_BASE) + addr += PAX_USER_SHADOW_BASE; +#endif + /* no memory constraint because it doesn't change any memory gcc knows about */ asm volatile( @@ -149,12 +161,20 @@ long strlen_user(const char __user *s) } EXPORT_SYMBOL(strlen_user); -unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len) +unsigned long copy_in_user(void __user *to, const void __user *from, unsigned long len) { - if (access_ok(VERIFY_WRITE, to, len) && access_ok(VERIFY_READ, from, len)) { - return copy_user_generic((__force void *)to, (__force void *)from, len); - } - return len; + if 
(access_ok(VERIFY_WRITE, to, len) && access_ok(VERIFY_READ, from, len)) { + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if ((unsigned long)to < PAX_USER_SHADOW_BASE) + to += PAX_USER_SHADOW_BASE; + if ((unsigned long)from < PAX_USER_SHADOW_BASE) + from += PAX_USER_SHADOW_BASE; +#endif + + return copy_user_generic((void __force_kernel *)to, (void __force_kernel *)from, len); + } + return len; } EXPORT_SYMBOL(copy_in_user); @@ -164,7 +184,7 @@ EXPORT_SYMBOL(copy_in_user); * it is not necessary to optimize tail handling. */ unsigned long -copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest) +copy_user_handle_tail(char __user *to, char __user *from, unsigned long len, unsigned zerorest) { char c; unsigned zero_len; diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 61b41ca..5fef66a 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -1,14 +1,71 @@ #include #include +#include #include +#include +/* + * The exception table needs to be sorted so that the binary + * search that we use to find entries in it works properly. + * This is used both for the kernel exception table and for + * the exception tables of modules that get loaded. + */ +static int cmp_ex(const void *a, const void *b) +{ + const struct exception_table_entry *x = a, *y = b; + + /* avoid overflow */ + if (x->insn > y->insn) + return 1; + if (x->insn < y->insn) + return -1; + return 0; +} + +static void swap_ex(void *a, void *b, int size) +{ + struct exception_table_entry t, *x = a, *y = b; + + t = *x; + + pax_open_kernel(); + *x = *y; + *y = t; + pax_close_kernel(); +} + +void sort_extable(struct exception_table_entry *start, + struct exception_table_entry *finish) +{ + sort(start, finish - start, sizeof(struct exception_table_entry), + cmp_ex, swap_ex); +} + +#ifdef CONFIG_MODULES +/* + * If the exception table is sorted, any referring to the module init + * will be at the beginning or the end. 
+ */ +void trim_init_extable(struct module *m) +{ + /*trim the beginning*/ + while (m->num_exentries && within_module_init(m->extable[0].insn, m)) { + m->extable++; + m->num_exentries--; + } + /*trim the end*/ + while (m->num_exentries && + within_module_init(m->extable[m->num_exentries-1].insn, m)) + m->num_exentries--; +} +#endif /* CONFIG_MODULES */ int fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *fixup; #ifdef CONFIG_PNPBIOS - if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) { + if (unlikely(!v8086_mode(regs) && SEGMENT_IS_PNP_CODE(regs->cs))) { extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp; extern u32 pnp_bios_is_utter_crap; pnp_bios_is_utter_crap = 1; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 8ac0d76..87899a4 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -11,10 +11,19 @@ #include /* __kprobes, ... */ #include /* kmmio_handler, ... */ #include /* perf_sw_event */ +#include +#include #include /* dotraplinkage, ... */ #include /* pgd_*(), ... */ #include /* kmemcheck_*(), ... 
*/ +#include +#include + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) +#include +#include "../kernel/dumpstack.h" +#endif /* * Page fault error code bits: @@ -51,7 +60,7 @@ static inline int notify_page_fault(struct pt_regs *regs) int ret = 0; /* kprobe_running() needs smp_processor_id() */ - if (kprobes_built_in() && !user_mode_vm(regs)) { + if (kprobes_built_in() && !user_mode(regs)) { preempt_disable(); if (kprobe_running() && kprobe_fault_handler(regs, 14)) ret = 1; @@ -112,7 +121,10 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr, return !instr_lo || (instr_lo>>1) == 1; case 0x00: /* Prefetch instruction is 0x0F0D or 0x0F18 */ - if (probe_kernel_address(instr, opcode)) + if (user_mode(regs)) { + if (__copy_from_user_inatomic(&opcode, (unsigned char __force_user *)(instr), 1)) + return 0; + } else if (probe_kernel_address(instr, opcode)) return 0; *prefetch = (instr_lo == 0xF) && @@ -146,7 +158,10 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) while (instr < max_instr) { unsigned char opcode; - if (probe_kernel_address(instr, opcode)) + if (user_mode(regs)) { + if (__copy_from_user_inatomic(&opcode, (unsigned char __force_user *)(instr), 1)) + break; + } else if (probe_kernel_address(instr, opcode)) break; instr++; @@ -172,6 +187,34 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address, force_sig_info(si_signo, &info, tsk); } +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) +static bool pax_is_fetch_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); +#endif + +#ifdef CONFIG_PAX_EMUTRAMP +static int pax_handle_fetch_fault(struct pt_regs *regs); +#endif + +#ifdef CONFIG_PAX_PAGEEXEC +static inline pmd_t * pax_get_pmd(struct mm_struct *mm, unsigned long address) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset(mm, address); + if (!pgd_present(*pgd)) + return NULL; + pud = pud_offset(pgd, address); + if 
(!pud_present(*pud)) + return NULL; + pmd = pmd_offset(pud, address); + if (!pmd_present(*pmd)) + return NULL; + return pmd; +} +#endif + DEFINE_SPINLOCK(pgd_lock); LIST_HEAD(pgd_list); @@ -224,11 +267,24 @@ void vmalloc_sync_all(void) address += PMD_SIZE) { unsigned long flags; + +#ifdef CONFIG_PAX_PER_CPU_PGD + unsigned long cpu; +#else struct page *page; +#endif spin_lock_irqsave(&pgd_lock, flags); + +#ifdef CONFIG_PAX_PER_CPU_PGD + for (cpu = 0; cpu < NR_CPUS; ++cpu) { + pgd_t *pgd = get_cpu_pgd(cpu); +#else list_for_each_entry(page, &pgd_list, lru) { - if (!vmalloc_sync_one(page_address(page), address)) + pgd_t *pgd = page_address(page); +#endif + + if (!vmalloc_sync_one(pgd, address)) break; } spin_unlock_irqrestore(&pgd_lock, flags); @@ -258,6 +314,11 @@ static noinline int vmalloc_fault(unsigned long address) * an interrupt in the middle of a task switch.. */ pgd_paddr = read_cr3(); + +#ifdef CONFIG_PAX_PER_CPU_PGD + BUG_ON(__pa(get_cpu_pgd(smp_processor_id())) != (pgd_paddr & PHYSICAL_PAGE_MASK)); +#endif + pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); if (!pmd_k) return -1; @@ -332,15 +393,27 @@ void vmalloc_sync_all(void) const pgd_t *pgd_ref = pgd_offset_k(address); unsigned long flags; + +#ifdef CONFIG_PAX_PER_CPU_PGD + unsigned long cpu; +#else struct page *page; +#endif if (pgd_none(*pgd_ref)) continue; spin_lock_irqsave(&pgd_lock, flags); + +#ifdef CONFIG_PAX_PER_CPU_PGD + for (cpu = 0; cpu < NR_CPUS; ++cpu) { + pgd_t *pgd = pgd_offset_cpu(cpu, address); +#else list_for_each_entry(page, &pgd_list, lru) { pgd_t *pgd; pgd = (pgd_t *)page_address(page) + pgd_index(address); +#endif + if (pgd_none(*pgd)) set_pgd(pgd, *pgd_ref); else @@ -373,7 +446,14 @@ static noinline int vmalloc_fault(unsigned long address) * happen within a race in page table update. 
In the later * case just flush: */ + +#ifdef CONFIG_PAX_PER_CPU_PGD + BUG_ON(__pa(get_cpu_pgd(smp_processor_id())) != (read_cr3() & PHYSICAL_PAGE_MASK)); + pgd = pgd_offset_cpu(smp_processor_id(), address); +#else pgd = pgd_offset(current->active_mm, address); +#endif + pgd_ref = pgd_offset_k(address); if (pgd_none(*pgd_ref)) return -1; @@ -535,7 +615,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address) static int is_errata100(struct pt_regs *regs, unsigned long address) { #ifdef CONFIG_X86_64 - if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && (address >> 32)) + if ((regs->cs == __USER32_CS || (regs->cs & SEGMENT_LDT)) && (address >> 32)) return 1; #endif return 0; @@ -562,7 +642,7 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address) } static const char nx_warning[] = KERN_CRIT -"kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n"; +"kernel tried to execute NX-protected page - exploit attempt? (uid: %d, task: %s, pid: %d)\n"; static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, @@ -571,15 +651,26 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, if (!oops_may_print()) return; - if (error_code & PF_INSTR) { + if (nx_enabled && (error_code & PF_INSTR)) { unsigned int level; pte_t *pte = lookup_address(address, &level); if (pte && pte_present(*pte) && !pte_exec(*pte)) - printk(nx_warning, current_uid()); + printk(nx_warning, current_uid(), current->comm, task_pid_nr(current)); } +#ifdef CONFIG_PAX_KERNEXEC + if (init_mm.start_code <= address && address < init_mm.end_code) { + if (current->signal->curr_ip) + printk(KERN_ERR "PAX: From %pI4: %s:%d, uid/euid: %u/%u, attempted to modify kernel code\n", + &current->signal->curr_ip, current->comm, task_pid_nr(current), current_uid(), current_euid()); + else + printk(KERN_ERR "PAX: %s:%d, uid/euid: %u/%u, attempted to modify kernel code\n", + current->comm, task_pid_nr(current), current_uid(), current_euid()); + } +#endif + 
printk(KERN_ALERT "BUG: unable to handle kernel "); if (address < PAGE_SIZE) printk(KERN_CONT "NULL pointer dereference"); @@ -705,6 +796,23 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, { struct task_struct *tsk = current; +#ifdef CONFIG_X86_64 + struct mm_struct *mm = tsk->mm; + + if (mm && (error_code & PF_INSTR) && mm->context.vdso) { + if (regs->ip == (unsigned long)vgettimeofday) { + regs->ip = (unsigned long)VDSO64_SYMBOL(mm->context.vdso, fallback_gettimeofday); + return; + } else if (regs->ip == (unsigned long)vtime) { + regs->ip = (unsigned long)VDSO64_SYMBOL(mm->context.vdso, fallback_time); + return; + } else if (regs->ip == (unsigned long)vgetcpu) { + regs->ip = (unsigned long)VDSO64_SYMBOL(mm->context.vdso, getcpu); + return; + } + } +#endif + /* User mode accesses just cause a SIGSEGV */ if (error_code & PF_USER) { /* @@ -722,6 +830,21 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, if (is_errata100(regs, address)) return; +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + if (pax_is_fetch_fault(regs, error_code, address)) { + +#ifdef CONFIG_PAX_EMUTRAMP + switch (pax_handle_fetch_fault(regs)) { + case 2: + return; + } +#endif + + pax_report_fault(regs, (void *)regs->ip, (void *)regs->sp); + do_group_exit(SIGKILL); + } +#endif + if (unlikely(show_unhandled_signals)) show_signal_msg(regs, error_code, address, tsk); @@ -818,7 +941,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, if (fault & VM_FAULT_HWPOISON) { printk(KERN_ERR "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", - tsk->comm, tsk->pid, address); + tsk->comm, task_pid_nr(tsk), address); code = BUS_MCEERR_AR; } #endif @@ -857,6 +980,99 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) return 1; } +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_PAGEEXEC) +static int pax_handle_pageexec_fault(struct pt_regs *regs, struct mm_struct *mm, 
unsigned long address, unsigned long error_code) +{ + pte_t *pte; + pmd_t *pmd; + spinlock_t *ptl; + unsigned char pte_mask; + + if (nx_enabled || (error_code & (PF_PROT|PF_USER)) != (PF_PROT|PF_USER) || v8086_mode(regs) || + !(mm->pax_flags & MF_PAX_PAGEEXEC)) + return 0; + + /* PaX: it's our fault, let's handle it if we can */ + + /* PaX: take a look at read faults before acquiring any locks */ + if (unlikely(!(error_code & PF_WRITE) && (regs->ip == address))) { + /* instruction fetch attempt from a protected page in user mode */ + up_read(&mm->mmap_sem); + +#ifdef CONFIG_PAX_EMUTRAMP + switch (pax_handle_fetch_fault(regs)) { + case 2: + return 1; + } +#endif + + pax_report_fault(regs, (void *)regs->ip, (void *)regs->sp); + do_group_exit(SIGKILL); + } + + pmd = pax_get_pmd(mm, address); + if (unlikely(!pmd)) + return 0; + + pte = pte_offset_map_lock(mm, pmd, address, &ptl); + if (unlikely(!(pte_val(*pte) & _PAGE_PRESENT) || pte_user(*pte))) { + pte_unmap_unlock(pte, ptl); + return 0; + } + + if (unlikely((error_code & PF_WRITE) && !pte_write(*pte))) { + /* write attempt to a protected page in user mode */ + pte_unmap_unlock(pte, ptl); + return 0; + } + +#ifdef CONFIG_SMP + if (likely(address > get_limit(regs->cs) && cpu_isset(smp_processor_id(), mm->context.cpu_user_cs_mask))) +#else + if (likely(address > get_limit(regs->cs))) +#endif + { + set_pte(pte, pte_mkread(*pte)); + __flush_tlb_one(address); + pte_unmap_unlock(pte, ptl); + up_read(&mm->mmap_sem); + return 1; + } + + pte_mask = _PAGE_ACCESSED | _PAGE_USER | ((error_code & PF_WRITE) << (_PAGE_BIT_DIRTY-1)); + + /* + * PaX: fill DTLB with user rights and retry + */ + __asm__ __volatile__ ( + "orb %2,(%1)\n" +#if defined(CONFIG_M586) || defined(CONFIG_M586TSC) +/* + * PaX: let this uncommented 'invlpg' remind us on the behaviour of Intel's + * (and AMD's) TLBs. namely, they do not cache PTEs that would raise *any* + * page fault when examined during a TLB load attempt. 
this is true not only + * for PTEs holding a non-present entry but also present entries that will + * raise a page fault (such as those set up by PaX, or the copy-on-write + * mechanism). in effect it means that we do *not* need to flush the TLBs + * for our target pages since their PTEs are simply not in the TLBs at all. + + * the best thing in omitting it is that we gain around 15-20% speed in the + * fast path of the page fault handler and can get rid of tracing since we + * can no longer flush unintended entries. + */ + "invlpg (%0)\n" +#endif + __copyuser_seg"testb $0,(%0)\n" + "xorb %3,(%1)\n" + : + : "r" (address), "r" (pte), "q" (pte_mask), "i" (_PAGE_USER) + : "memory", "cc"); + pte_unmap_unlock(pte, ptl); + up_read(&mm->mmap_sem); + return 1; +} +#endif + /* * Handle a spurious fault caused by a stale TLB entry. * @@ -923,6 +1139,9 @@ int show_unhandled_signals = 1; static inline int access_error(unsigned long error_code, int write, struct vm_area_struct *vma) { + if (nx_enabled && (error_code & PF_INSTR) && !(vma->vm_flags & VM_EXEC)) + return 1; + if (write) { /* write, present and write, not present: */ if (unlikely(!(vma->vm_flags & VM_WRITE))) @@ -956,16 +1175,30 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) { struct vm_area_struct *vma; struct task_struct *tsk; - unsigned long address; struct mm_struct *mm; int write; int fault; - tsk = current; - mm = tsk->mm; - /* Get the faulting address: */ - address = read_cr2(); + unsigned long address = read_cr2(); + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + if (!user_mode(regs) && address < 2 * PAX_USER_SHADOW_BASE) { + if (!search_exception_tables(regs->ip)) { + bad_area_nosemaphore(regs, error_code, address); + return; + } + if (address < PAX_USER_SHADOW_BASE) { + printk(KERN_ERR "PAX: please report this to pageexec@freemail.hu\n"); + printk(KERN_ERR "PAX: faulting IP: %pA\n", (void *)regs->ip); + show_trace_log_lvl(NULL, NULL, (void *)regs->sp, regs->bp, 
KERN_ERR); + } else + address -= PAX_USER_SHADOW_BASE; + } +#endif + + tsk = current; + mm = tsk->mm; /* * Detect and handle instructions that would cause a page fault for @@ -1026,7 +1259,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) * User-mode registers count as a user access even for any * potential system fault or CPU buglet: */ - if (user_mode_vm(regs)) { + if (user_mode(regs)) { local_irq_enable(); error_code |= PF_USER; } else { @@ -1080,6 +1313,11 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) might_sleep(); } +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_PAGEEXEC) + if (pax_handle_pageexec_fault(regs, mm, address, error_code)) + return; +#endif + vma = find_vma(mm, address); if (unlikely(!vma)) { bad_area(regs, error_code, address); @@ -1091,18 +1329,24 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) bad_area(regs, error_code, address); return; } - if (error_code & PF_USER) { - /* - * Accessing the stack below %sp is always a bug. - * The large cushion allows instructions like enter - * and pusha to work. ("enter $65535, $31" pushes - * 32 pointers and then decrements %sp by 65535.) - */ - if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) { - bad_area(regs, error_code, address); - return; - } + /* + * Accessing the stack below %sp is always a bug. + * The large cushion allows instructions like enter + * and pusha to work. ("enter $65535, $31" pushes + * 32 pointers and then decrements %sp by 65535.) 
+ */ + if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < task_pt_regs(tsk)->sp)) { + bad_area(regs, error_code, address); + return; } + +#ifdef CONFIG_PAX_SEGMEXEC + if (unlikely((mm->pax_flags & MF_PAX_SEGMEXEC) && vma->vm_end - SEGMEXEC_TASK_SIZE - 1 < address - SEGMEXEC_TASK_SIZE - 1)) { + bad_area(regs, error_code, address); + return; + } +#endif + if (unlikely(expand_stack(vma, address))) { bad_area(regs, error_code, address); return; @@ -1146,3 +1390,292 @@ good_area: up_read(&mm->mmap_sem); } + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) +static bool pax_is_fetch_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) +{ + struct mm_struct *mm = current->mm; + unsigned long ip = regs->ip; + + if (v8086_mode(regs)) + ip = ((regs->cs & 0xffff) << 4) + (ip & 0xffff); + +#ifdef CONFIG_PAX_PAGEEXEC + if (mm->pax_flags & MF_PAX_PAGEEXEC) { + if ((__supported_pte_mask & _PAGE_NX) && (error_code & PF_INSTR)) + return true; + if (!(error_code & (PF_PROT | PF_WRITE)) && ip == address) + return true; + return false; + } +#endif + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) { + if (!(error_code & (PF_PROT | PF_WRITE)) && (ip + SEGMEXEC_TASK_SIZE == address)) + return true; + return false; + } +#endif + + return false; +} +#endif + +#ifdef CONFIG_PAX_EMUTRAMP +static int pax_handle_fetch_fault_32(struct pt_regs *regs) +{ + int err; + + do { /* PaX: libffi trampoline emulation */ + unsigned char mov, jmp; + unsigned int addr1, addr2; + +#ifdef CONFIG_X86_64 + if ((regs->ip + 9) >> 32) + break; +#endif + + err = get_user(mov, (unsigned char __user *)regs->ip); + err |= get_user(addr1, (unsigned int __user *)(regs->ip + 1)); + err |= get_user(jmp, (unsigned char __user *)(regs->ip + 5)); + err |= get_user(addr2, (unsigned int __user *)(regs->ip + 6)); + + if (err) + break; + + if (mov == 0xB8 && jmp == 0xE9) { + regs->ax = addr1; + regs->ip = (unsigned int)(regs->ip + addr2 + 10); + return 2; + } + 
} while (0); + + do { /* PaX: gcc trampoline emulation #1 */ + unsigned char mov1, mov2; + unsigned short jmp; + unsigned int addr1, addr2; + +#ifdef CONFIG_X86_64 + if ((regs->ip + 11) >> 32) + break; +#endif + + err = get_user(mov1, (unsigned char __user *)regs->ip); + err |= get_user(addr1, (unsigned int __user *)(regs->ip + 1)); + err |= get_user(mov2, (unsigned char __user *)(regs->ip + 5)); + err |= get_user(addr2, (unsigned int __user *)(regs->ip + 6)); + err |= get_user(jmp, (unsigned short __user *)(regs->ip + 10)); + + if (err) + break; + + if (mov1 == 0xB9 && mov2 == 0xB8 && jmp == 0xE0FF) { + regs->cx = addr1; + regs->ax = addr2; + regs->ip = addr2; + return 2; + } + } while (0); + + do { /* PaX: gcc trampoline emulation #2 */ + unsigned char mov, jmp; + unsigned int addr1, addr2; + +#ifdef CONFIG_X86_64 + if ((regs->ip + 9) >> 32) + break; +#endif + + err = get_user(mov, (unsigned char __user *)regs->ip); + err |= get_user(addr1, (unsigned int __user *)(regs->ip + 1)); + err |= get_user(jmp, (unsigned char __user *)(regs->ip + 5)); + err |= get_user(addr2, (unsigned int __user *)(regs->ip + 6)); + + if (err) + break; + + if (mov == 0xB9 && jmp == 0xE9) { + regs->cx = addr1; + regs->ip = (unsigned int)(regs->ip + addr2 + 10); + return 2; + } + } while (0); + + return 1; /* PaX in action */ +} + +#ifdef CONFIG_X86_64 +static int pax_handle_fetch_fault_64(struct pt_regs *regs) +{ + int err; + + do { /* PaX: libffi trampoline emulation */ + unsigned short mov1, mov2, jmp1; + unsigned char stcclc, jmp2; + unsigned long addr1, addr2; + + err = get_user(mov1, (unsigned short __user *)regs->ip); + err |= get_user(addr1, (unsigned long __user *)(regs->ip + 2)); + err |= get_user(mov2, (unsigned short __user *)(regs->ip + 10)); + err |= get_user(addr2, (unsigned long __user *)(regs->ip + 12)); + err |= get_user(stcclc, (unsigned char __user *)(regs->ip + 20)); + err |= get_user(jmp1, (unsigned short __user *)(regs->ip + 21)); + err |= get_user(jmp2, (unsigned 
char __user *)(regs->ip + 23)); + + if (err) + break; + + if (mov1 == 0xBB49 && mov2 == 0xBA49 && (stcclc == 0xF8 || stcclc == 0xF9) && jmp1 == 0xFF49 && jmp2 == 0xE3) { + regs->r11 = addr1; + regs->r10 = addr2; + if (stcclc == 0xF8) + regs->flags &= ~X86_EFLAGS_CF; + else + regs->flags |= X86_EFLAGS_CF; + regs->ip = addr1; + return 2; + } + } while (0); + + do { /* PaX: gcc trampoline emulation #1 */ + unsigned short mov1, mov2, jmp1; + unsigned char jmp2; + unsigned int addr1; + unsigned long addr2; + + err = get_user(mov1, (unsigned short __user *)regs->ip); + err |= get_user(addr1, (unsigned int __user *)(regs->ip + 2)); + err |= get_user(mov2, (unsigned short __user *)(regs->ip + 6)); + err |= get_user(addr2, (unsigned long __user *)(regs->ip + 8)); + err |= get_user(jmp1, (unsigned short __user *)(regs->ip + 16)); + err |= get_user(jmp2, (unsigned char __user *)(regs->ip + 18)); + + if (err) + break; + + if (mov1 == 0xBB41 && mov2 == 0xBA49 && jmp1 == 0xFF49 && jmp2 == 0xE3) { + regs->r11 = addr1; + regs->r10 = addr2; + regs->ip = addr1; + return 2; + } + } while (0); + + do { /* PaX: gcc trampoline emulation #2 */ + unsigned short mov1, mov2, jmp1; + unsigned char jmp2; + unsigned long addr1, addr2; + + err = get_user(mov1, (unsigned short __user *)regs->ip); + err |= get_user(addr1, (unsigned long __user *)(regs->ip + 2)); + err |= get_user(mov2, (unsigned short __user *)(regs->ip + 10)); + err |= get_user(addr2, (unsigned long __user *)(regs->ip + 12)); + err |= get_user(jmp1, (unsigned short __user *)(regs->ip + 20)); + err |= get_user(jmp2, (unsigned char __user *)(regs->ip + 22)); + + if (err) + break; + + if (mov1 == 0xBB49 && mov2 == 0xBA49 && jmp1 == 0xFF49 && jmp2 == 0xE3) { + regs->r11 = addr1; + regs->r10 = addr2; + regs->ip = addr1; + return 2; + } + } while (0); + + return 1; /* PaX in action */ +} +#endif + +/* + * PaX: decide what to do with offenders (regs->ip = fault address) + * + * returns 1 when task should be killed + * 2 when gcc 
trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + if (v8086_mode(regs)) + return 1; + + if (!(current->mm->pax_flags & MF_PAX_EMUTRAMP)) + return 1; + +#ifdef CONFIG_X86_32 + return pax_handle_fetch_fault_32(regs); +#else + if (regs->cs == __USER32_CS || (regs->cs & SEGMENT_LDT)) + return pax_handle_fetch_fault_32(regs); + else + return pax_handle_fetch_fault_64(regs); +#endif +} +#endif + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) +void pax_report_insns(struct pt_regs *regs, void *pc, void *sp) +{ + long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 20; i++) { + unsigned char c; + if (get_user(c, (unsigned char __force_user *)pc+i)) + printk(KERN_CONT "?? "); + else + printk(KERN_CONT "%02x ", c); + } + printk("\n"); + + printk(KERN_ERR "PAX: bytes at SP-%lu: ", (unsigned long)sizeof(long)); + for (i = -1; i < 80 / (long)sizeof(long); i++) { + unsigned long c; + if (get_user(c, (unsigned long __force_user *)sp+i)) { +#ifdef CONFIG_X86_32 + printk(KERN_CONT "???????? "); +#else + if ((regs->cs == __USER32_CS || (regs->cs & SEGMENT_LDT))) + printk(KERN_CONT "???????? ???????? "); + else + printk(KERN_CONT "???????????????? "); +#endif + } else { +#ifdef CONFIG_X86_64 + if ((regs->cs == __USER32_CS || (regs->cs & SEGMENT_LDT))) { + printk(KERN_CONT "%08x ", (unsigned int)c); + printk(KERN_CONT "%08x ", (unsigned int)(c >> 32)); + } else +#endif + printk(KERN_CONT "%0*lx ", 2 * (int)sizeof(long), c); + } + } + printk("\n"); +} +#endif + +/** + * probe_kernel_write(): safely attempt to write to a location + * @dst: address to write to + * @src: pointer to the data that shall be written + * @size: size of the data chunk + * + * Safely write to address @dst from the buffer at @src. If a kernel fault + * happens, handle that and return -EFAULT. 
+ */ +long notrace probe_kernel_write(void *dst, const void *src, size_t size) +{ + long ret; + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + pagefault_disable(); + pax_open_kernel(); + ret = __copy_to_user_inatomic((void __force_user *)dst, src, size); + pax_close_kernel(); + pagefault_enable(); + set_fs(old_fs); + + return ret ? -EFAULT : 0; +} diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 71da1bc..7a16bf4 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -237,7 +237,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, addr = start; len = (unsigned long) nr_pages << PAGE_SHIFT; end = start + len; - if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, + if (unlikely(!__access_ok(write ? VERIFY_WRITE : VERIFY_READ, (void __user *)start, len))) return 0; diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 63a6ba6..79abd7a 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -43,7 +43,10 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); BUG_ON(!pte_none(*(kmap_pte-idx))); + + pax_open_kernel(); set_pte(kmap_pte-idx, mk_pte(page, prot)); + pax_close_kernel(); return (void *)vaddr; } diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index f46c340..6ff9a26 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -267,13 +267,20 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, struct hstate *h = hstate_file(file); struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long start_addr; + unsigned long start_addr, pax_task_size = TASK_SIZE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + pax_task_size = SEGMEXEC_TASK_SIZE; +#endif + + pax_task_size -= PAGE_SIZE; if (len > mm->cached_hole_size) { - start_addr = mm->free_area_cache; + start_addr = 
mm->free_area_cache; } else { - start_addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; + start_addr = mm->mmap_base; + mm->cached_hole_size = 0; } full_search: @@ -281,26 +288,27 @@ full_search: for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { /* At this point: (!vma || addr < vma->vm_end). */ - if (TASK_SIZE - len < addr) { + if (pax_task_size - len < addr) { /* * Start a new search - just in case we missed * some holes. */ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = TASK_UNMAPPED_BASE; + if (start_addr != mm->mmap_base) { + start_addr = mm->mmap_base; mm->cached_hole_size = 0; goto full_search; } return -ENOMEM; } - if (!vma || addr + len <= vma->vm_start) { - mm->free_area_cache = addr + len; - return addr; - } + if (check_heap_stack_gap(vma, addr, len)) + break; if (addr + mm->cached_hole_size < vma->vm_start) mm->cached_hole_size = vma->vm_start - addr; addr = ALIGN(vma->vm_end, huge_page_size(h)); } + + mm->free_area_cache = addr + len; + return addr; } static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, @@ -309,10 +317,9 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, { struct hstate *h = hstate_file(file); struct mm_struct *mm = current->mm; - struct vm_area_struct *vma, *prev_vma; - unsigned long base = mm->mmap_base, addr = addr0; + struct vm_area_struct *vma; + unsigned long base = mm->mmap_base, addr; unsigned long largest_hole = mm->cached_hole_size; - int first_time = 1; /* don't allow allocations above current base */ if (mm->free_area_cache > base) @@ -322,64 +329,63 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, largest_hole = 0; mm->free_area_cache = base; } -try_again: + /* make sure it can fit in the remaining address space */ if (mm->free_area_cache < len) goto fail; /* either no address requested or cant fit in requested address hole */ - addr = (mm->free_area_cache - len) & huge_page_mask(h); + addr = (mm->free_area_cache - len); do { + 
addr &= huge_page_mask(h); + vma = find_vma(mm, addr); /* * Lookup failure means no vma is above this address, * i.e. return with success: - */ - if (!(vma = find_vma_prev(mm, addr, &prev_vma))) - return addr; - - /* * new region fits between prev_vma->vm_end and * vma->vm_start, use it: */ - if (addr + len <= vma->vm_start && - (!prev_vma || (addr >= prev_vma->vm_end))) { + if (check_heap_stack_gap(vma, addr, len)) { /* remember the address as a hint for next time */ - mm->cached_hole_size = largest_hole; - return (mm->free_area_cache = addr); - } else { - /* pull free_area_cache down to the first hole */ - if (mm->free_area_cache == vma->vm_end) { - mm->free_area_cache = vma->vm_start; - mm->cached_hole_size = largest_hole; - } + mm->cached_hole_size = largest_hole; + return (mm->free_area_cache = addr); + } + /* pull free_area_cache down to the first hole */ + if (mm->free_area_cache == vma->vm_end) { + mm->free_area_cache = vma->vm_start; + mm->cached_hole_size = largest_hole; } /* remember the largest hole we saw so far */ if (addr + largest_hole < vma->vm_start) - largest_hole = vma->vm_start - addr; + largest_hole = vma->vm_start - addr; /* try just below the current vma->vm_start */ - addr = (vma->vm_start - len) & huge_page_mask(h); - } while (len <= vma->vm_start); + addr = skip_heap_stack_gap(vma, len); + } while (!IS_ERR_VALUE(addr)); fail: /* - * if hint left us with no space for the requested - * mapping then try again: - */ - if (first_time) { - mm->free_area_cache = base; - largest_hole = 0; - first_time = 0; - goto try_again; - } - /* * A failed mmap() very likely causes application failure, * so fall back to the bottom-up function here. This scenario * can happen with large stack limits and large mmap() * allocations. 
*/ - mm->free_area_cache = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + mm->mmap_base = SEGMEXEC_TASK_UNMAPPED_BASE; + else +#endif + + mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + + mm->free_area_cache = mm->mmap_base; mm->cached_hole_size = ~0UL; addr = hugetlb_get_unmapped_area_bottomup(file, addr0, len, pgoff, flags); @@ -387,6 +393,7 @@ fail: /* * Restore the topdown base: */ + mm->mmap_base = base; mm->free_area_cache = base; mm->cached_hole_size = ~0UL; @@ -400,10 +407,19 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, struct hstate *h = hstate_file(file); struct mm_struct *mm = current->mm; struct vm_area_struct *vma; + unsigned long pax_task_size = TASK_SIZE; if (len & ~huge_page_mask(h)) return -EINVAL; - if (len > TASK_SIZE) + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + pax_task_size = SEGMEXEC_TASK_SIZE; +#endif + + pax_task_size -= PAGE_SIZE; + + if (len > pax_task_size) return -ENOMEM; if (flags & MAP_FIXED) { @@ -415,8 +431,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, if (addr) { addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + if (pax_task_size - len >= addr && check_heap_stack_gap(vma, addr, len)) return addr; } if (mm->get_unmapped_area == arch_get_unmapped_area) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 73ffd55..ad78676 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -69,11 +69,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse, * cause a hotspot and fill up ZONE_DMA. The page tables * need roughly 0.5KB per GB. 
*/ -#ifdef CONFIG_X86_32 - start = 0x7000; -#else - start = 0x8000; -#endif + start = 0x100000; e820_table_start = find_e820_area(start, max_pfn_mapped<> PAGE_SHIFT) == pagenr) + return 1; + /* allow ISA/video mem */ + if ((ISA_START_ADDRESS >> PAGE_SHIFT) <= pagenr && pagenr < (ISA_END_ADDRESS >> PAGE_SHIFT)) + return 1; + /* throw out everything else below 1MB */ + if (pagenr <= 256) + return 0; +#else if (pagenr <= 256) return 1; +#endif + if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) return 0; if (!page_is_ram(pagenr)) @@ -379,6 +392,86 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) void free_initmem(void) { + +#ifdef CONFIG_PAX_KERNEXEC +#ifdef CONFIG_X86_32 + /* PaX: limit KERNEL_CS to actual size */ + unsigned long addr, limit; + struct desc_struct d; + int cpu; + + limit = paravirt_enabled() ? ktva_ktla(0xffffffff) : (unsigned long)&_etext; + limit = (limit - 1UL) >> PAGE_SHIFT; + + memset(__LOAD_PHYSICAL_ADDR + PAGE_OFFSET, POISON_FREE_INITMEM, PAGE_SIZE); + for (cpu = 0; cpu < NR_CPUS; cpu++) { + pack_descriptor(&d, get_desc_base(&get_cpu_gdt_table(cpu)[GDT_ENTRY_KERNEL_CS]), limit, 0x9B, 0xC); + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_KERNEL_CS, &d, DESCTYPE_S); + } + + /* PaX: make KERNEL_CS read-only */ + addr = PFN_ALIGN(ktla_ktva((unsigned long)&_text)); + if (!paravirt_enabled()) + set_memory_ro(addr, (PFN_ALIGN(_sdata) - addr) >> PAGE_SHIFT); +/* + for (addr = ktla_ktva((unsigned long)&_text); addr < (unsigned long)&_sdata; addr += PMD_SIZE) { + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_RW)); + } +*/ +#ifdef CONFIG_X86_PAE + set_memory_nx(PFN_ALIGN(__init_begin), (PFN_ALIGN(__init_end) - PFN_ALIGN(__init_begin)) >> PAGE_SHIFT); +/* + for (addr = (unsigned long)&__init_begin; addr < (unsigned long)&__init_end; addr += PMD_SIZE) { + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + 
set_pmd(pmd, __pmd(pmd_val(*pmd) | (_PAGE_NX & __supported_pte_mask))); + } +*/ +#endif + +#ifdef CONFIG_MODULES + set_memory_4k((unsigned long)MODULES_EXEC_VADDR, (MODULES_EXEC_END - MODULES_EXEC_VADDR) >> PAGE_SHIFT); +#endif + +#else + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + unsigned long addr, end; + + /* PaX: make kernel code/rodata read-only, rest non-executable */ + for (addr = __START_KERNEL_map; addr < __START_KERNEL_map + KERNEL_IMAGE_SIZE; addr += PMD_SIZE) { + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) + continue; + if ((unsigned long)_text <= addr && addr < (unsigned long)_sdata) + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_RW)); + else + set_pmd(pmd, __pmd(pmd_val(*pmd) | (_PAGE_NX & __supported_pte_mask))); + } + + addr = (unsigned long)__va(__pa(__START_KERNEL_map)); + end = addr + KERNEL_IMAGE_SIZE; + for (; addr < end; addr += PMD_SIZE) { + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) + continue; + if ((unsigned long)__va(__pa(_text)) <= addr && addr < (unsigned long)__va(__pa(_sdata))) + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_RW)); + } +#endif + + flush_tlb_all(); +#endif + free_init_pages("unused kernel memory", (unsigned long)(&__init_begin), (unsigned long)(&__init_end)); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 30938c1..bda3d5d 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -72,36 +72,6 @@ static __init void *alloc_low_page(void) } /* - * Creates a middle page table and puts a pointer to it in the - * given global directory entry. This only returns the gd entry - * in non-PAE compilation mode, since the middle layer is folded. 
- */ -static pmd_t * __init one_md_table_init(pgd_t *pgd) -{ - pud_t *pud; - pmd_t *pmd_table; - -#ifdef CONFIG_X86_PAE - if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { - if (after_bootmem) - pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE); - else - pmd_table = (pmd_t *)alloc_low_page(); - paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); - set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); - pud = pud_offset(pgd, 0); - BUG_ON(pmd_table != pmd_offset(pud, 0)); - - return pmd_table; - } -#endif - pud = pud_offset(pgd, 0); - pmd_table = pmd_offset(pud, 0); - - return pmd_table; -} - -/* * Create a page table and place a pointer to it in a middle page * directory entry: */ @@ -121,13 +91,28 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) page_table = (pte_t *)alloc_low_page(); paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + set_pmd(pmd, __pmd(__pa(page_table) | _KERNPG_TABLE)); +#else set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); +#endif BUG_ON(page_table != pte_offset_kernel(pmd, 0)); } return pte_offset_kernel(pmd, 0); } +static pmd_t * __init one_md_table_init(pgd_t *pgd) +{ + pud_t *pud; + pmd_t *pmd_table; + + pud = pud_offset(pgd, 0); + pmd_table = pmd_offset(pud, 0); + + return pmd_table; +} + pmd_t * __init populate_extra_pmd(unsigned long vaddr) { int pgd_idx = pgd_index(vaddr); @@ -201,6 +186,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) int pgd_idx, pmd_idx; unsigned long vaddr; pgd_t *pgd; + pud_t *pud; pmd_t *pmd; pte_t *pte = NULL; @@ -210,8 +196,13 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) pgd = pgd_base + pgd_idx; for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { - pmd = one_md_table_init(pgd); - pmd = pmd + pmd_index(vaddr); + pud = pud_offset(pgd, vaddr); + pmd = pmd_offset(pud, vaddr); + +#ifdef CONFIG_X86_PAE + paravirt_alloc_pmd(&init_mm, 
__pa(pmd) >> PAGE_SHIFT); +#endif + for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { pte = page_table_kmap_check(one_page_table_init(pmd), @@ -223,11 +214,20 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) } } -static inline int is_kernel_text(unsigned long addr) +static inline int is_kernel_text(unsigned long start, unsigned long end) { - if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) - return 1; - return 0; + if ((start > ktla_ktva((unsigned long)_etext) || + end <= ktla_ktva((unsigned long)_stext)) && + (start > ktla_ktva((unsigned long)_einittext) || + end <= ktla_ktva((unsigned long)_sinittext)) && + +#ifdef CONFIG_ACPI_SLEEP + (start > (unsigned long)__va(acpi_wakeup_address) + 0x4000 || end <= (unsigned long)__va(acpi_wakeup_address)) && +#endif + + (start > (unsigned long)__va(0xfffff) || end <= (unsigned long)__va(0xc0000))) + return 0; + return 1; } /* @@ -243,9 +243,10 @@ kernel_physical_mapping_init(unsigned long start, int use_pse = page_size_mask == (1<> PAGE_SHIFT); +#endif if (pfn >= end_pfn) continue; @@ -291,14 +297,13 @@ repeat: #endif for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn; pmd++, pmd_idx++) { - unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET; + unsigned long address = pfn * PAGE_SIZE + PAGE_OFFSET; /* * Map with big pages if possible, otherwise * create normal page tables: */ if (use_pse) { - unsigned int addr2; pgprot_t prot = PAGE_KERNEL_LARGE; /* * first pass will use the same initial @@ -308,11 +313,7 @@ repeat: __pgprot(PTE_IDENT_ATTR | _PAGE_PSE); - addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + - PAGE_OFFSET + PAGE_SIZE-1; - - if (is_kernel_text(addr) || - is_kernel_text(addr2)) + if (is_kernel_text(address, address + PMD_SIZE)) prot = PAGE_KERNEL_LARGE_EXEC; pages_2m++; @@ -329,7 +330,7 @@ repeat: pte_ofs = pte_index((pfn<> 10, - (unsigned long)&_etext, (unsigned long)&_edata, - ((unsigned long)&_edata - (unsigned long)&_etext) >> 10, + (unsigned 
long)&_sdata, (unsigned long)&_edata, + ((unsigned long)&_edata - (unsigned long)&_sdata) >> 10, - (unsigned long)&_text, (unsigned long)&_etext, + ktla_ktva((unsigned long)&_text), ktla_ktva((unsigned long)&_etext), ((unsigned long)&_etext - (unsigned long)&_text) >> 10); /* @@ -1007,6 +1012,7 @@ void set_kernel_text_rw(void) if (!kernel_set_to_readonly) return; + start = ktla_ktva(start); pr_debug("Set kernel text: %lx - %lx for read write\n", start, start+size); @@ -1021,6 +1027,7 @@ void set_kernel_text_ro(void) if (!kernel_set_to_readonly) return; + start = ktla_ktva(start); pr_debug("Set kernel text: %lx - %lx for read only\n", start, start+size); @@ -1032,6 +1039,7 @@ void mark_rodata_ro(void) unsigned long start = PFN_ALIGN(_text); unsigned long size = PFN_ALIGN(_etext) - start; + start = ktla_ktva(start); set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); printk(KERN_INFO "Write protecting the kernel text: %luk\n", size >> 10); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 7d095ad..25d2549 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -164,7 +164,9 @@ void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) pmd = fill_pmd(pud, vaddr); pte = fill_pte(pmd, vaddr); + pax_open_kernel(); set_pte(pte, new_pte); + pax_close_kernel(); /* * It's enough to flush this one mapping. 
@@ -223,14 +225,12 @@ static void __init __init_extra_mapping(unsigned long phys, unsigned long size, pgd = pgd_offset_k((unsigned long)__va(phys)); if (pgd_none(*pgd)) { pud = (pud_t *) spp_getpage(); - set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE | - _PAGE_USER)); + set_pgd(pgd, __pgd(__pa(pud) | _PAGE_TABLE)); } pud = pud_offset(pgd, (unsigned long)__va(phys)); if (pud_none(*pud)) { pmd = (pmd_t *) spp_getpage(); - set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | - _PAGE_USER)); + set_pud(pud, __pud(__pa(pmd) | _PAGE_TABLE)); } pmd = pmd_offset(pud, phys); BUG_ON(!pmd_none(*pmd)); @@ -675,6 +675,12 @@ void __init mem_init(void) pci_iommu_alloc(); +#ifdef CONFIG_PAX_PER_CPU_PGD + clone_pgd_range(get_cpu_pgd(0) + KERNEL_PGD_BOUNDARY, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + KERNEL_PGD_PTRS); +#endif + /* clear_bss() already clear the empty_zero_page */ reservedpages = 0; @@ -861,8 +867,8 @@ int kern_addr_valid(unsigned long addr) static struct vm_area_struct gate_vma = { .vm_start = VSYSCALL_START, .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES * PAGE_SIZE), - .vm_page_prot = PAGE_READONLY_EXEC, - .vm_flags = VM_READ | VM_EXEC + .vm_page_prot = PAGE_READONLY, + .vm_flags = VM_READ }; struct vm_area_struct *get_gate_vma(struct task_struct *tsk) @@ -896,7 +902,7 @@ int in_gate_area_no_task(unsigned long addr) const char *arch_vma_name(struct vm_area_struct *vma) { - if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) + if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso) return "[vdso]"; if (vma == &gate_vma) return "[vsyscall]"; diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 84e236c..69bd3f6 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -65,7 +65,11 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) debug_kmap_atomic(type); idx = type + KM_TYPE_NR * smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + + pax_open_kernel(); set_pte(kmap_pte - idx, 
pfn_pte(pfn, prot)); + pax_close_kernel(); + arch_flush_lazy_mmu_mode(); return (void *)vaddr; diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 2feb9bd..3646202 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -41,8 +41,8 @@ int page_is_ram(unsigned long pagenr) * Second special case: Some BIOSen report the PC BIOS * area (640->1Mb) as ram even though it is not. */ - if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) && - pagenr < (BIOS_END >> PAGE_SHIFT)) + if (pagenr >= (ISA_START_ADDRESS >> PAGE_SHIFT) && + pagenr < (ISA_END_ADDRESS >> PAGE_SHIFT)) return 0; for (i = 0; i < e820.nr_map; i++) { @@ -137,13 +137,10 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, /* * Don't allow anybody to remap normal RAM that we're using.. */ - for (pfn = phys_addr >> PAGE_SHIFT; - (pfn << PAGE_SHIFT) < (last_addr & PAGE_MASK); - pfn++) { - + for (pfn = phys_addr >> PAGE_SHIFT; ((resource_size_t)pfn << PAGE_SHIFT) < (last_addr & PAGE_MASK); pfn++) { int is_ram = page_is_ram(pfn); - if (is_ram && pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn))) + if (is_ram && pfn_valid(pfn) && (pfn >= 0x100 || !PageReserved(pfn_to_page(pfn)))) return NULL; WARN_ON_ONCE(is_ram); } @@ -407,7 +404,7 @@ static int __init early_ioremap_debug_setup(char *str) early_param("early_ioremap_debug", early_ioremap_debug_setup); static __initdata int after_paging_init; -static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; +static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __read_only __aligned(PAGE_SIZE); static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) { @@ -439,8 +436,7 @@ void __init early_ioremap_init(void) slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); - memset(bm_pte, 0, sizeof(bm_pte)); - pmd_populate_kernel(&init_mm, pmd, bm_pte); + pmd_populate_user(&init_mm, pmd, bm_pte); /* * The boot-ioremap range spans multiple pmds, for which diff --git 
a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c index 8cc1833..1abbc5b 100644 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ b/arch/x86/mm/kmemcheck/kmemcheck.c @@ -622,9 +622,9 @@ bool kmemcheck_fault(struct pt_regs *regs, unsigned long address, * memory (e.g. tracked pages)? For now, we need this to avoid * invoking kmemcheck for PnP BIOS calls. */ - if (regs->flags & X86_VM_MASK) + if (v8086_mode(regs)) return false; - if (regs->cs != __KERNEL_CS) + if (regs->cs != __KERNEL_CS && regs->cs != __KERNEXEC_KERNEL_CS) return false; pte = kmemcheck_pte_lookup(address); diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index c9e57af..07a321b 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -49,7 +49,7 @@ static unsigned int stack_maxrandom_size(void) * Leave an at least ~128 MB hole with possible stack randomization. */ #define MIN_GAP (128*1024*1024UL + stack_maxrandom_size()) -#define MAX_GAP (TASK_SIZE/6*5) +#define MAX_GAP (pax_task_size/6*5) /* * True on X86_32 or when emulating IA32 on X86_64 @@ -94,27 +94,40 @@ static unsigned long mmap_rnd(void) return rnd << PAGE_SHIFT; } -static unsigned long mmap_base(void) +static unsigned long mmap_base(struct mm_struct *mm) { unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; + unsigned long pax_task_size = TASK_SIZE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + pax_task_size = SEGMEXEC_TASK_SIZE; +#endif if (gap < MIN_GAP) gap = MIN_GAP; else if (gap > MAX_GAP) gap = MAX_GAP; - return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd()); + return PAGE_ALIGN(pax_task_size - gap - mmap_rnd()); } /* * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 * does, but not when emulating X86_32 */ -static unsigned long mmap_legacy_base(void) +static unsigned long mmap_legacy_base(struct mm_struct *mm) { - if (mmap_is_ia32()) + if (mmap_is_ia32()) { + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + return 
SEGMEXEC_TASK_UNMAPPED_BASE; + else +#endif + return TASK_UNMAPPED_BASE; - else + } else return TASK_UNMAPPED_BASE + mmap_rnd(); } @@ -125,11 +138,23 @@ static unsigned long mmap_legacy_base(void) void arch_pick_mmap_layout(struct mm_struct *mm) { if (mmap_is_legacy()) { - mm->mmap_base = mmap_legacy_base(); + mm->mmap_base = mmap_legacy_base(mm); + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + mm->get_unmapped_area = arch_get_unmapped_area; mm->unmap_area = arch_unmap_area; } else { - mm->mmap_base = mmap_base(); + mm->mmap_base = mmap_base(mm); + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base -= mm->delta_mmap + mm->delta_stack; +#endif + mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->unmap_area = arch_unmap_area_topdown; } diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 132772a..b961f11 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -193,7 +193,7 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, break; default: { - unsigned char *ip = (unsigned char *)instptr; + unsigned char *ip = (unsigned char *)ktla_ktva(instptr); my_trace->opcode = MMIO_UNKNOWN_OP; my_trace->width = 0; my_trace->value = (*ip) << 16 | *(ip + 1) << 8 | @@ -233,7 +233,7 @@ static void post(struct kmmio_probe *p, unsigned long condition, static void ioremap_trace_core(resource_size_t offset, unsigned long size, void __iomem *addr) { - static atomic_t next_id; + static atomic_unchecked_t next_id; struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL); /* These are page-unaligned. 
*/ struct mmiotrace_map map = { @@ -257,7 +257,7 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size, .private = trace }, .phys = offset, - .id = atomic_inc_return(&next_id) + .id = atomic_inc_return_unchecked(&next_id) }; map.map_id = trace->id; diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index d253006..e56dd6a 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -98,7 +98,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, } #endif -extern unsigned long find_max_low_pfn(void); extern unsigned long highend_pfn, highstart_pfn; #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index e1d1069..2251ff3 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c @@ -36,7 +36,7 @@ enum { static int pte_testbit(pte_t pte) { - return pte_flags(pte) & _PAGE_UNUSED1; + return pte_flags(pte) & _PAGE_CPA_TEST; } struct split_state { diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index dd38bfb..8c12306 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -261,16 +261,17 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support. */ if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) - pgprot_val(forbidden) |= _PAGE_NX; + pgprot_val(forbidden) |= _PAGE_NX & __supported_pte_mask; /* * The kernel text needs to be executable for obvious reasons * Does not cover __inittext since that is gone later on. On * 64bit we do not enforce !NX on the low mapping */ - if (within(address, (unsigned long)_text, (unsigned long)_etext)) - pgprot_val(forbidden) |= _PAGE_NX; + if (within(address, ktla_ktva((unsigned long)_text), ktla_ktva((unsigned long)_etext))) + pgprot_val(forbidden) |= _PAGE_NX & __supported_pte_mask; +#ifdef CONFIG_DEBUG_RODATA /* * The .rodata section needs to be read-only. 
Using the pfn * catches all aliases. @@ -278,6 +279,14 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) pgprot_val(forbidden) |= _PAGE_RW; +#endif + +#ifdef CONFIG_PAX_KERNEXEC + if (within(pfn, __pa((unsigned long)&_text), __pa((unsigned long)&_sdata))) { + pgprot_val(forbidden) |= _PAGE_RW; + pgprot_val(forbidden) |= _PAGE_NX & __supported_pte_mask; + } +#endif prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); @@ -331,23 +340,37 @@ EXPORT_SYMBOL_GPL(lookup_address); static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) { /* change init_mm */ + pax_open_kernel(); set_pte_atomic(kpte, pte); + #ifdef CONFIG_X86_32 if (!SHARED_KERNEL_PMD) { + +#ifdef CONFIG_PAX_PER_CPU_PGD + unsigned long cpu; +#else struct page *page; +#endif +#ifdef CONFIG_PAX_PER_CPU_PGD + for (cpu = 0; cpu < NR_CPUS; ++cpu) { + pgd_t *pgd = get_cpu_pgd(cpu); +#else list_for_each_entry(page, &pgd_list, lru) { - pgd_t *pgd; + pgd_t *pgd = (pgd_t *)page_address(page); +#endif + pud_t *pud; pmd_t *pmd; - pgd = (pgd_t *)page_address(page) + pgd_index(address); + pgd += pgd_index(address); pud = pud_offset(pgd, address); pmd = pmd_offset(pud, address); set_pte_atomic((pte_t *)pmd, pte); } } #endif + pax_close_kernel(); } static int diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index e78cd0e..de0a817 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -258,7 +258,7 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type) conflict: printk(KERN_INFO "%s:%d conflicting memory types " - "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start, + "%Lx-%Lx %s<->%s\n", current->comm, task_pid_nr(current), new->start, new->end, cattr_name(new->type), cattr_name(entry->type)); return -EBUSY; } @@ -559,7 +559,7 @@ unlock_ret: if (err) { printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n", - 
current->comm, current->pid, start, end); + current->comm, task_pid_nr(current), start, end); } dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end); @@ -689,8 +689,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) while (cursor < to) { if (!devmem_is_allowed(pfn)) { printk(KERN_INFO - "Program %s tried to access /dev/mem between %Lx->%Lx.\n", - current->comm, from, to); + "Program %s tried to access /dev/mem between %Lx->%Lx (%Lx).\n", + current->comm, from, to, cursor); return 0; } cursor += PAGE_SIZE; @@ -755,7 +755,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags) printk(KERN_INFO "%s:%d ioremap_change_attr failed %s " "for %Lx-%Lx\n", - current->comm, current->pid, + current->comm, task_pid_nr(current), cattr_name(flags), base, (unsigned long long)(base + size)); return -EINVAL; @@ -813,7 +813,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, free_memtype(paddr, paddr + size); printk(KERN_ERR "%s:%d map pfn expected mapping type %s" " for %Lx-%Lx, got %s\n", - current->comm, current->pid, + current->comm, task_pid_nr(current), cattr_name(want_flags), (unsigned long long)paddr, (unsigned long long)(paddr + size), diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c index df3d5c8..c2223e1 100644 --- a/arch/x86/mm/pf_in.c +++ b/arch/x86/mm/pf_in.c @@ -148,7 +148,7 @@ enum reason_type get_ins_type(unsigned long ins_addr) int i; enum reason_type rv = OTHERS; - p = (unsigned char *)ins_addr; + p = (unsigned char *)ktla_ktva(ins_addr); p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); @@ -168,7 +168,7 @@ static unsigned int get_ins_reg_width(unsigned long ins_addr) struct prefix_bits prf; int i; - p = (unsigned char *)ins_addr; + p = (unsigned char *)ktla_ktva(ins_addr); p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); @@ -191,7 +191,7 @@ unsigned int get_ins_mem_width(unsigned long ins_addr) struct prefix_bits prf; int i; - p = (unsigned char 
*)ins_addr; + p = (unsigned char *)ktla_ktva(ins_addr); p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); @@ -417,7 +417,7 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) int i; unsigned long rv; - p = (unsigned char *)ins_addr; + p = (unsigned char *)ktla_ktva(ins_addr); p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); for (i = 0; i < ARRAY_SIZE(reg_rop); i++) @@ -472,7 +472,7 @@ unsigned long get_ins_imm_val(unsigned long ins_addr) int i; unsigned long rv; - p = (unsigned char *)ins_addr; + p = (unsigned char *)ktla_ktva(ins_addr); p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); for (i = 0; i < ARRAY_SIZE(imm_wop); i++) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index e0e6fad..6b90017 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -83,9 +83,52 @@ static inline void pgd_list_del(pgd_t *pgd) list_del(&page->lru); } -#define UNSHARED_PTRS_PER_PGD \ - (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) +pgdval_t clone_pgd_mask __read_only = ~_PAGE_PRESENT; +void __shadow_user_pgds(pgd_t *dst, const pgd_t *src, int count) +{ + while (count--) + *dst++ = __pgd((pgd_val(*src++) | (_PAGE_NX & __supported_pte_mask)) & ~_PAGE_USER); +} +#endif + +#ifdef CONFIG_PAX_PER_CPU_PGD +void __clone_user_pgds(pgd_t *dst, const pgd_t *src, int count) +{ + while (count--) + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + *dst++ = __pgd(pgd_val(*src++) & clone_pgd_mask); +#else + *dst++ = *src++; +#endif + +} +#endif + +#ifdef CONFIG_X86_64 +#define pxd_t pud_t +#define pyd_t pgd_t +#define paravirt_release_pxd(pfn) paravirt_release_pud(pfn) +#define pxd_free(mm, pud) pud_free((mm), (pud)) +#define pyd_populate(mm, pgd, pud) pgd_populate((mm), (pgd), (pud)) +#define pyd_offset(mm ,address) pgd_offset((mm), (address)) +#define PYD_SIZE PGDIR_SIZE +#else +#define pxd_t pmd_t +#define pyd_t pud_t +#define 
paravirt_release_pxd(pfn) paravirt_release_pmd(pfn) +#define pxd_free(mm, pud) pmd_free((mm), (pud)) +#define pyd_populate(mm, pgd, pud) pud_populate((mm), (pgd), (pud)) +#define pyd_offset(mm ,address) pud_offset((mm), (address)) +#define PYD_SIZE PUD_SIZE +#endif + +#ifdef CONFIG_PAX_PER_CPU_PGD +static inline void pgd_ctor(pgd_t *pgd) {} +static inline void pgd_dtor(pgd_t *pgd) {} +#else static void pgd_ctor(pgd_t *pgd) { /* If the pgd points to a shared pagetable level (either the @@ -119,6 +162,7 @@ static void pgd_dtor(pgd_t *pgd) pgd_list_del(pgd); spin_unlock_irqrestore(&pgd_lock, flags); } +#endif /* * List of all pgd's needed for non-PAE so it can invalidate entries @@ -131,7 +175,7 @@ static void pgd_dtor(pgd_t *pgd) * -- wli */ -#ifdef CONFIG_X86_PAE +#if defined(CONFIG_X86_32) && defined(CONFIG_X86_PAE) /* * In PAE mode, we need to do a cr3 reload (=tlb flush) when * updating the top-level pagetable entries to guarantee the @@ -143,7 +187,7 @@ static void pgd_dtor(pgd_t *pgd) * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate * and initialize the kernel pmds here. */ -#define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD +#define PREALLOCATED_PXDS (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) { @@ -161,36 +205,38 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) */ flush_tlb_mm(mm); } +#elif defined(CONFIG_X86_64) && defined(CONFIG_PAX_PER_CPU_PGD) +#define PREALLOCATED_PXDS USER_PGD_PTRS #else /* !CONFIG_X86_PAE */ /* No need to prepopulate any pagetable entries in non-PAE modes. 
*/ -#define PREALLOCATED_PMDS 0 +#define PREALLOCATED_PXDS 0 #endif /* CONFIG_X86_PAE */ -static void free_pmds(pmd_t *pmds[]) +static void free_pxds(pxd_t *pxds[]) { int i; - for(i = 0; i < PREALLOCATED_PMDS; i++) - if (pmds[i]) - free_page((unsigned long)pmds[i]); + for(i = 0; i < PREALLOCATED_PXDS; i++) + if (pxds[i]) + free_page((unsigned long)pxds[i]); } -static int preallocate_pmds(pmd_t *pmds[]) +static int preallocate_pxds(pxd_t *pxds[]) { int i; bool failed = false; - for(i = 0; i < PREALLOCATED_PMDS; i++) { - pmd_t *pmd = (pmd_t *)__get_free_page(PGALLOC_GFP); - if (pmd == NULL) + for(i = 0; i < PREALLOCATED_PXDS; i++) { + pxd_t *pxd = (pxd_t *)__get_free_page(PGALLOC_GFP); + if (pxd == NULL) failed = true; - pmds[i] = pmd; + pxds[i] = pxd; } if (failed) { - free_pmds(pmds); + free_pxds(pxds); return -ENOMEM; } @@ -203,51 +249,56 @@ static int preallocate_pmds(pmd_t *pmds[]) * preallocate which never got a corresponding vma will need to be * freed manually. */ -static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) +static void pgd_mop_up_pxds(struct mm_struct *mm, pgd_t *pgdp) { int i; - for(i = 0; i < PREALLOCATED_PMDS; i++) { + for(i = 0; i < PREALLOCATED_PXDS; i++) { pgd_t pgd = pgdp[i]; if (pgd_val(pgd) != 0) { - pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd); + pxd_t *pxd = (pxd_t *)pgd_page_vaddr(pgd); - pgdp[i] = native_make_pgd(0); + set_pgd(pgdp + i, native_make_pgd(0)); - paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT); - pmd_free(mm, pmd); + paravirt_release_pxd(pgd_val(pgd) >> PAGE_SHIFT); + pxd_free(mm, pxd); } } } -static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) +static void pgd_prepopulate_pxd(struct mm_struct *mm, pgd_t *pgd, pxd_t *pxds[]) { - pud_t *pud; + pyd_t *pyd; unsigned long addr; int i; - if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */ + if (PREALLOCATED_PXDS == 0) /* Work around gcc-3.4.x bug */ return; - pud = pud_offset(pgd, 0); +#ifdef CONFIG_X86_64 + pyd = pyd_offset(mm, 0L); 
+#else + pyd = pyd_offset(pgd, 0L); +#endif - for (addr = i = 0; i < PREALLOCATED_PMDS; - i++, pud++, addr += PUD_SIZE) { - pmd_t *pmd = pmds[i]; + for (addr = i = 0; i < PREALLOCATED_PXDS; + i++, pyd++, addr += PYD_SIZE) { + pxd_t *pxd = pxds[i]; if (i >= KERNEL_PGD_BOUNDARY) - memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]), - sizeof(pmd_t) * PTRS_PER_PMD); + memcpy(pxd, (pxd_t *)pgd_page_vaddr(swapper_pg_dir[i]), + sizeof(pxd_t) * PTRS_PER_PMD); - pud_populate(mm, pud, pmd); + pyd_populate(mm, pyd, pxd); } } pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; - pmd_t *pmds[PREALLOCATED_PMDS]; + pxd_t *pxds[PREALLOCATED_PXDS]; + unsigned long flags; pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); @@ -257,11 +308,11 @@ pgd_t *pgd_alloc(struct mm_struct *mm) mm->pgd = pgd; - if (preallocate_pmds(pmds) != 0) + if (preallocate_pxds(pxds) != 0) goto out_free_pgd; if (paravirt_pgd_alloc(mm) != 0) - goto out_free_pmds; + goto out_free_pxds; /* * Make sure that pre-populating the pmds is atomic with @@ -271,14 +322,14 @@ pgd_t *pgd_alloc(struct mm_struct *mm) spin_lock_irqsave(&pgd_lock, flags); pgd_ctor(pgd); - pgd_prepopulate_pmd(mm, pgd, pmds); + pgd_prepopulate_pxd(mm, pgd, pxds); spin_unlock_irqrestore(&pgd_lock, flags); return pgd; -out_free_pmds: - free_pmds(pmds); +out_free_pxds: + free_pxds(pxds); out_free_pgd: free_page((unsigned long)pgd); out: @@ -287,7 +338,7 @@ out: void pgd_free(struct mm_struct *mm, pgd_t *pgd) { - pgd_mop_up_pmds(mm, pgd); + pgd_mop_up_pxds(mm, pgd); pgd_dtor(pgd); paravirt_pgd_free(mm, pgd); free_page((unsigned long)pgd); diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 46c8834..fcab43d 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -49,10 +49,13 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval) return; } pte = pte_offset_kernel(pmd, vaddr); + + pax_open_kernel(); if (pte_val(pteval)) set_pte_at(&init_mm, vaddr, pte, pteval); else pte_clear(&init_mm, vaddr, pte); + 
pax_close_kernel(); /* * It's enough to flush this one mapping. diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c index 513d8ed..978c161 100644 --- a/arch/x86/mm/setup_nx.c +++ b/arch/x86/mm/setup_nx.c @@ -4,11 +4,10 @@ #include +#if defined(CONFIG_X86_32) && defined(CONFIG_X86_PAE) int nx_enabled; -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) -static int disable_nx __cpuinitdata; - +#ifndef CONFIG_PAX_PAGEEXEC /* * noexec = on|off * @@ -22,32 +21,26 @@ static int __init noexec_setup(char *str) if (!str) return -EINVAL; if (!strncmp(str, "on", 2)) { - __supported_pte_mask |= _PAGE_NX; - disable_nx = 0; + nx_enabled = 1; } else if (!strncmp(str, "off", 3)) { - disable_nx = 1; - __supported_pte_mask &= ~_PAGE_NX; + nx_enabled = 0; } return 0; } early_param("noexec", noexec_setup); #endif +#endif #ifdef CONFIG_X86_PAE void __init set_nx(void) { - unsigned int v[4], l, h; + if (!nx_enabled && cpu_has_nx) { + unsigned l, h; - if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { - cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); - - if ((v[3] & (1 << 20)) && !disable_nx) { - rdmsr(MSR_EFER, l, h); - l |= EFER_NX; - wrmsr(MSR_EFER, l, h); - nx_enabled = 1; - __supported_pte_mask |= _PAGE_NX; - } + __supported_pte_mask &= ~_PAGE_NX; + rdmsr(MSR_EFER, l, h); + l &= ~EFER_NX; + wrmsr(MSR_EFER, l, h); } } #else @@ -62,7 +55,7 @@ void __cpuinit check_efer(void) unsigned long efer; rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || disable_nx) + if (!(efer & EFER_NX) || !nx_enabled) __supported_pte_mask &= ~_PAGE_NX; } #endif diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 36fe08e..b123d3a 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -61,7 +61,11 @@ void leave_mm(int cpu) BUG(); cpumask_clear_cpu(cpu, mm_cpumask(percpu_read(cpu_tlbstate.active_mm))); + +#ifndef CONFIG_PAX_PER_CPU_PGD load_cr3(swapper_pg_dir); +#endif + } EXPORT_SYMBOL_GPL(leave_mm); diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 
829edf0..672adb3 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -115,7 +115,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) { struct frame_head *head = (struct frame_head *)frame_pointer(regs); - if (!user_mode_vm(regs)) { + if (!user_mode(regs)) { unsigned long stack = kernel_stack_pointer(regs); if (depth) dump_trace(NULL, regs, (unsigned long *)stack, 0, diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index e6a160a..36deff6 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -50,7 +50,7 @@ static inline void setup_num_counters(void) #endif } -static int inline addr_increment(void) +static inline int addr_increment(void) { #ifdef CONFIG_SMP return smp_num_siblings == 2 ? 2 : 1; diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 1331fcf..03901b2 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -31,8 +31,8 @@ int noioapicreroute = 1; int pcibios_last_bus = -1; unsigned long pirq_table_addr; struct pci_bus *pci_root_bus; -struct pci_raw_ops *raw_pci_ops; -struct pci_raw_ops *raw_pci_ext_ops; +const struct pci_raw_ops *raw_pci_ops; +const struct pci_raw_ops *raw_pci_ext_ops; int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 *val) diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 347d882..4baf6b6 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -79,7 +79,7 @@ static int pci_conf1_write(unsigned int seg, unsigned int bus, #undef PCI_CONF1_ADDRESS -struct pci_raw_ops pci_direct_conf1 = { +const struct pci_raw_ops pci_direct_conf1 = { .read = pci_conf1_read, .write = pci_conf1_write, }; @@ -173,7 +173,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus, #undef PCI_CONF2_ADDRESS -struct pci_raw_ops pci_direct_conf2 = { +const struct pci_raw_ops pci_direct_conf2 = { .read = pci_conf2_read, .write = pci_conf2_write, }; @@ -189,7 
+189,7 @@ struct pci_raw_ops pci_direct_conf2 = { * This should be close to trivial, but it isn't, because there are buggy * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID. */ -static int __init pci_sanity_check(struct pci_raw_ops *o) +static int __init pci_sanity_check(const struct pci_raw_ops *o) { u32 x = 0; int year, devfn; diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index f10a7e9..0425342 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -125,7 +125,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, return 0; } -static struct pci_raw_ops pci_mmcfg = { +static const struct pci_raw_ops pci_mmcfg = { .read = pci_mmcfg_read, .write = pci_mmcfg_write, }; diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index 94349f8..41600a7 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -104,7 +104,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus, return 0; } -static struct pci_raw_ops pci_mmcfg = { +static const struct pci_raw_ops pci_mmcfg = { .read = pci_mmcfg_read, .write = pci_mmcfg_write, }; diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 8eb295e..86bd657 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c @@ -112,7 +112,7 @@ static int pci_conf1_mq_write(unsigned int seg, unsigned int bus, #undef PCI_CONF1_MQ_ADDRESS -static struct pci_raw_ops pci_direct_conf1_mq = { +static const struct pci_raw_ops pci_direct_conf1_mq = { .read = pci_conf1_mq_read, .write = pci_conf1_mq_write }; diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c index b889d82..5a58a0a 100644 --- a/arch/x86/pci/olpc.c +++ b/arch/x86/pci/olpc.c @@ -297,7 +297,7 @@ static int pci_olpc_write(unsigned int seg, unsigned int bus, return 0; } -static struct pci_raw_ops pci_olpc_conf = { +static const struct pci_raw_ops pci_olpc_conf = { .read = pci_olpc_read, .write = pci_olpc_write, }; diff --git 
a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 1c975cc..ffd0536 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c @@ -56,50 +56,93 @@ union bios32 { static struct { unsigned long address; unsigned short segment; -} bios32_indirect = { 0, __KERNEL_CS }; +} bios32_indirect __read_only = { 0, __PCIBIOS_CS }; /* * Returns the entry point for the given service, NULL on error */ -static unsigned long bios32_service(unsigned long service) +static unsigned long __devinit bios32_service(unsigned long service) { unsigned char return_code; /* %al */ unsigned long address; /* %ebx */ unsigned long length; /* %ecx */ unsigned long entry; /* %edx */ unsigned long flags; + struct desc_struct d, *gdt; local_irq_save(flags); - __asm__("lcall *(%%edi); cld" + + gdt = get_cpu_gdt_table(smp_processor_id()); + + pack_descriptor(&d, 0UL, 0xFFFFFUL, 0x9B, 0xC); + write_gdt_entry(gdt, GDT_ENTRY_PCIBIOS_CS, &d, DESCTYPE_S); + pack_descriptor(&d, 0UL, 0xFFFFFUL, 0x93, 0xC); + write_gdt_entry(gdt, GDT_ENTRY_PCIBIOS_DS, &d, DESCTYPE_S); + + __asm__("movw %w7, %%ds; lcall *(%%edi); push %%ss; pop %%ds; cld" : "=a" (return_code), "=b" (address), "=c" (length), "=d" (entry) : "0" (service), "1" (0), - "D" (&bios32_indirect)); + "D" (&bios32_indirect), + "r"(__PCIBIOS_DS) + : "memory"); + + pax_open_kernel(); + gdt[GDT_ENTRY_PCIBIOS_CS].a = 0; + gdt[GDT_ENTRY_PCIBIOS_CS].b = 0; + gdt[GDT_ENTRY_PCIBIOS_DS].a = 0; + gdt[GDT_ENTRY_PCIBIOS_DS].b = 0; + pax_close_kernel(); + local_irq_restore(flags); switch (return_code) { - case 0: - return address + entry; - case 0x80: /* Not present */ - printk(KERN_WARNING "bios32_service(0x%lx): not present\n", service); - return 0; - default: /* Shouldn't happen */ - printk(KERN_WARNING "bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n", - service, return_code); + case 0: { + int cpu; + unsigned char flags; + + printk(KERN_INFO "bios32_service: base:%08lx length:%08lx entry:%08lx\n", address, length, entry); + if (address >= 0xFFFF0 || 
length > 0x100000 - address || length <= entry) { + printk(KERN_WARNING "bios32_service: not valid\n"); return 0; + } + address = address + PAGE_OFFSET; + length += 16UL; /* some BIOSs underreport this... */ + flags = 4; + if (length >= 64*1024*1024) { + length >>= PAGE_SHIFT; + flags |= 8; + } + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + gdt = get_cpu_gdt_table(cpu); + pack_descriptor(&d, address, length, 0x9b, flags); + write_gdt_entry(gdt, GDT_ENTRY_PCIBIOS_CS, &d, DESCTYPE_S); + pack_descriptor(&d, address, length, 0x93, flags); + write_gdt_entry(gdt, GDT_ENTRY_PCIBIOS_DS, &d, DESCTYPE_S); + } + return entry; + } + case 0x80: /* Not present */ + printk(KERN_WARNING "bios32_service(0x%lx): not present\n", service); + return 0; + default: /* Shouldn't happen */ + printk(KERN_WARNING "bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n", + service, return_code); + return 0; } } static struct { unsigned long address; unsigned short segment; -} pci_indirect = { 0, __KERNEL_CS }; +} pci_indirect __read_only = { 0, __PCIBIOS_CS }; -static int pci_bios_present; +static int pci_bios_present __read_only; static int __devinit check_pcibios(void) { @@ -108,11 +151,13 @@ static int __devinit check_pcibios(void) unsigned long flags, pcibios_entry; if ((pcibios_entry = bios32_service(PCI_SERVICE))) { - pci_indirect.address = pcibios_entry + PAGE_OFFSET; + pci_indirect.address = pcibios_entry; local_irq_save(flags); - __asm__( - "lcall *(%%edi); cld\n\t" + __asm__("movw %w6, %%ds\n\t" + "lcall *%%ss:(%%edi); cld\n\t" + "push %%ss\n\t" + "pop %%ds\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" "1:" @@ -121,7 +166,8 @@ static int __devinit check_pcibios(void) "=b" (ebx), "=c" (ecx) : "1" (PCIBIOS_PCI_BIOS_PRESENT), - "D" (&pci_indirect) + "D" (&pci_indirect), + "r" (__PCIBIOS_DS) : "memory"); local_irq_restore(flags); @@ -165,7 +211,10 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, switch (len) { case 1: - __asm__("lcall *(%%esi); cld\n\t" + __asm__("movw %w6, %%ds\n\t" + 
"lcall *%%ss:(%%esi); cld\n\t" + "push %%ss\n\t" + "pop %%ds\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" "1:" @@ -174,7 +223,8 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, : "1" (PCIBIOS_READ_CONFIG_BYTE), "b" (bx), "D" ((long)reg), - "S" (&pci_indirect)); + "S" (&pci_indirect), + "r" (__PCIBIOS_DS)); /* * Zero-extend the result beyond 8 bits, do not trust the * BIOS having done it: @@ -182,7 +232,10 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, *value &= 0xff; break; case 2: - __asm__("lcall *(%%esi); cld\n\t" + __asm__("movw %w6, %%ds\n\t" + "lcall *%%ss:(%%esi); cld\n\t" + "push %%ss\n\t" + "pop %%ds\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" "1:" @@ -191,7 +244,8 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, : "1" (PCIBIOS_READ_CONFIG_WORD), "b" (bx), "D" ((long)reg), - "S" (&pci_indirect)); + "S" (&pci_indirect), + "r" (__PCIBIOS_DS)); /* * Zero-extend the result beyond 16 bits, do not trust the * BIOS having done it: @@ -199,7 +253,10 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, *value &= 0xffff; break; case 4: - __asm__("lcall *(%%esi); cld\n\t" + __asm__("movw %w6, %%ds\n\t" + "lcall *%%ss:(%%esi); cld\n\t" + "push %%ss\n\t" + "pop %%ds\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" "1:" @@ -208,7 +265,8 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, : "1" (PCIBIOS_READ_CONFIG_DWORD), "b" (bx), "D" ((long)reg), - "S" (&pci_indirect)); + "S" (&pci_indirect), + "r" (__PCIBIOS_DS)); break; } @@ -231,7 +289,10 @@ static int pci_bios_write(unsigned int seg, unsigned int bus, switch (len) { case 1: - __asm__("lcall *(%%esi); cld\n\t" + __asm__("movw %w6, %%ds\n\t" + "lcall *%%ss:(%%esi); cld\n\t" + "push %%ss\n\t" + "pop %%ds\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" "1:" @@ -240,10 +301,14 @@ static int pci_bios_write(unsigned int seg, unsigned int bus, "c" (value), "b" (bx), "D" ((long)reg), - "S" (&pci_indirect)); + "S" (&pci_indirect), + "r" (__PCIBIOS_DS)); break; case 2: - __asm__("lcall *(%%esi); 
cld\n\t" + __asm__("movw %w6, %%ds\n\t" + "lcall *%%ss:(%%esi); cld\n\t" + "push %%ss\n\t" + "pop %%ds\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" "1:" @@ -252,10 +317,14 @@ static int pci_bios_write(unsigned int seg, unsigned int bus, "c" (value), "b" (bx), "D" ((long)reg), - "S" (&pci_indirect)); + "S" (&pci_indirect), + "r" (__PCIBIOS_DS)); break; case 4: - __asm__("lcall *(%%esi); cld\n\t" + __asm__("movw %w6, %%ds\n\t" + "lcall *%%ss:(%%esi); cld\n\t" + "push %%ss\n\t" + "pop %%ds\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" "1:" @@ -264,7 +333,8 @@ static int pci_bios_write(unsigned int seg, unsigned int bus, "c" (value), "b" (bx), "D" ((long)reg), - "S" (&pci_indirect)); + "S" (&pci_indirect), + "r" (__PCIBIOS_DS)); break; } @@ -278,7 +348,7 @@ static int pci_bios_write(unsigned int seg, unsigned int bus, * Function table for BIOS32 access */ -static struct pci_raw_ops pci_bios_access = { +static const struct pci_raw_ops pci_bios_access = { .read = pci_bios_read, .write = pci_bios_write }; @@ -287,7 +357,7 @@ static struct pci_raw_ops pci_bios_access = { * Try to find PCI BIOS. */ -static struct pci_raw_ops * __devinit pci_find_bios(void) +static const struct pci_raw_ops * __devinit pci_find_bios(void) { union bios32 *check; unsigned char sum; @@ -368,10 +438,13 @@ struct irq_routing_table * pcibios_get_irq_routing_table(void) DBG("PCI: Fetching IRQ routing table... 
"); __asm__("push %%es\n\t" + "movw %w8, %%ds\n\t" "push %%ds\n\t" "pop %%es\n\t" - "lcall *(%%esi); cld\n\t" + "lcall *%%ss:(%%esi); cld\n\t" "pop %%es\n\t" + "push %%ss\n\t" + "pop %%ds\n" "jc 1f\n\t" "xor %%ah, %%ah\n" "1:" @@ -382,7 +455,8 @@ struct irq_routing_table * pcibios_get_irq_routing_table(void) "1" (0), "D" ((long) &opt), "S" (&pci_indirect), - "m" (opt) + "m" (opt), + "r" (__PCIBIOS_DS) : "memory"); DBG("OK ret=%d, size=%d, map=%x\n", ret, opt.size, map); if (ret & 0xff00) @@ -406,7 +480,10 @@ int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq) { int ret; - __asm__("lcall *(%%esi); cld\n\t" + __asm__("movw %w5, %%ds\n\t" + "lcall *%%ss:(%%esi); cld\n\t" + "push %%ss\n\t" + "pop %%ds\n" "jc 1f\n\t" "xor %%ah, %%ah\n" "1:" @@ -414,7 +491,8 @@ int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq) : "0" (PCIBIOS_SET_PCI_HW_INT), "b" ((dev->bus->number << 8) | dev->devfn), "c" ((irq << 8) | (pin + 10)), - "S" (&pci_indirect)); + "S" (&pci_indirect), + "r" (__PCIBIOS_DS)); return !(ret & 0xff00); } EXPORT_SYMBOL(pcibios_set_irq_routing); diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index fa0f651..9d8f3d9 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -129,7 +129,7 @@ static void do_fpu_end(void) static void fix_processor_context(void) { int cpu = smp_processor_id(); - struct tss_struct *t = &per_cpu(init_tss, cpu); + struct tss_struct *t = init_tss + cpu; set_tss_desc(cpu, t); /* * This just modifies memory; should not be @@ -139,7 +139,9 @@ static void fix_processor_context(void) */ #ifdef CONFIG_X86_64 + pax_open_kernel(); get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9; + pax_close_kernel(); syscall_init(); /* This sets MSR_*STAR and related */ #endif diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index dd78ef6..f9d928d 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -122,7 +122,7 @@ quiet_cmd_vdso = VDSO $@ $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ 
-Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) -VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +VDSO_LDFLAGS = -fPIC -shared -Wl,--no-undefined $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) GCOV_PROFILE := n # diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index ee55754..0013b2e 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -22,24 +22,48 @@ #include #include #include +#include #include "vextern.h" #define gtod vdso_vsyscall_gtod_data +notrace noinline long __vdso_fallback_time(long *t) +{ + long secs; + asm volatile("syscall" + : "=a" (secs) + : "0" (__NR_time),"D" (t) : "r11", "cx", "memory"); + return secs; +} + notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; asm("syscall" : "=a" (ret) : - "0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory"); + "0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "r11", "cx", "memory"); return ret; } +notrace static inline cycle_t __vdso_vread_hpet(void) +{ + return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); +} + +notrace static inline cycle_t __vdso_vread_tsc(void) +{ + cycle_t ret = (cycle_t)vget_cycles(); + + return ret >= gtod->clock.cycle_last ? 
ret : gtod->clock.cycle_last; +} + notrace static inline long vgetns(void) { long v; - cycles_t (*vread)(void); - vread = gtod->clock.vread; - v = (vread() - gtod->clock.cycle_last) & gtod->clock.mask; + if (gtod->clock.name[0] == 't' && gtod->clock.name[1] == 's' && gtod->clock.name[2] == 'c' && !gtod->clock.name[3]) + v = __vdso_vread_tsc(); + else + v = __vdso_vread_hpet(); + v = (v - gtod->clock.cycle_last) & gtod->clock.mask; return (v * gtod->clock.mult) >> gtod->clock.shift; } @@ -113,7 +137,9 @@ notrace static noinline int do_monotonic_coarse(struct timespec *ts) notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) { - if (likely(gtod->sysctl_enabled)) + if (likely(gtod->sysctl_enabled && + ((gtod->clock.name[0] == 'h' && gtod->clock.name[1] == 'p' && gtod->clock.name[2] == 'e' && gtod->clock.name[3] == 't' && !gtod->clock.name[4]) || + (gtod->clock.name[0] == 't' && gtod->clock.name[1] == 's' && gtod->clock.name[2] == 'c' && !gtod->clock.name[3])))) switch (clock) { case CLOCK_REALTIME: if (likely(gtod->clock.vread)) @@ -133,10 +159,20 @@ notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) int clock_gettime(clockid_t, struct timespec *) __attribute__((weak, alias("__vdso_clock_gettime"))); +notrace noinline int __vdso_fallback_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + long ret; + asm("syscall" : "=a" (ret) : + "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "r11", "cx", "memory"); + return ret; +} + notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) { - long ret; - if (likely(gtod->sysctl_enabled && gtod->clock.vread)) { + if (likely(gtod->sysctl_enabled && + ((gtod->clock.name[0] == 'h' && gtod->clock.name[1] == 'p' && gtod->clock.name[2] == 'e' && gtod->clock.name[3] == 't' && !gtod->clock.name[4]) || + (gtod->clock.name[0] == 't' && gtod->clock.name[1] == 's' && gtod->clock.name[2] == 'c' && !gtod->clock.name[3])))) + { if (likely(tv != NULL)) { BUILD_BUG_ON(offsetof(struct 
timeval, tv_usec) != offsetof(struct timespec, tv_nsec) || @@ -151,9 +187,7 @@ notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) } return 0; } - asm("syscall" : "=a" (ret) : - "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); - return ret; + return __vdso_fallback_gettimeofday(tv, tz); } int gettimeofday(struct timeval *, struct timezone *) __attribute__((weak, alias("__vdso_gettimeofday"))); diff --git a/arch/x86/vdso/vdso.lds.S b/arch/x86/vdso/vdso.lds.S index 4e5dd3b..00ba15e 100644 --- a/arch/x86/vdso/vdso.lds.S +++ b/arch/x86/vdso/vdso.lds.S @@ -35,3 +35,9 @@ VDSO64_PRELINK = VDSO_PRELINK; #define VEXTERN(x) VDSO64_ ## x = vdso_ ## x; #include "vextern.h" #undef VEXTERN + +#define VEXTERN(x) VDSO64_ ## x = __vdso_ ## x; +VEXTERN(fallback_gettimeofday) +VEXTERN(fallback_time) +VEXTERN(getcpu) +#undef VEXTERN diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 58bc00f..d53fb48 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -25,6 +25,7 @@ #include #include #include +#include enum { VDSO_DISABLED = 0, @@ -226,7 +227,7 @@ static inline void map_compat_vdso(int map) void enable_sep_cpu(void) { int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = init_tss + cpu; if (!boot_cpu_has(X86_FEATURE_SEP)) { put_cpu(); @@ -249,7 +250,7 @@ static int __init gate_vma_init(void) gate_vma.vm_start = FIXADDR_USER_START; gate_vma.vm_end = FIXADDR_USER_END; gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; - gate_vma.vm_page_prot = __P101; + gate_vma.vm_page_prot = vm_get_page_prot(gate_vma.vm_flags); /* * Make sure the vDSO gets into every core dump. 
* Dumping its contents makes post-mortem fully interpretable later @@ -331,14 +332,14 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (compat) addr = VDSO_HIGH_BASE; else { - addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); + addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, MAP_EXECUTABLE); if (IS_ERR_VALUE(addr)) { ret = addr; goto up_fail; } } - current->mm->context.vdso = (void *)addr; + current->mm->context.vdso = addr; if (compat_uses_vma || !compat) { /* @@ -361,11 +362,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) } current_thread_info()->sysenter_return = - VDSO32_SYMBOL(addr, SYSENTER_RETURN); + (__force void __user *)VDSO32_SYMBOL(addr, SYSENTER_RETURN); up_fail: if (ret) - current->mm->context.vdso = NULL; + current->mm->context.vdso = 0; up_write(&mm->mmap_sem); @@ -413,8 +414,14 @@ __initcall(ia32_binfmt_init); const char *arch_vma_name(struct vm_area_struct *vma) { - if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) + if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso) return "[vdso]"; + +#ifdef CONFIG_PAX_SEGMEXEC + if (vma->vm_mm && vma->vm_mirror && vma->vm_mirror->vm_start == vma->vm_mm->context.vdso) + return "[vdso]"; +#endif + return NULL; } @@ -423,7 +430,7 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk) struct mm_struct *mm = tsk->mm; /* Check to see if this task was created in compat vdso mode */ - if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE) + if (mm && mm->context.vdso == VDSO_HIGH_BASE) return &gate_vma; return NULL; } diff --git a/arch/x86/vdso/vextern.h b/arch/x86/vdso/vextern.h index 1683ba2..48d07f3 100644 --- a/arch/x86/vdso/vextern.h +++ b/arch/x86/vdso/vextern.h @@ -11,6 +11,5 @@ put into vextern.h and be referenced as a pointer with vdso prefix. The main kernel later fills in the values. 
*/ -VEXTERN(jiffies) VEXTERN(vgetcpu_mode) VEXTERN(vsyscall_gtod_data) diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 21e1aeb..2c0b3c4 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -17,8 +17,6 @@ #include "vextern.h" /* Just for VMAGIC. */ #undef VEXTERN -unsigned int __read_mostly vdso_enabled = 1; - extern char vdso_start[], vdso_end[]; extern unsigned short vdso_sync_cpuid; @@ -27,10 +25,8 @@ static unsigned vdso_size; static inline void *var_ref(void *p, char *name) { - if (*(void **)p != (void *)VMAGIC) { - printk("VDSO: variable %s broken\n", name); - vdso_enabled = 0; - } + if (*(void **)p != (void *)VMAGIC) + panic("VDSO: variable %s broken\n", name); return p; } @@ -57,21 +53,18 @@ static int __init init_vdso_vars(void) if (!vbase) goto oom; - if (memcmp(vbase, "\177ELF", 4)) { - printk("VDSO: I'm broken; not ELF\n"); - vdso_enabled = 0; - } + if (memcmp(vbase, ELFMAG, SELFMAG)) + panic("VDSO: I'm broken; not ELF\n"); #define VEXTERN(x) \ *(typeof(__ ## x) **) var_ref(VDSO64_SYMBOL(vbase, x), #x) = &__ ## x; #include "vextern.h" #undef VEXTERN + vunmap(vbase); return 0; oom: - printk("Cannot allocate vdso\n"); - vdso_enabled = 0; - return -ENOMEM; + panic("Cannot allocate vdso\n"); } __initcall(init_vdso_vars); @@ -102,13 +95,15 @@ static unsigned long vdso_addr(unsigned long start, unsigned len) int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; - unsigned long addr; + unsigned long addr = 0; int ret; - if (!vdso_enabled) - return 0; - down_write(&mm->mmap_sem); + +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + addr = vdso_addr(mm->start_stack, vdso_size); addr = get_unmapped_area(NULL, addr, vdso_size, 0, 0); if (IS_ERR_VALUE(addr)) { @@ -116,7 +111,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) goto up_fail; } - current->mm->context.vdso = (void *)addr; + current->mm->context.vdso = addr; ret = 
install_special_mapping(mm, addr, vdso_size, VM_READ|VM_EXEC| @@ -124,7 +119,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) VM_ALWAYSDUMP, vdso_pages); if (ret) { - current->mm->context.vdso = NULL; + current->mm->context.vdso = 0; goto up_fail; } @@ -132,10 +127,3 @@ up_fail: up_write(&mm->mmap_sem); return ret; } - -static __init int vdso_setup(char *s) -{ - vdso_enabled = simple_strtoul(s, NULL, 0); - return 0; -} -__setup("vdso=", vdso_setup); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0087b00..eecb34f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -71,8 +71,6 @@ EXPORT_SYMBOL_GPL(xen_start_info); struct shared_info xen_dummy_shared_info; -void *xen_initial_gdt; - /* * Point at some empty memory to start with. We map the real shared_info * page as soon as fixmap is up and running. @@ -548,7 +546,7 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g) preempt_disable(); - start = __get_cpu_var(idt_desc).address; + start = (unsigned long)__get_cpu_var(idt_desc).address; end = start + __get_cpu_var(idt_desc).size + 1; xen_mc_flush(); @@ -993,7 +991,7 @@ static const struct pv_apic_ops xen_apic_ops __initdata = { #endif }; -static void xen_reboot(int reason) +static __noreturn void xen_reboot(int reason) { struct sched_shutdown r = { .reason = reason }; @@ -1001,17 +999,17 @@ static void xen_reboot(int reason) BUG(); } -static void xen_restart(char *msg) +static __noreturn void xen_restart(char *msg) { xen_reboot(SHUTDOWN_reboot); } -static void xen_emergency_restart(void) +static __noreturn void xen_emergency_restart(void) { xen_reboot(SHUTDOWN_reboot); } -static void xen_machine_halt(void) +static __noreturn void xen_machine_halt(void) { xen_reboot(SHUTDOWN_poweroff); } @@ -1095,9 +1093,20 @@ asmlinkage void __init xen_start_kernel(void) */ __userpte_alloc_gfp &= ~__GFP_HIGHMEM; -#ifdef CONFIG_X86_64 /* Work out if we support NX */ - check_efer(); 
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) + if ((cpuid_eax(0x80000000) & 0xffff0000) == 0x80000000 && + (cpuid_edx(0x80000001) & (1U << (X86_FEATURE_NX & 31)))) { + unsigned l, h; + +#ifdef CONFIG_X86_PAE + nx_enabled = 1; +#endif + __supported_pte_mask |= _PAGE_NX; + rdmsr(MSR_EFER, l, h); + l |= EFER_NX; + wrmsr(MSR_EFER, l, h); + } #endif xen_setup_features(); @@ -1129,13 +1138,6 @@ asmlinkage void __init xen_start_kernel(void) machine_ops = xen_machine_ops; - /* - * The only reliable way to retain the initial address of the - * percpu gdt_page is to remember it here, so we can go and - * mark it RW later, when the initial percpu area is freed. - */ - xen_initial_gdt = &per_cpu(gdt_page, 0); - xen_smp_init(); pgd = (pgd_t *)xen_start_info->pt_base; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3f90a2c..2c2ad84 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1719,6 +1719,9 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, convert_pfn_mfn(init_level4_pgt); convert_pfn_mfn(level3_ident_pgt); convert_pfn_mfn(level3_kernel_pgt); + convert_pfn_mfn(level3_vmalloc_start_pgt); + convert_pfn_mfn(level3_vmalloc_end_pgt); + convert_pfn_mfn(level3_vmemmap_pgt); l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); @@ -1737,7 +1740,11 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_vmalloc_start_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_vmalloc_end_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_vmemmap_pgt, PAGE_KERNEL_RO); set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); + set_page_prot(level2_vmemmap_pgt, PAGE_KERNEL_RO); set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); @@ -1860,6 +1867,7 @@ static __init void xen_post_allocator_init(void) 
pv_mmu_ops.set_pud = xen_set_pud; #if PAGETABLE_LEVELS == 4 pv_mmu_ops.set_pgd = xen_set_pgd; + pv_mmu_ops.set_pgd_batched = xen_set_pgd; #endif /* This will work as long as patching hasn't happened yet @@ -1946,6 +1954,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .pud_val = PV_CALLEE_SAVE(xen_pud_val), .make_pud = PV_CALLEE_SAVE(xen_make_pud), .set_pgd = xen_set_pgd_hyper, + .set_pgd_batched = xen_set_pgd_hyper, .alloc_pud = xen_alloc_pmd_init, .release_pud = xen_release_pmd_init, diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index a96204a..fca9b8e 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -168,11 +168,6 @@ static void __init xen_smp_prepare_boot_cpu(void) { BUG_ON(smp_processor_id() != 0); native_smp_prepare_boot_cpu(); - - /* We've switched to the "real" per-cpu gdt, so make sure the - old memory can be recycled */ - make_lowmem_page_readwrite(xen_initial_gdt); - xen_setup_vcpu_info_placement(); } @@ -241,12 +236,12 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) gdt = get_cpu_gdt_table(cpu); ctxt->flags = VGCF_IN_KERNEL; - ctxt->user_regs.ds = __USER_DS; - ctxt->user_regs.es = __USER_DS; + ctxt->user_regs.ds = __KERNEL_DS; + ctxt->user_regs.es = __KERNEL_DS; ctxt->user_regs.ss = __KERNEL_DS; #ifdef CONFIG_X86_32 ctxt->user_regs.fs = __KERNEL_PERCPU; - ctxt->user_regs.gs = __KERNEL_STACK_CANARY; + savesegment(gs, ctxt->user_regs.gs); #else ctxt->gs_base_kernel = per_cpu_offset(cpu); #endif @@ -297,13 +292,12 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) int rc; per_cpu(current_task, cpu) = idle; + per_cpu(current_tinfo, cpu) = &idle->tinfo; #ifdef CONFIG_X86_32 irq_ctx_init(cpu); #else clear_tsk_thread_flag(idle, TIF_FORK); - per_cpu(kernel_stack, cpu) = - (unsigned long)task_stack_page(idle) - - KERNEL_STACK_OFFSET + THREAD_SIZE; + per_cpu(kernel_stack, cpu) = (unsigned long)task_stack_page(idle) - 16 + THREAD_SIZE; #endif xen_setup_runstate_info(cpu); xen_setup_timer(cpu); diff --git 
a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 9a95a9c..4f39e774 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -83,14 +83,14 @@ ENTRY(xen_iret) ESP_OFFSET=4 # bytes pushed onto stack /* - * Store vcpu_info pointer for easy access. Do it this way to - * avoid having to reload %fs + * Store vcpu_info pointer for easy access. */ #ifdef CONFIG_SMP - GET_THREAD_INFO(%eax) - movl TI_cpu(%eax), %eax - movl __per_cpu_offset(,%eax,4), %eax - mov per_cpu__xen_vcpu(%eax), %eax + push %fs + mov $(__KERNEL_PERCPU), %eax + mov %eax, %fs + mov PER_CPU_VAR(xen_vcpu), %eax + pop %fs #else movl per_cpu__xen_vcpu, %eax #endif diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 1a5ff24..a187d40 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -19,6 +19,17 @@ ENTRY(startup_xen) #ifdef CONFIG_X86_32 mov %esi,xen_start_info mov $init_thread_union+THREAD_SIZE,%esp +#ifdef CONFIG_SMP + movl $cpu_gdt_table,%edi + movl $__per_cpu_load,%eax + movw %ax,__KERNEL_PERCPU + 2(%edi) + rorl $16,%eax + movb %al,__KERNEL_PERCPU + 4(%edi) + movb %ah,__KERNEL_PERCPU + 7(%edi) + movl $__per_cpu_end - 1,%eax + subl $__per_cpu_start,%eax + movw %ax,__KERNEL_PERCPU + 0(%edi) +#endif #else mov %rsi,xen_start_info mov $init_thread_union+THREAD_SIZE,%rsp diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index f9153a3..51eab3d 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -10,8 +10,6 @@ extern const char xen_hypervisor_callback[]; extern const char xen_failsafe_callback[]; -extern void *xen_initial_gdt; - struct trap_info; void xen_copy_trap_info(struct trap_info *traps); diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 15c6308..96e83c2 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -278,7 +278,7 @@ static struct attribute *integrity_attrs[] = { NULL, }; -static struct sysfs_ops integrity_ops = { +static const struct sysfs_ops integrity_ops = { .show = 
&integrity_attr_show, .store = &integrity_attr_store, }; diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c index ca56420..f2fc409 100644 --- a/block/blk-iopoll.c +++ b/block/blk-iopoll.c @@ -77,7 +77,7 @@ void blk_iopoll_complete(struct blk_iopoll *iopoll) } EXPORT_SYMBOL(blk_iopoll_complete); -static void blk_iopoll_softirq(struct softirq_action *h) +static void blk_iopoll_softirq(void) { struct list_head *list = &__get_cpu_var(blk_cpu_iopoll); int rearm = 0, budget = blk_iopoll_budget; diff --git a/block/blk-map.c b/block/blk-map.c index 30a7e51..0aeec6a 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -54,7 +54,7 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, * direct dma. else, set up kernel bounce buffers */ uaddr = (unsigned long) ubuf; - if (blk_rq_aligned(q, ubuf, len) && !map_data) + if (blk_rq_aligned(q, (__force void *)ubuf, len) && !map_data) bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask); else bio = bio_copy_user(q, map_data, uaddr, len, reading, gfp_mask); @@ -201,12 +201,13 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, for (i = 0; i < iov_count; i++) { unsigned long uaddr = (unsigned long)iov[i].iov_base; + if (!iov[i].iov_len) + return -EINVAL; + if (uaddr & queue_dma_alignment(q)) { unaligned = 1; break; } - if (!iov[i].iov_len) - return -EINVAL; } if (unaligned || (q->dma_pad_mask & len) || map_data) @@ -299,7 +300,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, if (!len || !kbuf) return -EINVAL; - do_copy = !blk_rq_aligned(q, kbuf, len) || object_is_on_stack(kbuf); + do_copy = !blk_rq_aligned(q, kbuf, len) || object_starts_on_stack(kbuf); if (do_copy) bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading); else diff --git a/block/blk-softirq.c b/block/blk-softirq.c index ee9c216..58d410a 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -17,7 +17,7 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_done); * Softirq action 
handler - move entries to local list and loop over them * while passing them to the queue registered handler. */ -static void blk_done_softirq(struct softirq_action *h) +static void blk_done_softirq(void) { struct list_head *cpu_list, local_list; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index bb9c5ea..5330d48 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -414,7 +414,7 @@ static void blk_release_queue(struct kobject *kobj) kmem_cache_free(blk_requestq_cachep, q); } -static struct sysfs_ops queue_sysfs_ops = { +static const struct sysfs_ops queue_sysfs_ops = { .show = queue_attr_show, .store = queue_attr_store, }; diff --git a/block/bsg.c b/block/bsg.c index 7154a7a..08ac2f0 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -175,16 +175,24 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq, struct sg_io_v4 *hdr, struct bsg_device *bd, fmode_t has_write_perm) { + unsigned char tmpcmd[sizeof(rq->__cmd)]; + unsigned char *cmdptr; + if (hdr->request_len > BLK_MAX_CDB) { rq->cmd = kzalloc(hdr->request_len, GFP_KERNEL); if (!rq->cmd) return -ENOMEM; - } + cmdptr = rq->cmd; + } else + cmdptr = tmpcmd; - if (copy_from_user(rq->cmd, (void *)(unsigned long)hdr->request, + if (copy_from_user(cmdptr, (void __user *)(unsigned long)hdr->request, hdr->request_len)) return -EFAULT; + if (cmdptr != rq->cmd) + memcpy(rq->cmd, cmdptr, hdr->request_len); + if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) { if (blk_verify_command(rq->cmd, has_write_perm)) return -EPERM; @@ -282,7 +290,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm, rq->next_rq = next_rq; next_rq->cmd_type = rq->cmd_type; - dxferp = (void*)(unsigned long)hdr->din_xferp; + dxferp = (void __user *)(unsigned long)hdr->din_xferp; ret = blk_rq_map_user(q, next_rq, NULL, dxferp, hdr->din_xfer_len, GFP_KERNEL); if (ret) @@ -291,10 +299,10 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm, if (hdr->dout_xfer_len) 
{ dxfer_len = hdr->dout_xfer_len; - dxferp = (void*)(unsigned long)hdr->dout_xferp; + dxferp = (void __user *)(unsigned long)hdr->dout_xferp; } else if (hdr->din_xfer_len) { dxfer_len = hdr->din_xfer_len; - dxferp = (void*)(unsigned long)hdr->din_xferp; + dxferp = (void __user *)(unsigned long)hdr->din_xferp; } else dxfer_len = 0; @@ -436,7 +444,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, int len = min_t(unsigned int, hdr->max_response_len, rq->sense_len); - ret = copy_to_user((void*)(unsigned long)hdr->response, + ret = copy_to_user((void __user *)(unsigned long)hdr->response, rq->sense, len); if (!ret) hdr->response_len = len; diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 9bd086c..ca1fc22 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -354,7 +354,7 @@ static int compat_fd_ioctl(struct block_device *bdev, fmode_t mode, err |= __get_user(f->spec1, &uf->spec1); err |= __get_user(f->fmt_gap, &uf->fmt_gap); err |= __get_user(name, &uf->name); - f->name = compat_ptr(name); + f->name = (void __force_kernel *)compat_ptr(name); if (err) { err = -EFAULT; goto out; diff --git a/block/elevator.c b/block/elevator.c index a847046..75a1746 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -889,7 +889,7 @@ elv_attr_store(struct kobject *kobj, struct attribute *attr, return error; } -static struct sysfs_ops elv_sysfs_ops = { +static const struct sysfs_ops elv_sysfs_ops = { .show = elv_attr_show, .store = elv_attr_store, }; diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 114ee29..d0efa50 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -220,8 +221,20 @@ EXPORT_SYMBOL(blk_verify_command); static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq, struct sg_io_hdr *hdr, fmode_t mode) { - if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len)) + unsigned char tmpcmd[sizeof(rq->__cmd)]; + 
unsigned char *cmdptr; + + if (rq->cmd != rq->__cmd) + cmdptr = rq->cmd; + else + cmdptr = tmpcmd; + + if (copy_from_user(cmdptr, hdr->cmdp, hdr->cmd_len)) return -EFAULT; + + if (cmdptr != rq->cmd) + memcpy(rq->cmd, cmdptr, hdr->cmd_len); + if (blk_verify_command(rq->cmd, mode & FMODE_WRITE)) return -EPERM; @@ -430,6 +443,8 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, int err; unsigned int in_len, out_len, bytes, opcode, cmdlen; char *buffer = NULL, sense[SCSI_SENSE_BUFFERSIZE]; + unsigned char tmpcmd[sizeof(rq->__cmd)]; + unsigned char *cmdptr; if (!sic) return -EINVAL; @@ -463,9 +478,18 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, */ err = -EFAULT; rq->cmd_len = cmdlen; - if (copy_from_user(rq->cmd, sic->data, cmdlen)) + + if (rq->cmd != rq->__cmd) + cmdptr = rq->cmd; + else + cmdptr = tmpcmd; + + if (copy_from_user(cmdptr, sic->data, cmdlen)) goto error; + if (rq->cmd != cmdptr) + memcpy(rq->cmd, cmdptr, cmdlen); + if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len)) goto error; @@ -689,9 +713,54 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod } EXPORT_SYMBOL(scsi_cmd_ioctl); +int scsi_verify_blk_ioctl(struct block_device *bd, unsigned int cmd) +{ + if (bd && bd == bd->bd_contains) + return 0; + + /* Actually none of these is particularly useful on a partition, + * but they are safe. + */ + switch (cmd) { + case SCSI_IOCTL_GET_IDLUN: + case SCSI_IOCTL_GET_BUS_NUMBER: + case SCSI_IOCTL_GET_PCI: + case SCSI_IOCTL_PROBE_HOST: + case SG_GET_VERSION_NUM: + case SG_SET_TIMEOUT: + case SG_GET_TIMEOUT: + case SG_GET_RESERVED_SIZE: + case SG_SET_RESERVED_SIZE: + case SG_EMULATED_HOST: + return 0; + case CDROM_GET_CAPABILITY: + /* Keep this until we remove the printk below. udev sends it + * and we do not want to spam dmesg about it. CD-ROMs do + * not have partitions, so we get here only for disks. 
+ */ + return -ENOIOCTLCMD; + default: + break; + } + + /* In particular, rule out all resets and host-specific ioctls. */ + if (printk_ratelimit()) + printk(KERN_WARNING "%s: sending ioctl %x to a partition!\n", + current->comm, cmd); + + return capable(CAP_SYS_RAWIO) ? 0 : -ENOIOCTLCMD; +} +EXPORT_SYMBOL(scsi_verify_blk_ioctl); + int scsi_cmd_blk_ioctl(struct block_device *bd, fmode_t mode, unsigned int cmd, void __user *arg) { + int ret; + + ret = scsi_verify_blk_ioctl(bd, cmd); + if (ret < 0) + return ret; + return scsi_cmd_ioctl(bd->bd_disk->queue, bd->bd_disk, mode, cmd, arg); } EXPORT_SYMBOL(scsi_cmd_blk_ioctl); diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 3533582..f143117 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -50,7 +50,7 @@ struct cryptd_blkcipher_ctx { struct cryptd_blkcipher_request_ctx { crypto_completion_t complete; -}; +} __no_const; struct cryptd_hash_ctx { struct crypto_shash *child; diff --git a/crypto/gf128mul.c b/crypto/gf128mul.c index a90d260..7a9765e 100644 --- a/crypto/gf128mul.c +++ b/crypto/gf128mul.c @@ -182,7 +182,7 @@ void gf128mul_lle(be128 *r, const be128 *b) for (i = 0; i < 7; ++i) gf128mul_x_lle(&p[i + 1], &p[i]); - memset(r, 0, sizeof(r)); + memset(r, 0, sizeof(*r)); for (i = 0;;) { u8 ch = ((u8 *)b)[15 - i]; @@ -220,7 +220,7 @@ void gf128mul_bbe(be128 *r, const be128 *b) for (i = 0; i < 7; ++i) gf128mul_x_bbe(&p[i + 1], &p[i]); - memset(r, 0, sizeof(r)); + memset(r, 0, sizeof(*r)); for (i = 0;;) { u8 ch = ((u8 *)b)[i]; diff --git a/crypto/serpent.c b/crypto/serpent.c index b651a55..023297d 100644 --- a/crypto/serpent.c +++ b/crypto/serpent.c @@ -21,6 +21,7 @@ #include #include #include +#include /* Key is padded to the maximum of 256 bits before round key generation. * Any key length <= 256 bits (32 bytes) is allowed by the algorithm. 
@@ -224,6 +225,8 @@ static int serpent_setkey(struct crypto_tfm *tfm, const u8 *key, u32 r0,r1,r2,r3,r4; int i; + pax_track_stack(); + /* Copy key, add padding */ for (i = 0; i < keylen; ++i) diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index 0d2cdb8..d8de48d 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -30,7 +30,7 @@ #include #include -#define ACPI_PROCESSOR_AGGREGATOR_CLASS "processor_aggregator" +#define ACPI_PROCESSOR_AGGREGATOR_CLASS "acpi_pad" #define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME "Processor Aggregator" #define ACPI_PROCESSOR_AGGREGATOR_NOTIFY 0x80 static DEFINE_MUTEX(isolated_cpus_lock); diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 3f4602b..2e41d36 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -763,7 +763,7 @@ DECLARE_FILE_FUNCTIONS(alarm); } static struct battery_file { - struct file_operations ops; + const struct file_operations ops; mode_t mode; const char *name; } acpi_battery_file[] = { diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index 7338b6a..82f0257 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -77,7 +77,7 @@ struct dock_dependent_device { struct list_head list; struct list_head hotplug_list; acpi_handle handle; - struct acpi_dock_ops *ops; + const struct acpi_dock_ops *ops; void *context; }; @@ -605,7 +605,7 @@ EXPORT_SYMBOL_GPL(unregister_dock_notifier); * the dock driver after _DCK is executed. 
*/ int -register_hotplug_dock_device(acpi_handle handle, struct acpi_dock_ops *ops, +register_hotplug_dock_device(acpi_handle handle, const struct acpi_dock_ops *ops, void *context) { struct dock_dependent_device *dd; diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 7c1c59e..2993595 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -523,6 +523,8 @@ acpi_os_read_memory(acpi_physical_address phys_addr, u32 * value, u32 width) void __iomem *virt_addr; virt_addr = ioremap(phys_addr, width); + if (!virt_addr) + return AE_NO_MEMORY; if (!value) value = &dummy; @@ -551,6 +553,8 @@ acpi_os_write_memory(acpi_physical_address phys_addr, u32 value, u32 width) void __iomem *virt_addr; virt_addr = ioremap(phys_addr, width); + if (!virt_addr) + return AE_NO_MEMORY; switch (width) { case 8: diff --git a/drivers/acpi/power_meter.c b/drivers/acpi/power_meter.c index c216062..eec10d2 100644 --- a/drivers/acpi/power_meter.c +++ b/drivers/acpi/power_meter.c @@ -315,8 +315,6 @@ static ssize_t set_trip(struct device *dev, struct device_attribute *devattr, return res; temp /= 1000; - if (temp < 0) - return -EINVAL; mutex_lock(&resource->lock); resource->trip[attr->index - 7] = temp; diff --git a/drivers/acpi/proc.c b/drivers/acpi/proc.c index d0d25e2..961643d 100644 --- a/drivers/acpi/proc.c +++ b/drivers/acpi/proc.c @@ -391,20 +391,15 @@ acpi_system_write_wakeup_device(struct file *file, size_t count, loff_t * ppos) { struct list_head *node, *next; - char strbuf[5]; - char str[5] = ""; - unsigned int len = count; + char strbuf[5] = {0}; struct acpi_device *found_dev = NULL; - if (len > 4) - len = 4; - if (len < 0) - return -EFAULT; + if (count > 4) + count = 4; - if (copy_from_user(strbuf, buffer, len)) + if (copy_from_user(strbuf, buffer, count)) return -EFAULT; - strbuf[len] = '\0'; - sscanf(strbuf, "%s", str); + strbuf[count] = '\0'; mutex_lock(&acpi_device_lock); list_for_each_safe(node, next, &acpi_wakeup_device_list) { @@ -413,7 +408,7 @@ 
acpi_system_write_wakeup_device(struct file *file, if (!dev->wakeup.flags.valid) continue; - if (!strncmp(dev->pnp.bus_id, str, 4)) { + if (!strncmp(dev->pnp.bus_id, strbuf, 4)) { dev->wakeup.state.enabled = dev->wakeup.state.enabled ? 0 : 1; found_dev = dev; diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 7102474..de8ad22 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -790,7 +790,7 @@ static int __cpuinit acpi_processor_add(struct acpi_device *device) return 0; } - BUG_ON((pr->id >= nr_cpu_ids) || (pr->id < 0)); + BUG_ON(pr->id >= nr_cpu_ids); /* * Buggy BIOS check diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c index d933980..5761f13 100644 --- a/drivers/acpi/sbshc.c +++ b/drivers/acpi/sbshc.c @@ -17,7 +17,7 @@ #define PREFIX "ACPI: " -#define ACPI_SMB_HC_CLASS "smbus_host_controller" +#define ACPI_SMB_HC_CLASS "smbus_host_ctl" #define ACPI_SMB_HC_DEVICE_NAME "ACPI SMBus HC" struct acpi_smb_hc { diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 0458094..6978e7b 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -283,7 +283,7 @@ static int acpi_suspend_state_valid(suspend_state_t pm_state) } } -static struct platform_suspend_ops acpi_suspend_ops = { +static const struct platform_suspend_ops acpi_suspend_ops = { .valid = acpi_suspend_state_valid, .begin = acpi_suspend_begin, .prepare_late = acpi_pm_prepare, @@ -311,7 +311,7 @@ static int acpi_suspend_begin_old(suspend_state_t pm_state) * The following callbacks are used if the pre-ACPI 2.0 suspend ordering has * been requested. 
*/ -static struct platform_suspend_ops acpi_suspend_ops_old = { +static const struct platform_suspend_ops acpi_suspend_ops_old = { .valid = acpi_suspend_state_valid, .begin = acpi_suspend_begin_old, .prepare_late = acpi_pm_disable_gpes, @@ -460,7 +460,7 @@ static void acpi_pm_enable_gpes(void) acpi_enable_all_runtime_gpes(); } -static struct platform_hibernation_ops acpi_hibernation_ops = { +static const struct platform_hibernation_ops acpi_hibernation_ops = { .begin = acpi_hibernation_begin, .end = acpi_pm_end, .pre_snapshot = acpi_hibernation_pre_snapshot, @@ -513,7 +513,7 @@ static int acpi_hibernation_pre_snapshot_old(void) * The following callbacks are used if the pre-ACPI 2.0 suspend ordering has * been requested. */ -static struct platform_hibernation_ops acpi_hibernation_ops_old = { +static const struct platform_hibernation_ops acpi_hibernation_ops_old = { .begin = acpi_hibernation_begin_old, .end = acpi_pm_end, .pre_snapshot = acpi_hibernation_pre_snapshot_old, diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 05dff63..b662ab7 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -359,7 +359,7 @@ static int acpi_video_set_brightness(struct backlight_device *bd) vd->brightness->levels[request_level]); } -static struct backlight_ops acpi_backlight_ops = { +static const struct backlight_ops acpi_backlight_ops = { .get_brightness = acpi_video_get_brightness, .update_status = acpi_video_set_brightness, }; diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 6787aab..23ffb0e 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -387,7 +387,7 @@ static struct scsi_host_template ahci_sht = { .sdev_attrs = ahci_sdev_attrs, }; -static struct ata_port_operations ahci_ops = { +static const struct ata_port_operations ahci_ops = { .inherits = &sata_pmp_port_ops, .qc_defer = sata_pmp_qc_defer_cmd_switch, @@ -424,17 +424,17 @@ static struct ata_port_operations ahci_ops = { .port_stop = ahci_port_stop, }; -static struct 
ata_port_operations ahci_vt8251_ops = { +static const struct ata_port_operations ahci_vt8251_ops = { .inherits = &ahci_ops, .hardreset = ahci_vt8251_hardreset, }; -static struct ata_port_operations ahci_p5wdh_ops = { +static const struct ata_port_operations ahci_p5wdh_ops = { .inherits = &ahci_ops, .hardreset = ahci_p5wdh_hardreset, }; -static struct ata_port_operations ahci_sb600_ops = { +static const struct ata_port_operations ahci_sb600_ops = { .inherits = &ahci_ops, .softreset = ahci_sb600_softreset, .pmp_softreset = ahci_sb600_softreset, diff --git a/drivers/ata/ata_generic.c b/drivers/ata/ata_generic.c index 99e7196..4968c77 100644 --- a/drivers/ata/ata_generic.c +++ b/drivers/ata/ata_generic.c @@ -104,7 +104,7 @@ static struct scsi_host_template generic_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations generic_port_ops = { +static const struct ata_port_operations generic_port_ops = { .inherits = &ata_bmdma_port_ops, .cable_detect = ata_cable_unknown, .set_mode = generic_set_mode, diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index c33591d..000c121 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -318,7 +318,7 @@ static struct scsi_host_template piix_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations piix_pata_ops = { +static const struct ata_port_operations piix_pata_ops = { .inherits = &ata_bmdma32_port_ops, .cable_detect = ata_cable_40wire, .set_piomode = piix_set_piomode, @@ -326,22 +326,22 @@ static struct ata_port_operations piix_pata_ops = { .prereset = piix_pata_prereset, }; -static struct ata_port_operations piix_vmw_ops = { +static const struct ata_port_operations piix_vmw_ops = { .inherits = &piix_pata_ops, .bmdma_status = piix_vmw_bmdma_status, }; -static struct ata_port_operations ich_pata_ops = { +static const struct ata_port_operations ich_pata_ops = { .inherits = &piix_pata_ops, .cable_detect = ich_pata_cable_detect, .set_dmamode = ich_set_dmamode, }; -static struct 
ata_port_operations piix_sata_ops = { +static const struct ata_port_operations piix_sata_ops = { .inherits = &ata_bmdma_port_ops, }; -static struct ata_port_operations piix_sidpr_sata_ops = { +static const struct ata_port_operations piix_sidpr_sata_ops = { .inherits = &piix_sata_ops, .hardreset = sata_std_hardreset, .scr_read = piix_sidpr_scr_read, diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index b0882cd..c295d65 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -223,12 +223,12 @@ static void ata_acpi_dev_uevent(acpi_handle handle, u32 event, void *data) ata_acpi_uevent(dev->link->ap, dev, event); } -static struct acpi_dock_ops ata_acpi_dev_dock_ops = { +static const struct acpi_dock_ops ata_acpi_dev_dock_ops = { .handler = ata_acpi_dev_notify_dock, .uevent = ata_acpi_dev_uevent, }; -static struct acpi_dock_ops ata_acpi_ap_dock_ops = { +static const struct acpi_dock_ops ata_acpi_ap_dock_ops = { .handler = ata_acpi_ap_notify_dock, .uevent = ata_acpi_ap_uevent, }; diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index d4f7f99..94f603e 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4954,7 +4954,7 @@ void ata_qc_free(struct ata_queued_cmd *qc) struct ata_port *ap; unsigned int tag; - WARN_ON_ONCE(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ + BUG_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ ap = qc->ap; qc->flags = 0; @@ -4970,7 +4970,7 @@ void __ata_qc_complete(struct ata_queued_cmd *qc) struct ata_port *ap; struct ata_link *link; - WARN_ON_ONCE(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ + BUG_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ WARN_ON_ONCE(!(qc->flags & ATA_QCFLAG_ACTIVE)); ap = qc->ap; link = qc->dev->link; @@ -5987,7 +5987,7 @@ static void ata_host_stop(struct device *gendev, void *res) * LOCKING: * None. 
*/ -static void ata_finalize_port_ops(struct ata_port_operations *ops) +static void ata_finalize_port_ops(const struct ata_port_operations *ops) { static DEFINE_SPINLOCK(lock); const struct ata_port_operations *cur; @@ -5999,6 +5999,7 @@ static void ata_finalize_port_ops(struct ata_port_operations *ops) return; spin_lock(&lock); + pax_open_kernel(); for (cur = ops->inherits; cur; cur = cur->inherits) { void **inherit = (void **)cur; @@ -6012,8 +6013,9 @@ static void ata_finalize_port_ops(struct ata_port_operations *ops) if (IS_ERR(*pp)) *pp = NULL; - ops->inherits = NULL; + *(struct ata_port_operations **)&ops->inherits = NULL; + pax_close_kernel(); spin_unlock(&lock); } @@ -6110,7 +6112,7 @@ int ata_host_start(struct ata_host *host) */ /* KILLME - the only user left is ipr */ void ata_host_init(struct ata_host *host, struct device *dev, - unsigned long flags, struct ata_port_operations *ops) + unsigned long flags, const struct ata_port_operations *ops) { spin_lock_init(&host->lock); host->dev = dev; @@ -6773,7 +6775,7 @@ static void ata_dummy_error_handler(struct ata_port *ap) /* truly dummy */ } -struct ata_port_operations ata_dummy_port_ops = { +const struct ata_port_operations ata_dummy_port_ops = { .qc_prep = ata_noop_qc_prep, .qc_issue = ata_dummy_qc_issue, .error_handler = ata_dummy_error_handler, diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index e5bdb9b..45a8e72 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2423,6 +2423,8 @@ void ata_eh_report(struct ata_port *ap) { struct ata_link *link; + pax_track_stack(); + ata_for_each_link(link, ap, HOST_FIRST) ata_eh_link_report(link); } @@ -3594,7 +3596,7 @@ void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, */ void ata_std_error_handler(struct ata_port *ap) { - struct ata_port_operations *ops = ap->ops; + const struct ata_port_operations *ops = ap->ops; ata_reset_fn_t hardreset = ops->hardreset; /* ignore built-in hardreset if SCR access is not available */ 
diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c index 51f0ffb..19ce3e3 100644 --- a/drivers/ata/libata-pmp.c +++ b/drivers/ata/libata-pmp.c @@ -841,7 +841,7 @@ static int sata_pmp_handle_link_fail(struct ata_link *link, int *link_tries) */ static int sata_pmp_eh_recover(struct ata_port *ap) { - struct ata_port_operations *ops = ap->ops; + const struct ata_port_operations *ops = ap->ops; int pmp_tries, link_tries[SATA_PMP_MAX_PORTS]; struct ata_link *pmp_link = &ap->link; struct ata_device *pmp_dev = pmp_link->device; diff --git a/drivers/ata/pata_acpi.c b/drivers/ata/pata_acpi.c index d8f35fe..288180a 100644 --- a/drivers/ata/pata_acpi.c +++ b/drivers/ata/pata_acpi.c @@ -215,7 +215,7 @@ static struct scsi_host_template pacpi_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations pacpi_ops = { +static const struct ata_port_operations pacpi_ops = { .inherits = &ata_bmdma_port_ops, .qc_issue = pacpi_qc_issue, .cable_detect = pacpi_cable_detect, diff --git a/drivers/ata/pata_ali.c b/drivers/ata/pata_ali.c index 9434114..1f2f364 100644 --- a/drivers/ata/pata_ali.c +++ b/drivers/ata/pata_ali.c @@ -365,7 +365,7 @@ static struct scsi_host_template ali_sht = { * Port operations for PIO only ALi */ -static struct ata_port_operations ali_early_port_ops = { +static const struct ata_port_operations ali_early_port_ops = { .inherits = &ata_sff_port_ops, .cable_detect = ata_cable_40wire, .set_piomode = ali_set_piomode, @@ -382,7 +382,7 @@ static const struct ata_port_operations ali_dma_base_ops = { * Port operations for DMA capable ALi without cable * detect */ -static struct ata_port_operations ali_20_port_ops = { +static const struct ata_port_operations ali_20_port_ops = { .inherits = &ali_dma_base_ops, .cable_detect = ata_cable_40wire, .mode_filter = ali_20_filter, @@ -393,7 +393,7 @@ static struct ata_port_operations ali_20_port_ops = { /* * Port operations for DMA capable ALi with cable detect */ -static struct ata_port_operations 
ali_c2_port_ops = { +static const struct ata_port_operations ali_c2_port_ops = { .inherits = &ali_dma_base_ops, .check_atapi_dma = ali_check_atapi_dma, .cable_detect = ali_c2_cable_detect, @@ -404,7 +404,7 @@ static struct ata_port_operations ali_c2_port_ops = { /* * Port operations for DMA capable ALi with cable detect */ -static struct ata_port_operations ali_c4_port_ops = { +static const struct ata_port_operations ali_c4_port_ops = { .inherits = &ali_dma_base_ops, .check_atapi_dma = ali_check_atapi_dma, .cable_detect = ali_c2_cable_detect, @@ -414,7 +414,7 @@ static struct ata_port_operations ali_c4_port_ops = { /* * Port operations for DMA capable ALi with cable detect and LBA48 */ -static struct ata_port_operations ali_c5_port_ops = { +static const struct ata_port_operations ali_c5_port_ops = { .inherits = &ali_dma_base_ops, .check_atapi_dma = ali_check_atapi_dma, .dev_config = ali_warn_atapi_dma, diff --git a/drivers/ata/pata_amd.c b/drivers/ata/pata_amd.c index 567f3f7..c8ee0da 100644 --- a/drivers/ata/pata_amd.c +++ b/drivers/ata/pata_amd.c @@ -397,28 +397,28 @@ static const struct ata_port_operations amd_base_port_ops = { .prereset = amd_pre_reset, }; -static struct ata_port_operations amd33_port_ops = { +static const struct ata_port_operations amd33_port_ops = { .inherits = &amd_base_port_ops, .cable_detect = ata_cable_40wire, .set_piomode = amd33_set_piomode, .set_dmamode = amd33_set_dmamode, }; -static struct ata_port_operations amd66_port_ops = { +static const struct ata_port_operations amd66_port_ops = { .inherits = &amd_base_port_ops, .cable_detect = ata_cable_unknown, .set_piomode = amd66_set_piomode, .set_dmamode = amd66_set_dmamode, }; -static struct ata_port_operations amd100_port_ops = { +static const struct ata_port_operations amd100_port_ops = { .inherits = &amd_base_port_ops, .cable_detect = ata_cable_unknown, .set_piomode = amd100_set_piomode, .set_dmamode = amd100_set_dmamode, }; -static struct ata_port_operations amd133_port_ops = { 
+static const struct ata_port_operations amd133_port_ops = { .inherits = &amd_base_port_ops, .cable_detect = amd_cable_detect, .set_piomode = amd133_set_piomode, @@ -433,13 +433,13 @@ static const struct ata_port_operations nv_base_port_ops = { .host_stop = nv_host_stop, }; -static struct ata_port_operations nv100_port_ops = { +static const struct ata_port_operations nv100_port_ops = { .inherits = &nv_base_port_ops, .set_piomode = nv100_set_piomode, .set_dmamode = nv100_set_dmamode, }; -static struct ata_port_operations nv133_port_ops = { +static const struct ata_port_operations nv133_port_ops = { .inherits = &nv_base_port_ops, .set_piomode = nv133_set_piomode, .set_dmamode = nv133_set_dmamode, diff --git a/drivers/ata/pata_artop.c b/drivers/ata/pata_artop.c index d332cfd..4b7eaae 100644 --- a/drivers/ata/pata_artop.c +++ b/drivers/ata/pata_artop.c @@ -311,7 +311,7 @@ static struct scsi_host_template artop_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations artop6210_ops = { +static const struct ata_port_operations artop6210_ops = { .inherits = &ata_bmdma_port_ops, .cable_detect = ata_cable_40wire, .set_piomode = artop6210_set_piomode, @@ -320,7 +320,7 @@ static struct ata_port_operations artop6210_ops = { .qc_defer = artop6210_qc_defer, }; -static struct ata_port_operations artop6260_ops = { +static const struct ata_port_operations artop6260_ops = { .inherits = &ata_bmdma_port_ops, .cable_detect = artop6260_cable_detect, .set_piomode = artop6260_set_piomode, diff --git a/drivers/ata/pata_at32.c b/drivers/ata/pata_at32.c index 5c129f9..7bb7ccb 100644 --- a/drivers/ata/pata_at32.c +++ b/drivers/ata/pata_at32.c @@ -172,7 +172,7 @@ static struct scsi_host_template at32_sht = { ATA_PIO_SHT(DRV_NAME), }; -static struct ata_port_operations at32_port_ops = { +static const struct ata_port_operations at32_port_ops = { .inherits = &ata_sff_port_ops, .cable_detect = ata_cable_40wire, .set_piomode = pata_at32_set_piomode, diff --git a/drivers/ata/pata_at91.c 
b/drivers/ata/pata_at91.c index 41c94b1..829006d 100644 --- a/drivers/ata/pata_at91.c +++ b/drivers/ata/pata_at91.c @@ -195,7 +195,7 @@ static struct scsi_host_template pata_at91_sht = { ATA_PIO_SHT(DRV_NAME), }; -static struct ata_port_operations pata_at91_port_ops = { +static const struct ata_port_operations pata_at91_port_ops = { .inherits = &ata_sff_port_ops, .sff_data_xfer = pata_at91_data_xfer_noirq, diff --git a/drivers/ata/pata_atiixp.c b/drivers/ata/pata_atiixp.c index ae4454d..d391eb4 100644 --- a/drivers/ata/pata_atiixp.c +++ b/drivers/ata/pata_atiixp.c @@ -205,7 +205,7 @@ static struct scsi_host_template atiixp_sht = { .sg_tablesize = LIBATA_DUMB_MAX_PRD, }; -static struct ata_port_operations atiixp_port_ops = { +static const struct ata_port_operations atiixp_port_ops = { .inherits = &ata_bmdma_port_ops, .qc_prep = ata_sff_dumb_qc_prep, diff --git a/drivers/ata/pata_atp867x.c b/drivers/ata/pata_atp867x.c index 6fe7ded..2a425dc 100644 --- a/drivers/ata/pata_atp867x.c +++ b/drivers/ata/pata_atp867x.c @@ -274,7 +274,7 @@ static struct scsi_host_template atp867x_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations atp867x_ops = { +static const struct ata_port_operations atp867x_ops = { .inherits = &ata_bmdma_port_ops, .cable_detect = atp867x_cable_detect, .set_piomode = atp867x_set_piomode, diff --git a/drivers/ata/pata_bf54x.c b/drivers/ata/pata_bf54x.c index c4b47a3..b27a367 100644 --- a/drivers/ata/pata_bf54x.c +++ b/drivers/ata/pata_bf54x.c @@ -1464,7 +1464,7 @@ static struct scsi_host_template bfin_sht = { .dma_boundary = ATA_DMA_BOUNDARY, }; -static struct ata_port_operations bfin_pata_ops = { +static const struct ata_port_operations bfin_pata_ops = { .inherits = &ata_sff_port_ops, .set_piomode = bfin_set_piomode, diff --git a/drivers/ata/pata_cmd640.c b/drivers/ata/pata_cmd640.c index 5acf9fa..84248be 100644 --- a/drivers/ata/pata_cmd640.c +++ b/drivers/ata/pata_cmd640.c @@ -168,7 +168,7 @@ static struct scsi_host_template 
cmd640_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations cmd640_port_ops = { +static const struct ata_port_operations cmd640_port_ops = { .inherits = &ata_bmdma_port_ops, /* In theory xfer_noirq is not needed once we kill the prefetcher */ .sff_data_xfer = ata_sff_data_xfer_noirq, diff --git a/drivers/ata/pata_cmd64x.c b/drivers/ata/pata_cmd64x.c index ccd2694..c869c3d 100644 --- a/drivers/ata/pata_cmd64x.c +++ b/drivers/ata/pata_cmd64x.c @@ -271,18 +271,18 @@ static const struct ata_port_operations cmd64x_base_ops = { .set_dmamode = cmd64x_set_dmamode, }; -static struct ata_port_operations cmd64x_port_ops = { +static const struct ata_port_operations cmd64x_port_ops = { .inherits = &cmd64x_base_ops, .cable_detect = ata_cable_40wire, }; -static struct ata_port_operations cmd646r1_port_ops = { +static const struct ata_port_operations cmd646r1_port_ops = { .inherits = &cmd64x_base_ops, .bmdma_stop = cmd646r1_bmdma_stop, .cable_detect = ata_cable_40wire, }; -static struct ata_port_operations cmd648_port_ops = { +static const struct ata_port_operations cmd648_port_ops = { .inherits = &cmd64x_base_ops, .bmdma_stop = cmd648_bmdma_stop, .cable_detect = cmd648_cable_detect, diff --git a/drivers/ata/pata_cs5520.c b/drivers/ata/pata_cs5520.c index 0df83cf..d7595b0 100644 --- a/drivers/ata/pata_cs5520.c +++ b/drivers/ata/pata_cs5520.c @@ -144,7 +144,7 @@ static struct scsi_host_template cs5520_sht = { .sg_tablesize = LIBATA_DUMB_MAX_PRD, }; -static struct ata_port_operations cs5520_port_ops = { +static const struct ata_port_operations cs5520_port_ops = { .inherits = &ata_bmdma_port_ops, .qc_prep = ata_sff_dumb_qc_prep, .cable_detect = ata_cable_40wire, diff --git a/drivers/ata/pata_cs5530.c b/drivers/ata/pata_cs5530.c index c974b05..6d26b11 100644 --- a/drivers/ata/pata_cs5530.c +++ b/drivers/ata/pata_cs5530.c @@ -164,7 +164,7 @@ static struct scsi_host_template cs5530_sht = { .sg_tablesize = LIBATA_DUMB_MAX_PRD, }; -static struct ata_port_operations 
cs5530_port_ops = { +static const struct ata_port_operations cs5530_port_ops = { .inherits = &ata_bmdma_port_ops, .qc_prep = ata_sff_dumb_qc_prep, diff --git a/drivers/ata/pata_cs5535.c b/drivers/ata/pata_cs5535.c index 403f561..aacd26b 100644 --- a/drivers/ata/pata_cs5535.c +++ b/drivers/ata/pata_cs5535.c @@ -160,7 +160,7 @@ static struct scsi_host_template cs5535_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations cs5535_port_ops = { +static const struct ata_port_operations cs5535_port_ops = { .inherits = &ata_bmdma_port_ops, .cable_detect = cs5535_cable_detect, .set_piomode = cs5535_set_piomode, diff --git a/drivers/ata/pata_cs5536.c b/drivers/ata/pata_cs5536.c index 6da4cb4..de24a25 100644 --- a/drivers/ata/pata_cs5536.c +++ b/drivers/ata/pata_cs5536.c @@ -223,7 +223,7 @@ static struct scsi_host_template cs5536_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations cs5536_port_ops = { +static const struct ata_port_operations cs5536_port_ops = { .inherits = &ata_bmdma_port_ops, .cable_detect = cs5536_cable_detect, .set_piomode = cs5536_set_piomode, diff --git a/drivers/ata/pata_cypress.c b/drivers/ata/pata_cypress.c index 8fb040b..b16a9c9 100644 --- a/drivers/ata/pata_cypress.c +++ b/drivers/ata/pata_cypress.c @@ -113,7 +113,7 @@ static struct scsi_host_template cy82c693_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations cy82c693_port_ops = { +static const struct ata_port_operations cy82c693_port_ops = { .inherits = &ata_bmdma_port_ops, .cable_detect = ata_cable_40wire, .set_piomode = cy82c693_set_piomode, diff --git a/drivers/ata/pata_efar.c b/drivers/ata/pata_efar.c index 2a6412f..555ee11 100644 --- a/drivers/ata/pata_efar.c +++ b/drivers/ata/pata_efar.c @@ -222,7 +222,7 @@ static struct scsi_host_template efar_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations efar_ops = { +static const struct ata_port_operations efar_ops = { .inherits = &ata_bmdma_port_ops, .cable_detect = 
efar_cable_detect, .set_piomode = efar_set_piomode, diff --git a/drivers/ata/pata_hpt366.c b/drivers/ata/pata_hpt366.c index b9d8836..0b92030 100644 --- a/drivers/ata/pata_hpt366.c +++ b/drivers/ata/pata_hpt366.c @@ -282,7 +282,7 @@ static struct scsi_host_template hpt36x_sht = { * Configuration for HPT366/68 */ -static struct ata_port_operations hpt366_port_ops = { +static const struct ata_port_operations hpt366_port_ops = { .inherits = &ata_bmdma_port_ops, .cable_detect = hpt36x_cable_detect, .mode_filter = hpt366_filter, diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c index 5af7f19..00c4980 100644 --- a/drivers/ata/pata_hpt37x.c +++ b/drivers/ata/pata_hpt37x.c @@ -576,7 +576,7 @@ static struct scsi_host_template hpt37x_sht = { * Configuration for HPT370 */ -static struct ata_port_operations hpt370_port_ops = { +static const struct ata_port_operations hpt370_port_ops = { .inherits = &ata_bmdma_port_ops, .bmdma_stop = hpt370_bmdma_stop, @@ -591,7 +591,7 @@ static struct ata_port_operations hpt370_port_ops = { * Configuration for HPT370A. Close to 370 but less filters */ -static struct ata_port_operations hpt370a_port_ops = { +static const struct ata_port_operations hpt370a_port_ops = { .inherits = &hpt370_port_ops, .mode_filter = hpt370a_filter, }; @@ -601,7 +601,7 @@ static struct ata_port_operations hpt370a_port_ops = { * and DMA mode setting functionality. */ -static struct ata_port_operations hpt372_port_ops = { +static const struct ata_port_operations hpt372_port_ops = { .inherits = &ata_bmdma_port_ops, .bmdma_stop = hpt37x_bmdma_stop, @@ -616,7 +616,7 @@ static struct ata_port_operations hpt372_port_ops = { * but we have a different cable detection procedure for function 1. 
*/ -static struct ata_port_operations hpt374_fn1_port_ops = { +static const struct ata_port_operations hpt374_fn1_port_ops = { .inherits = &hpt372_port_ops, .prereset = hpt374_fn1_pre_reset, }; diff --git a/drivers/ata/pata_hpt3x2n.c b/drivers/ata/pata_hpt3x2n.c index 100f227..2e39382 100644 --- a/drivers/ata/pata_hpt3x2n.c +++ b/drivers/ata/pata_hpt3x2n.c @@ -337,7 +337,7 @@ static struct scsi_host_template hpt3x2n_sht = { * Configuration for HPT3x2n. */ -static struct ata_port_operations hpt3x2n_port_ops = { +static const struct ata_port_operations hpt3x2n_port_ops = { .inherits = &ata_bmdma_port_ops, .bmdma_stop = hpt3x2n_bmdma_stop, diff --git a/drivers/ata/pata_hpt3x3.c b/drivers/ata/pata_hpt3x3.c index 7e31025..6fca8f4 100644 --- a/drivers/ata/pata_hpt3x3.c +++ b/drivers/ata/pata_hpt3x3.c @@ -141,7 +141,7 @@ static struct scsi_host_template hpt3x3_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations hpt3x3_port_ops = { +static const struct ata_port_operations hpt3x3_port_ops = { .inherits = &ata_bmdma_port_ops, .cable_detect = ata_cable_40wire, .set_piomode = hpt3x3_set_piomode, diff --git a/drivers/ata/pata_icside.c b/drivers/ata/pata_icside.c index b663b7f..9a26c2a 100644 --- a/drivers/ata/pata_icside.c +++ b/drivers/ata/pata_icside.c @@ -319,7 +319,7 @@ static void pata_icside_postreset(struct ata_link *link, unsigned int *classes) } } -static struct ata_port_operations pata_icside_port_ops = { +static const struct ata_port_operations pata_icside_port_ops = { .inherits = &ata_sff_port_ops, /* no need to build any PRD tables for DMA */ .qc_prep = ata_noop_qc_prep, diff --git a/drivers/ata/pata_isapnp.c b/drivers/ata/pata_isapnp.c index 4bceb88..457dfb6 100644 --- a/drivers/ata/pata_isapnp.c +++ b/drivers/ata/pata_isapnp.c @@ -23,12 +23,12 @@ static struct scsi_host_template isapnp_sht = { ATA_PIO_SHT(DRV_NAME), }; -static struct ata_port_operations isapnp_port_ops = { +static const struct ata_port_operations isapnp_port_ops = { .inherits = 
&ata_sff_port_ops, .cable_detect = ata_cable_40wire, }; -static struct ata_port_operations isapnp_noalt_port_ops = { +static const struct ata_port_operations isapnp_noalt_port_ops = { .inherits = &ata_sff_port_ops, .cable_detect = ata_cable_40wire, /* No altstatus so we don't want to use the lost interrupt poll */ diff --git a/drivers/ata/pata_it8213.c b/drivers/ata/pata_it8213.c index f156da8..24976e2 100644 --- a/drivers/ata/pata_it8213.c +++ b/drivers/ata/pata_it8213.c @@ -234,7 +234,7 @@ static struct scsi_host_template it8213_sht = { }; -static struct ata_port_operations it8213_ops = { +static const struct ata_port_operations it8213_ops = { .inherits = &ata_bmdma_port_ops, .cable_detect = it8213_cable_detect, .set_piomode = it8213_set_piomode, diff --git a/drivers/ata/pata_it821x.c b/drivers/ata/pata_it821x.c index 188bc2f..ca9e785 100644 --- a/drivers/ata/pata_it821x.c +++ b/drivers/ata/pata_it821x.c @@ -800,7 +800,7 @@ static struct scsi_host_template it821x_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations it821x_smart_port_ops = { +static const struct ata_port_operations it821x_smart_port_ops = { .inherits = &ata_bmdma_port_ops, .check_atapi_dma= it821x_check_atapi_dma, @@ -814,7 +814,7 @@ static struct ata_port_operations it821x_smart_port_ops = { .port_start = it821x_port_start, }; -static struct ata_port_operations it821x_passthru_port_ops = { +static const struct ata_port_operations it821x_passthru_port_ops = { .inherits = &ata_bmdma_port_ops, .check_atapi_dma= it821x_check_atapi_dma, @@ -830,7 +830,7 @@ static struct ata_port_operations it821x_passthru_port_ops = { .port_start = it821x_port_start, }; -static struct ata_port_operations it821x_rdc_port_ops = { +static const struct ata_port_operations it821x_rdc_port_ops = { .inherits = &ata_bmdma_port_ops, .check_atapi_dma= it821x_check_atapi_dma, diff --git a/drivers/ata/pata_ixp4xx_cf.c b/drivers/ata/pata_ixp4xx_cf.c index ba54b08..4b952b7 100644 --- a/drivers/ata/pata_ixp4xx_cf.c 
+++ b/drivers/ata/pata_ixp4xx_cf.c @@ -89,7 +89,7 @@ static struct scsi_host_template ixp4xx_sht = { ATA_PIO_SHT(DRV_NAME), }; -static struct ata_port_operations ixp4xx_port_ops = { +static const struct ata_port_operations ixp4xx_port_ops = { .inherits = &ata_sff_port_ops, .sff_data_xfer = ixp4xx_mmio_data_xfer, .cable_detect = ata_cable_40wire, diff --git a/drivers/ata/pata_jmicron.c b/drivers/ata/pata_jmicron.c index 3a1474a..434b0ff 100644 --- a/drivers/ata/pata_jmicron.c +++ b/drivers/ata/pata_jmicron.c @@ -111,7 +111,7 @@ static struct scsi_host_template jmicron_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations jmicron_ops = { +static const struct ata_port_operations jmicron_ops = { .inherits = &ata_bmdma_port_ops, .prereset = jmicron_pre_reset, }; diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c index 6932e56..220e71d 100644 --- a/drivers/ata/pata_legacy.c +++ b/drivers/ata/pata_legacy.c @@ -106,7 +106,7 @@ struct legacy_probe { struct legacy_controller { const char *name; - struct ata_port_operations *ops; + const struct ata_port_operations *ops; unsigned int pio_mask; unsigned int flags; unsigned int pflags; @@ -223,12 +223,12 @@ static const struct ata_port_operations legacy_base_port_ops = { * pio_mask as well. 
*/ -static struct ata_port_operations simple_port_ops = { +static const struct ata_port_operations simple_port_ops = { .inherits = &legacy_base_port_ops, .sff_data_xfer = ata_sff_data_xfer_noirq, }; -static struct ata_port_operations legacy_port_ops = { +static const struct ata_port_operations legacy_port_ops = { .inherits = &legacy_base_port_ops, .sff_data_xfer = ata_sff_data_xfer_noirq, .set_mode = legacy_set_mode, @@ -324,7 +324,7 @@ static unsigned int pdc_data_xfer_vlb(struct ata_device *dev, return buflen; } -static struct ata_port_operations pdc20230_port_ops = { +static const struct ata_port_operations pdc20230_port_ops = { .inherits = &legacy_base_port_ops, .set_piomode = pdc20230_set_piomode, .sff_data_xfer = pdc_data_xfer_vlb, @@ -357,7 +357,7 @@ static void ht6560a_set_piomode(struct ata_port *ap, struct ata_device *adev) ioread8(ap->ioaddr.status_addr); } -static struct ata_port_operations ht6560a_port_ops = { +static const struct ata_port_operations ht6560a_port_ops = { .inherits = &legacy_base_port_ops, .set_piomode = ht6560a_set_piomode, }; @@ -400,7 +400,7 @@ static void ht6560b_set_piomode(struct ata_port *ap, struct ata_device *adev) ioread8(ap->ioaddr.status_addr); } -static struct ata_port_operations ht6560b_port_ops = { +static const struct ata_port_operations ht6560b_port_ops = { .inherits = &legacy_base_port_ops, .set_piomode = ht6560b_set_piomode, }; @@ -499,7 +499,7 @@ static void opti82c611a_set_piomode(struct ata_port *ap, } -static struct ata_port_operations opti82c611a_port_ops = { +static const struct ata_port_operations opti82c611a_port_ops = { .inherits = &legacy_base_port_ops, .set_piomode = opti82c611a_set_piomode, }; @@ -609,7 +609,7 @@ static unsigned int opti82c46x_qc_issue(struct ata_queued_cmd *qc) return ata_sff_qc_issue(qc); } -static struct ata_port_operations opti82c46x_port_ops = { +static const struct ata_port_operations opti82c46x_port_ops = { .inherits = &legacy_base_port_ops, .set_piomode = opti82c46x_set_piomode, 
.qc_issue = opti82c46x_qc_issue, @@ -771,20 +771,20 @@ static int qdi_port(struct platform_device *dev, return 0; } -static struct ata_port_operations qdi6500_port_ops = { +static const struct ata_port_operations qdi6500_port_ops = { .inherits = &legacy_base_port_ops, .set_piomode = qdi6500_set_piomode, .qc_issue = qdi_qc_issue, .sff_data_xfer = vlb32_data_xfer, }; -static struct ata_port_operations qdi6580_port_ops = { +static const struct ata_port_operations qdi6580_port_ops = { .inherits = &legacy_base_port_ops, .set_piomode = qdi6580_set_piomode, .sff_data_xfer = vlb32_data_xfer, }; -static struct ata_port_operations qdi6580dp_port_ops = { +static const struct ata_port_operations qdi6580dp_port_ops = { .inherits = &legacy_base_port_ops, .set_piomode = qdi6580dp_set_piomode, .sff_data_xfer = vlb32_data_xfer, @@ -855,7 +855,7 @@ static int winbond_port(struct platform_device *dev, return 0; } -static struct ata_port_operations winbond_port_ops = { +static const struct ata_port_operations winbond_port_ops = { .inherits = &legacy_base_port_ops, .set_piomode = winbond_set_piomode, .sff_data_xfer = vlb32_data_xfer, @@ -978,7 +978,7 @@ static __init int legacy_init_one(struct legacy_probe *probe) int pio_modes = controller->pio_mask; unsigned long io = probe->port; u32 mask = (1 << probe->slot); - struct ata_port_operations *ops = controller->ops; + const struct ata_port_operations *ops = controller->ops; struct legacy_data *ld = &legacy_data[probe->slot]; struct ata_host *host = NULL; struct ata_port *ap; diff --git a/drivers/ata/pata_marvell.c b/drivers/ata/pata_marvell.c index 2096fb7..4d090fc 100644 --- a/drivers/ata/pata_marvell.c +++ b/drivers/ata/pata_marvell.c @@ -100,7 +100,7 @@ static struct scsi_host_template marvell_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations marvell_ops = { +static const struct ata_port_operations marvell_ops = { .inherits = &ata_bmdma_port_ops, .cable_detect = marvell_cable_detect, .prereset = 
marvell_pre_reset, diff --git a/drivers/ata/pata_mpc52xx.c b/drivers/ata/pata_mpc52xx.c index 99d41be..7d56aa8 100644 --- a/drivers/ata/pata_mpc52xx.c +++ b/drivers/ata/pata_mpc52xx.c @@ -609,7 +609,7 @@ static struct scsi_host_template mpc52xx_ata_sht = { ATA_PIO_SHT(DRV_NAME), }; -static struct ata_port_operations mpc52xx_ata_port_ops = { +static const struct ata_port_operations mpc52xx_ata_port_ops = { .inherits = &ata_bmdma_port_ops, .sff_dev_select = mpc52xx_ata_dev_select, .set_piomode = mpc52xx_ata_set_piomode, diff --git a/drivers/ata/pata_mpiix.c b/drivers/ata/pata_mpiix.c index b21f002..0a27e7f 100644 --- a/drivers/ata/pata_mpiix.c +++ b/drivers/ata/pata_mpiix.c @@ -140,7 +140,7 @@ static struct scsi_host_template mpiix_sht = { ATA_PIO_SHT(DRV_NAME), }; -static struct ata_port_operations mpiix_port_ops = { +static const struct ata_port_operations mpiix_port_ops = { .inherits = &ata_sff_port_ops, .qc_issue = mpiix_qc_issue, .cable_detect = ata_cable_40wire, diff --git a/drivers/ata/pata_netcell.c b/drivers/ata/pata_netcell.c index f0d52f7..89c3be3 100644 --- a/drivers/ata/pata_netcell.c +++ b/drivers/ata/pata_netcell.c @@ -34,7 +34,7 @@ static struct scsi_host_template netcell_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations netcell_ops = { +static const struct ata_port_operations netcell_ops = { .inherits = &ata_bmdma_port_ops, .cable_detect = ata_cable_80wire, .read_id = netcell_read_id, diff --git a/drivers/ata/pata_ninja32.c b/drivers/ata/pata_ninja32.c index dd53a66..a3f4317 100644 --- a/drivers/ata/pata_ninja32.c +++ b/drivers/ata/pata_ninja32.c @@ -81,7 +81,7 @@ static struct scsi_host_template ninja32_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations ninja32_port_ops = { +static const struct ata_port_operations ninja32_port_ops = { .inherits = &ata_bmdma_port_ops, .sff_dev_select = ninja32_dev_select, .cable_detect = ata_cable_40wire, diff --git a/drivers/ata/pata_ns87410.c b/drivers/ata/pata_ns87410.c 
index ca53fac..9aa93ef 100644 --- a/drivers/ata/pata_ns87410.c +++ b/drivers/ata/pata_ns87410.c @@ -132,7 +132,7 @@ static struct scsi_host_template ns87410_sht = { ATA_PIO_SHT(DRV_NAME), }; -static struct ata_port_operations ns87410_port_ops = { +static const struct ata_port_operations ns87410_port_ops = { .inherits = &ata_sff_port_ops, .qc_issue = ns87410_qc_issue, .cable_detect = ata_cable_40wire, diff --git a/drivers/ata/pata_ns87415.c b/drivers/ata/pata_ns87415.c index 773b159..55f454e 100644 --- a/drivers/ata/pata_ns87415.c +++ b/drivers/ata/pata_ns87415.c @@ -299,7 +299,7 @@ static u8 ns87560_bmdma_status(struct ata_port *ap) } #endif /* 87560 SuperIO Support */ -static struct ata_port_operations ns87415_pata_ops = { +static const struct ata_port_operations ns87415_pata_ops = { .inherits = &ata_bmdma_port_ops, .check_atapi_dma = ns87415_check_atapi_dma, @@ -313,7 +313,7 @@ static struct ata_port_operations ns87415_pata_ops = { }; #if defined(CONFIG_SUPERIO) -static struct ata_port_operations ns87560_pata_ops = { +static const struct ata_port_operations ns87560_pata_ops = { .inherits = &ns87415_pata_ops, .sff_tf_read = ns87560_tf_read, .sff_check_status = ns87560_check_status, diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c index d6f6956..639295b 100644 --- a/drivers/ata/pata_octeon_cf.c +++ b/drivers/ata/pata_octeon_cf.c @@ -801,6 +801,7 @@ static unsigned int octeon_cf_qc_issue(struct ata_queued_cmd *qc) return 0; } +/* cannot be const */ static struct ata_port_operations octeon_cf_ops = { .inherits = &ata_sff_port_ops, .check_atapi_dma = octeon_cf_check_atapi_dma, diff --git a/drivers/ata/pata_oldpiix.c b/drivers/ata/pata_oldpiix.c index 84ac503..adee1cd 100644 --- a/drivers/ata/pata_oldpiix.c +++ b/drivers/ata/pata_oldpiix.c @@ -208,7 +208,7 @@ static struct scsi_host_template oldpiix_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations oldpiix_pata_ops = { +static const struct ata_port_operations 
oldpiix_pata_ops = { .inherits = &ata_bmdma_port_ops, .qc_issue = oldpiix_qc_issue, .cable_detect = ata_cable_40wire, diff --git a/drivers/ata/pata_opti.c b/drivers/ata/pata_opti.c index 99eddda..3a4c0aa 100644 --- a/drivers/ata/pata_opti.c +++ b/drivers/ata/pata_opti.c @@ -152,7 +152,7 @@ static struct scsi_host_template opti_sht = { ATA_PIO_SHT(DRV_NAME), }; -static struct ata_port_operations opti_port_ops = { +static const struct ata_port_operations opti_port_ops = { .inherits = &ata_sff_port_ops, .cable_detect = ata_cable_40wire, .set_piomode = opti_set_piomode, diff --git a/drivers/ata/pata_optidma.c b/drivers/ata/pata_optidma.c index 86885a4..8e9968d 100644 --- a/drivers/ata/pata_optidma.c +++ b/drivers/ata/pata_optidma.c @@ -337,7 +337,7 @@ static struct scsi_host_template optidma_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations optidma_port_ops = { +static const struct ata_port_operations optidma_port_ops = { .inherits = &ata_bmdma_port_ops, .cable_detect = ata_cable_40wire, .set_piomode = optidma_set_pio_mode, @@ -346,7 +346,7 @@ static struct ata_port_operations optidma_port_ops = { .prereset = optidma_pre_reset, }; -static struct ata_port_operations optiplus_port_ops = { +static const struct ata_port_operations optiplus_port_ops = { .inherits = &optidma_port_ops, .set_piomode = optiplus_set_pio_mode, .set_dmamode = optiplus_set_dma_mode, diff --git a/drivers/ata/pata_palmld.c b/drivers/ata/pata_palmld.c index 11fb4cc..1a14022 100644 --- a/drivers/ata/pata_palmld.c +++ b/drivers/ata/pata_palmld.c @@ -37,7 +37,7 @@ static struct scsi_host_template palmld_sht = { ATA_PIO_SHT(DRV_NAME), }; -static struct ata_port_operations palmld_port_ops = { +static const struct ata_port_operations palmld_port_ops = { .inherits = &ata_sff_port_ops, .sff_data_xfer = ata_sff_data_xfer_noirq, .cable_detect = ata_cable_40wire, diff --git a/drivers/ata/pata_pcmcia.c b/drivers/ata/pata_pcmcia.c index dc99e26..7f4b1e4 100644 --- a/drivers/ata/pata_pcmcia.c 
+++ b/drivers/ata/pata_pcmcia.c @@ -162,14 +162,14 @@ static struct scsi_host_template pcmcia_sht = { ATA_PIO_SHT(DRV_NAME), }; -static struct ata_port_operations pcmcia_port_ops = { +static const struct ata_port_operations pcmcia_port_ops = { .inherits = &ata_sff_port_ops, .sff_data_xfer = ata_sff_data_xfer_noirq, .cable_detect = ata_cable_40wire, .set_mode = pcmcia_set_mode, }; -static struct ata_port_operations pcmcia_8bit_port_ops = { +static const struct ata_port_operations pcmcia_8bit_port_ops = { .inherits = &ata_sff_port_ops, .sff_data_xfer = ata_data_xfer_8bit, .cable_detect = ata_cable_40wire, @@ -256,7 +256,7 @@ static int pcmcia_init_one(struct pcmcia_device *pdev) unsigned long io_base, ctl_base; void __iomem *io_addr, *ctl_addr; int n_ports = 1; - struct ata_port_operations *ops = &pcmcia_port_ops; + const struct ata_port_operations *ops = &pcmcia_port_ops; info = kzalloc(sizeof(*info), GFP_KERNEL); if (info == NULL) diff --git a/drivers/ata/pata_pdc2027x.c b/drivers/ata/pata_pdc2027x.c index ca5cad0..3a1f125 100644 --- a/drivers/ata/pata_pdc2027x.c +++ b/drivers/ata/pata_pdc2027x.c @@ -132,14 +132,14 @@ static struct scsi_host_template pdc2027x_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations pdc2027x_pata100_ops = { +static const struct ata_port_operations pdc2027x_pata100_ops = { .inherits = &ata_bmdma_port_ops, .check_atapi_dma = pdc2027x_check_atapi_dma, .cable_detect = pdc2027x_cable_detect, .prereset = pdc2027x_prereset, }; -static struct ata_port_operations pdc2027x_pata133_ops = { +static const struct ata_port_operations pdc2027x_pata133_ops = { .inherits = &pdc2027x_pata100_ops, .mode_filter = pdc2027x_mode_filter, .set_piomode = pdc2027x_set_piomode, diff --git a/drivers/ata/pata_pdc202xx_old.c b/drivers/ata/pata_pdc202xx_old.c index 2911120..4bf62aa 100644 --- a/drivers/ata/pata_pdc202xx_old.c +++ b/drivers/ata/pata_pdc202xx_old.c @@ -274,7 +274,7 @@ static struct scsi_host_template pdc202xx_sht = { 
ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations pdc2024x_port_ops = { +static const struct ata_port_operations pdc2024x_port_ops = { .inherits = &ata_bmdma_port_ops, .cable_detect = ata_cable_40wire, @@ -284,7 +284,7 @@ static struct ata_port_operations pdc2024x_port_ops = { .sff_exec_command = pdc202xx_exec_command, }; -static struct ata_port_operations pdc2026x_port_ops = { +static const struct ata_port_operations pdc2026x_port_ops = { .inherits = &pdc2024x_port_ops, .check_atapi_dma = pdc2026x_check_atapi_dma, diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c index 3f6ebc6..a18c358 100644 --- a/drivers/ata/pata_platform.c +++ b/drivers/ata/pata_platform.c @@ -48,7 +48,7 @@ static struct scsi_host_template pata_platform_sht = { ATA_PIO_SHT(DRV_NAME), }; -static struct ata_port_operations pata_platform_port_ops = { +static const struct ata_port_operations pata_platform_port_ops = { .inherits = &ata_sff_port_ops, .sff_data_xfer = ata_sff_data_xfer_noirq, .cable_detect = ata_cable_unknown, diff --git a/drivers/ata/pata_qdi.c b/drivers/ata/pata_qdi.c index 45879dc..165a9f9 100644 --- a/drivers/ata/pata_qdi.c +++ b/drivers/ata/pata_qdi.c @@ -157,7 +157,7 @@ static struct scsi_host_template qdi_sht = { ATA_PIO_SHT(DRV_NAME), }; -static struct ata_port_operations qdi6500_port_ops = { +static const struct ata_port_operations qdi6500_port_ops = { .inherits = &ata_sff_port_ops, .qc_issue = qdi_qc_issue, .sff_data_xfer = qdi_data_xfer, @@ -165,7 +165,7 @@ static struct ata_port_operations qdi6500_port_ops = { .set_piomode = qdi6500_set_piomode, }; -static struct ata_port_operations qdi6580_port_ops = { +static const struct ata_port_operations qdi6580_port_ops = { .inherits = &qdi6500_port_ops, .set_piomode = qdi6580_set_piomode, }; diff --git a/drivers/ata/pata_radisys.c b/drivers/ata/pata_radisys.c index 4401b33..716c5cc 100644 --- a/drivers/ata/pata_radisys.c +++ b/drivers/ata/pata_radisys.c @@ -187,7 +187,7 @@ static struct 
scsi_host_template radisys_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations radisys_pata_ops = { +static const struct ata_port_operations radisys_pata_ops = { .inherits = &ata_bmdma_port_ops, .qc_issue = radisys_qc_issue, .cable_detect = ata_cable_unknown, diff --git a/drivers/ata/pata_rb532_cf.c b/drivers/ata/pata_rb532_cf.c index 45f1e10..fab6bca 100644 --- a/drivers/ata/pata_rb532_cf.c +++ b/drivers/ata/pata_rb532_cf.c @@ -68,7 +68,7 @@ static irqreturn_t rb532_pata_irq_handler(int irq, void *dev_instance) return IRQ_HANDLED; } -static struct ata_port_operations rb532_pata_port_ops = { +static const struct ata_port_operations rb532_pata_port_ops = { .inherits = &ata_sff_port_ops, .sff_data_xfer = ata_sff_data_xfer32, }; diff --git a/drivers/ata/pata_rdc.c b/drivers/ata/pata_rdc.c index c843a1e..b5853c3 100644 --- a/drivers/ata/pata_rdc.c +++ b/drivers/ata/pata_rdc.c @@ -272,7 +272,7 @@ static void rdc_set_dmamode(struct ata_port *ap, struct ata_device *adev) pci_write_config_byte(dev, 0x48, udma_enable); } -static struct ata_port_operations rdc_pata_ops = { +static const struct ata_port_operations rdc_pata_ops = { .inherits = &ata_bmdma32_port_ops, .cable_detect = rdc_pata_cable_detect, .set_piomode = rdc_set_piomode, diff --git a/drivers/ata/pata_rz1000.c b/drivers/ata/pata_rz1000.c index a5e4dfe..080c8c9 100644 --- a/drivers/ata/pata_rz1000.c +++ b/drivers/ata/pata_rz1000.c @@ -54,7 +54,7 @@ static struct scsi_host_template rz1000_sht = { ATA_PIO_SHT(DRV_NAME), }; -static struct ata_port_operations rz1000_port_ops = { +static const struct ata_port_operations rz1000_port_ops = { .inherits = &ata_sff_port_ops, .cable_detect = ata_cable_40wire, .set_mode = rz1000_set_mode, diff --git a/drivers/ata/pata_sc1200.c b/drivers/ata/pata_sc1200.c index 3bbed83..e309daf 100644 --- a/drivers/ata/pata_sc1200.c +++ b/drivers/ata/pata_sc1200.c @@ -207,7 +207,7 @@ static struct scsi_host_template sc1200_sht = { .sg_tablesize = LIBATA_DUMB_MAX_PRD, }; 
-static struct ata_port_operations sc1200_port_ops = { +static const struct ata_port_operations sc1200_port_ops = { .inherits = &ata_bmdma_port_ops, .qc_prep = ata_sff_dumb_qc_prep, .qc_issue = sc1200_qc_issue, diff --git a/drivers/ata/pata_scc.c b/drivers/ata/pata_scc.c index 4257d6b..4c1d9d5 100644 --- a/drivers/ata/pata_scc.c +++ b/drivers/ata/pata_scc.c @@ -965,7 +965,7 @@ static struct scsi_host_template scc_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations scc_pata_ops = { +static const struct ata_port_operations scc_pata_ops = { .inherits = &ata_bmdma_port_ops, .set_piomode = scc_set_piomode, diff --git a/drivers/ata/pata_sch.c b/drivers/ata/pata_sch.c index 99cceb4..e2e0a87 100644 --- a/drivers/ata/pata_sch.c +++ b/drivers/ata/pata_sch.c @@ -75,7 +75,7 @@ static struct scsi_host_template sch_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations sch_pata_ops = { +static const struct ata_port_operations sch_pata_ops = { .inherits = &ata_bmdma_port_ops, .cable_detect = ata_cable_unknown, .set_piomode = sch_set_piomode, diff --git a/drivers/ata/pata_serverworks.c b/drivers/ata/pata_serverworks.c index beaed12..39969f1 100644 --- a/drivers/ata/pata_serverworks.c +++ b/drivers/ata/pata_serverworks.c @@ -299,7 +299,7 @@ static struct scsi_host_template serverworks_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations serverworks_osb4_port_ops = { +static const struct ata_port_operations serverworks_osb4_port_ops = { .inherits = &ata_bmdma_port_ops, .cable_detect = serverworks_cable_detect, .mode_filter = serverworks_osb4_filter, @@ -307,7 +307,7 @@ static struct ata_port_operations serverworks_osb4_port_ops = { .set_dmamode = serverworks_set_dmamode, }; -static struct ata_port_operations serverworks_csb_port_ops = { +static const struct ata_port_operations serverworks_csb_port_ops = { .inherits = &serverworks_osb4_port_ops, .mode_filter = serverworks_csb_filter, }; diff --git a/drivers/ata/pata_sil680.c 
b/drivers/ata/pata_sil680.c index a2ace48..0463b44 100644 --- a/drivers/ata/pata_sil680.c +++ b/drivers/ata/pata_sil680.c @@ -194,7 +194,7 @@ static struct scsi_host_template sil680_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations sil680_port_ops = { +static const struct ata_port_operations sil680_port_ops = { .inherits = &ata_bmdma32_port_ops, .cable_detect = sil680_cable_detect, .set_piomode = sil680_set_piomode, diff --git a/drivers/ata/pata_sis.c b/drivers/ata/pata_sis.c index 488e77b..b3724d5 100644 --- a/drivers/ata/pata_sis.c +++ b/drivers/ata/pata_sis.c @@ -503,47 +503,47 @@ static struct scsi_host_template sis_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations sis_133_for_sata_ops = { +static const struct ata_port_operations sis_133_for_sata_ops = { .inherits = &ata_bmdma_port_ops, .set_piomode = sis_133_set_piomode, .set_dmamode = sis_133_set_dmamode, .cable_detect = sis_133_cable_detect, }; -static struct ata_port_operations sis_base_ops = { +static const struct ata_port_operations sis_base_ops = { .inherits = &ata_bmdma_port_ops, .prereset = sis_pre_reset, }; -static struct ata_port_operations sis_133_ops = { +static const struct ata_port_operations sis_133_ops = { .inherits = &sis_base_ops, .set_piomode = sis_133_set_piomode, .set_dmamode = sis_133_set_dmamode, .cable_detect = sis_133_cable_detect, }; -static struct ata_port_operations sis_133_early_ops = { +static const struct ata_port_operations sis_133_early_ops = { .inherits = &sis_base_ops, .set_piomode = sis_100_set_piomode, .set_dmamode = sis_133_early_set_dmamode, .cable_detect = sis_66_cable_detect, }; -static struct ata_port_operations sis_100_ops = { +static const struct ata_port_operations sis_100_ops = { .inherits = &sis_base_ops, .set_piomode = sis_100_set_piomode, .set_dmamode = sis_100_set_dmamode, .cable_detect = sis_66_cable_detect, }; -static struct ata_port_operations sis_66_ops = { +static const struct ata_port_operations sis_66_ops = { 
.inherits = &sis_base_ops, .set_piomode = sis_old_set_piomode, .set_dmamode = sis_66_set_dmamode, .cable_detect = sis_66_cable_detect, }; -static struct ata_port_operations sis_old_ops = { +static const struct ata_port_operations sis_old_ops = { .inherits = &sis_base_ops, .set_piomode = sis_old_set_piomode, .set_dmamode = sis_old_set_dmamode, diff --git a/drivers/ata/pata_sl82c105.c b/drivers/ata/pata_sl82c105.c index 29f733c..43e9ca0 100644 --- a/drivers/ata/pata_sl82c105.c +++ b/drivers/ata/pata_sl82c105.c @@ -231,7 +231,7 @@ static struct scsi_host_template sl82c105_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations sl82c105_port_ops = { +static const struct ata_port_operations sl82c105_port_ops = { .inherits = &ata_bmdma_port_ops, .qc_defer = sl82c105_qc_defer, .bmdma_start = sl82c105_bmdma_start, diff --git a/drivers/ata/pata_triflex.c b/drivers/ata/pata_triflex.c index f1f13ff..df39e99 100644 --- a/drivers/ata/pata_triflex.c +++ b/drivers/ata/pata_triflex.c @@ -178,7 +178,7 @@ static struct scsi_host_template triflex_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations triflex_port_ops = { +static const struct ata_port_operations triflex_port_ops = { .inherits = &ata_bmdma_port_ops, .bmdma_start = triflex_bmdma_start, .bmdma_stop = triflex_bmdma_stop, diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c index 1d73b8d..98a4b29 100644 --- a/drivers/ata/pata_via.c +++ b/drivers/ata/pata_via.c @@ -419,7 +419,7 @@ static struct scsi_host_template via_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations via_port_ops = { +static const struct ata_port_operations via_port_ops = { .inherits = &ata_bmdma_port_ops, .cable_detect = via_cable_detect, .set_piomode = via_set_piomode, @@ -429,7 +429,7 @@ static struct ata_port_operations via_port_ops = { .port_start = via_port_start, }; -static struct ata_port_operations via_port_ops_noirq = { +static const struct ata_port_operations via_port_ops_noirq = { 
.inherits = &via_port_ops, .sff_data_xfer = ata_sff_data_xfer_noirq, }; diff --git a/drivers/ata/pata_winbond.c b/drivers/ata/pata_winbond.c index 6d8619b..ad511c4 100644 --- a/drivers/ata/pata_winbond.c +++ b/drivers/ata/pata_winbond.c @@ -125,7 +125,7 @@ static struct scsi_host_template winbond_sht = { ATA_PIO_SHT(DRV_NAME), }; -static struct ata_port_operations winbond_port_ops = { +static const struct ata_port_operations winbond_port_ops = { .inherits = &ata_sff_port_ops, .sff_data_xfer = winbond_data_xfer, .cable_detect = ata_cable_40wire, diff --git a/drivers/ata/pdc_adma.c b/drivers/ata/pdc_adma.c index 6c65b07..f996ec7 100644 --- a/drivers/ata/pdc_adma.c +++ b/drivers/ata/pdc_adma.c @@ -145,7 +145,7 @@ static struct scsi_host_template adma_ata_sht = { .dma_boundary = ADMA_DMA_BOUNDARY, }; -static struct ata_port_operations adma_ata_ops = { +static const struct ata_port_operations adma_ata_ops = { .inherits = &ata_sff_port_ops, .lost_interrupt = ATA_OP_NULL, diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index 172b57e..c49bc1e 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -1258,7 +1258,7 @@ static struct scsi_host_template sata_fsl_sht = { .dma_boundary = ATA_DMA_BOUNDARY, }; -static struct ata_port_operations sata_fsl_ops = { +static const struct ata_port_operations sata_fsl_ops = { .inherits = &sata_pmp_port_ops, .qc_defer = ata_std_qc_defer, diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c index 4406902..60603ef 100644 --- a/drivers/ata/sata_inic162x.c +++ b/drivers/ata/sata_inic162x.c @@ -721,7 +721,7 @@ static int inic_port_start(struct ata_port *ap) return 0; } -static struct ata_port_operations inic_port_ops = { +static const struct ata_port_operations inic_port_ops = { .inherits = &sata_port_ops, .check_atapi_dma = inic_check_atapi_dma, diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index cf41126..8107be6 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -656,7 
+656,7 @@ static struct scsi_host_template mv6_sht = { .dma_boundary = MV_DMA_BOUNDARY, }; -static struct ata_port_operations mv5_ops = { +static const struct ata_port_operations mv5_ops = { .inherits = &ata_sff_port_ops, .lost_interrupt = ATA_OP_NULL, @@ -678,7 +678,7 @@ static struct ata_port_operations mv5_ops = { .port_stop = mv_port_stop, }; -static struct ata_port_operations mv6_ops = { +static const struct ata_port_operations mv6_ops = { .inherits = &mv5_ops, .dev_config = mv6_dev_config, .scr_read = mv_scr_read, @@ -698,7 +698,7 @@ static struct ata_port_operations mv6_ops = { .bmdma_status = mv_bmdma_status, }; -static struct ata_port_operations mv_iie_ops = { +static const struct ata_port_operations mv_iie_ops = { .inherits = &mv6_ops, .dev_config = ATA_OP_NULL, .qc_prep = mv_qc_prep_iie, diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index ae2297c..d5c9c33 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -464,7 +464,7 @@ static struct scsi_host_template nv_swncq_sht = { * cases. Define nv_hardreset() which only kicks in for post-boot * probing and use it for all variants. 
*/ -static struct ata_port_operations nv_generic_ops = { +static const struct ata_port_operations nv_generic_ops = { .inherits = &ata_bmdma_port_ops, .lost_interrupt = ATA_OP_NULL, .scr_read = nv_scr_read, @@ -472,20 +472,20 @@ static struct ata_port_operations nv_generic_ops = { .hardreset = nv_hardreset, }; -static struct ata_port_operations nv_nf2_ops = { +static const struct ata_port_operations nv_nf2_ops = { .inherits = &nv_generic_ops, .freeze = nv_nf2_freeze, .thaw = nv_nf2_thaw, }; -static struct ata_port_operations nv_ck804_ops = { +static const struct ata_port_operations nv_ck804_ops = { .inherits = &nv_generic_ops, .freeze = nv_ck804_freeze, .thaw = nv_ck804_thaw, .host_stop = nv_ck804_host_stop, }; -static struct ata_port_operations nv_adma_ops = { +static const struct ata_port_operations nv_adma_ops = { .inherits = &nv_ck804_ops, .check_atapi_dma = nv_adma_check_atapi_dma, @@ -509,7 +509,7 @@ static struct ata_port_operations nv_adma_ops = { .host_stop = nv_adma_host_stop, }; -static struct ata_port_operations nv_swncq_ops = { +static const struct ata_port_operations nv_swncq_ops = { .inherits = &nv_generic_ops, .qc_defer = ata_std_qc_defer, diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c index 07d8d00..6cc70bb 100644 --- a/drivers/ata/sata_promise.c +++ b/drivers/ata/sata_promise.c @@ -195,7 +195,7 @@ static const struct ata_port_operations pdc_common_ops = { .error_handler = pdc_error_handler, }; -static struct ata_port_operations pdc_sata_ops = { +static const struct ata_port_operations pdc_sata_ops = { .inherits = &pdc_common_ops, .cable_detect = pdc_sata_cable_detect, .freeze = pdc_sata_freeze, @@ -208,14 +208,14 @@ static struct ata_port_operations pdc_sata_ops = { /* First-generation chips need a more restrictive ->check_atapi_dma op, and ->freeze/thaw that ignore the hotplug controls. 
*/ -static struct ata_port_operations pdc_old_sata_ops = { +static const struct ata_port_operations pdc_old_sata_ops = { .inherits = &pdc_sata_ops, .freeze = pdc_freeze, .thaw = pdc_thaw, .check_atapi_dma = pdc_old_sata_check_atapi_dma, }; -static struct ata_port_operations pdc_pata_ops = { +static const struct ata_port_operations pdc_pata_ops = { .inherits = &pdc_common_ops, .cable_detect = pdc_pata_cable_detect, .freeze = pdc_freeze, diff --git a/drivers/ata/sata_qstor.c b/drivers/ata/sata_qstor.c index 326c0cf..36ecebe 100644 --- a/drivers/ata/sata_qstor.c +++ b/drivers/ata/sata_qstor.c @@ -132,7 +132,7 @@ static struct scsi_host_template qs_ata_sht = { .dma_boundary = QS_DMA_BOUNDARY, }; -static struct ata_port_operations qs_ata_ops = { +static const struct ata_port_operations qs_ata_ops = { .inherits = &ata_sff_port_ops, .check_atapi_dma = qs_check_atapi_dma, diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c index 3cb69d5..0871d3c 100644 --- a/drivers/ata/sata_sil.c +++ b/drivers/ata/sata_sil.c @@ -182,7 +182,7 @@ static struct scsi_host_template sil_sht = { .sg_tablesize = ATA_MAX_PRD }; -static struct ata_port_operations sil_ops = { +static const struct ata_port_operations sil_ops = { .inherits = &ata_bmdma32_port_ops, .dev_config = sil_dev_config, .set_mode = sil_set_mode, diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c index e6946fc..eddb794 100644 --- a/drivers/ata/sata_sil24.c +++ b/drivers/ata/sata_sil24.c @@ -388,7 +388,7 @@ static struct scsi_host_template sil24_sht = { .dma_boundary = ATA_DMA_BOUNDARY, }; -static struct ata_port_operations sil24_ops = { +static const struct ata_port_operations sil24_ops = { .inherits = &sata_pmp_port_ops, .qc_defer = sil24_qc_defer, diff --git a/drivers/ata/sata_sis.c b/drivers/ata/sata_sis.c index f8a91bf..9cb06b6 100644 --- a/drivers/ata/sata_sis.c +++ b/drivers/ata/sata_sis.c @@ -89,7 +89,7 @@ static struct scsi_host_template sis_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct 
ata_port_operations sis_ops = { +static const struct ata_port_operations sis_ops = { .inherits = &ata_bmdma_port_ops, .scr_read = sis_scr_read, .scr_write = sis_scr_write, diff --git a/drivers/ata/sata_svw.c b/drivers/ata/sata_svw.c index 7257f2d..d04c6f5 100644 --- a/drivers/ata/sata_svw.c +++ b/drivers/ata/sata_svw.c @@ -344,7 +344,7 @@ static struct scsi_host_template k2_sata_sht = { }; -static struct ata_port_operations k2_sata_ops = { +static const struct ata_port_operations k2_sata_ops = { .inherits = &ata_bmdma_port_ops, .sff_tf_load = k2_sata_tf_load, .sff_tf_read = k2_sata_tf_read, diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c index bbcf970..cd0df0d 100644 --- a/drivers/ata/sata_sx4.c +++ b/drivers/ata/sata_sx4.c @@ -248,7 +248,7 @@ static struct scsi_host_template pdc_sata_sht = { }; /* TODO: inherit from base port_ops after converting to new EH */ -static struct ata_port_operations pdc_20621_ops = { +static const struct ata_port_operations pdc_20621_ops = { .inherits = &ata_sff_port_ops, .check_atapi_dma = pdc_check_atapi_dma, diff --git a/drivers/ata/sata_uli.c b/drivers/ata/sata_uli.c index e5bff47..089d859 100644 --- a/drivers/ata/sata_uli.c +++ b/drivers/ata/sata_uli.c @@ -79,7 +79,7 @@ static struct scsi_host_template uli_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations uli_ops = { +static const struct ata_port_operations uli_ops = { .inherits = &ata_bmdma_port_ops, .scr_read = uli_scr_read, .scr_write = uli_scr_write, diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c index f5dcca7..77b94eb 100644 --- a/drivers/ata/sata_via.c +++ b/drivers/ata/sata_via.c @@ -115,32 +115,32 @@ static struct scsi_host_template svia_sht = { ATA_BMDMA_SHT(DRV_NAME), }; -static struct ata_port_operations svia_base_ops = { +static const struct ata_port_operations svia_base_ops = { .inherits = &ata_bmdma_port_ops, .sff_tf_load = svia_tf_load, }; -static struct ata_port_operations vt6420_sata_ops = { +static const struct 
ata_port_operations vt6420_sata_ops = { .inherits = &svia_base_ops, .freeze = svia_noop_freeze, .prereset = vt6420_prereset, .bmdma_start = vt6420_bmdma_start, }; -static struct ata_port_operations vt6421_pata_ops = { +static const struct ata_port_operations vt6421_pata_ops = { .inherits = &svia_base_ops, .cable_detect = vt6421_pata_cable_detect, .set_piomode = vt6421_set_pio_mode, .set_dmamode = vt6421_set_dma_mode, }; -static struct ata_port_operations vt6421_sata_ops = { +static const struct ata_port_operations vt6421_sata_ops = { .inherits = &svia_base_ops, .scr_read = svia_scr_read, .scr_write = svia_scr_write, }; -static struct ata_port_operations vt8251_ops = { +static const struct ata_port_operations vt8251_ops = { .inherits = &svia_base_ops, .hardreset = sata_std_hardreset, .scr_read = vt8251_scr_read, diff --git a/drivers/ata/sata_vsc.c b/drivers/ata/sata_vsc.c index 8b2a278..51e65d3 100644 --- a/drivers/ata/sata_vsc.c +++ b/drivers/ata/sata_vsc.c @@ -306,7 +306,7 @@ static struct scsi_host_template vsc_sata_sht = { }; -static struct ata_port_operations vsc_sata_ops = { +static const struct ata_port_operations vsc_sata_ops = { .inherits = &ata_bmdma_port_ops, /* The IRQ handling is not quite standard SFF behaviour so we cannot use the default lost interrupt handler */ diff --git a/drivers/atm/adummy.c b/drivers/atm/adummy.c index 5effec6..7e4019a 100644 --- a/drivers/atm/adummy.c +++ b/drivers/atm/adummy.c @@ -77,7 +77,7 @@ adummy_send(struct atm_vcc *vcc, struct sk_buff *skb) vcc->pop(vcc, skb); else dev_kfree_skb_any(skb); - atomic_inc(&vcc->stats->tx); + atomic_inc_unchecked(&vcc->stats->tx); return 0; } diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c index 66e1813..26a27c6 100644 --- a/drivers/atm/ambassador.c +++ b/drivers/atm/ambassador.c @@ -453,7 +453,7 @@ static void tx_complete (amb_dev * dev, tx_out * tx) { PRINTD (DBG_FLOW|DBG_TX, "tx_complete %p %p", dev, tx); // VC layer stats - atomic_inc(&ATM_SKB(skb)->vcc->stats->tx); + 
atomic_inc_unchecked(&ATM_SKB(skb)->vcc->stats->tx); // free the descriptor kfree (tx_descr); @@ -494,7 +494,7 @@ static void rx_complete (amb_dev * dev, rx_out * rx) { dump_skb ("<<<", vc, skb); // VC layer stats - atomic_inc(&atm_vcc->stats->rx); + atomic_inc_unchecked(&atm_vcc->stats->rx); __net_timestamp(skb); // end of our responsability atm_vcc->push (atm_vcc, skb); @@ -509,7 +509,7 @@ static void rx_complete (amb_dev * dev, rx_out * rx) { } else { PRINTK (KERN_INFO, "dropped over-size frame"); // should we count this? - atomic_inc(&atm_vcc->stats->rx_drop); + atomic_inc_unchecked(&atm_vcc->stats->rx_drop); } } else { @@ -1341,7 +1341,7 @@ static int amb_send (struct atm_vcc * atm_vcc, struct sk_buff * skb) { } if (check_area (skb->data, skb->len)) { - atomic_inc(&atm_vcc->stats->tx_err); + atomic_inc_unchecked(&atm_vcc->stats->tx_err); return -ENOMEM; // ? } diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index 02ad83d..6daffeb 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c @@ -206,7 +206,7 @@ static int atmtcp_v_send(struct atm_vcc *vcc,struct sk_buff *skb) if (vcc->pop) vcc->pop(vcc,skb); else dev_kfree_skb(skb); if (dev_data) return 0; - atomic_inc(&vcc->stats->tx_err); + atomic_inc_unchecked(&vcc->stats->tx_err); return -ENOLINK; } size = skb->len+sizeof(struct atmtcp_hdr); @@ -214,7 +214,7 @@ static int atmtcp_v_send(struct atm_vcc *vcc,struct sk_buff *skb) if (!new_skb) { if (vcc->pop) vcc->pop(vcc,skb); else dev_kfree_skb(skb); - atomic_inc(&vcc->stats->tx_err); + atomic_inc_unchecked(&vcc->stats->tx_err); return -ENOBUFS; } hdr = (void *) skb_put(new_skb,sizeof(struct atmtcp_hdr)); @@ -225,8 +225,8 @@ static int atmtcp_v_send(struct atm_vcc *vcc,struct sk_buff *skb) if (vcc->pop) vcc->pop(vcc,skb); else dev_kfree_skb(skb); out_vcc->push(out_vcc,new_skb); - atomic_inc(&vcc->stats->tx); - atomic_inc(&out_vcc->stats->rx); + atomic_inc_unchecked(&vcc->stats->tx); + atomic_inc_unchecked(&out_vcc->stats->rx); return 0; } @@ -300,7 
+300,7 @@ static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb) out_vcc = find_vcc(dev, ntohs(hdr->vpi), ntohs(hdr->vci)); read_unlock(&vcc_sklist_lock); if (!out_vcc) { - atomic_inc(&vcc->stats->tx_err); + atomic_inc_unchecked(&vcc->stats->tx_err); goto done; } skb_pull(skb,sizeof(struct atmtcp_hdr)); @@ -312,8 +312,8 @@ static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb) __net_timestamp(new_skb); skb_copy_from_linear_data(skb, skb_put(new_skb, skb->len), skb->len); out_vcc->push(out_vcc,new_skb); - atomic_inc(&vcc->stats->tx); - atomic_inc(&out_vcc->stats->rx); + atomic_inc_unchecked(&vcc->stats->tx); + atomic_inc_unchecked(&out_vcc->stats->rx); done: if (vcc->pop) vcc->pop(vcc,skb); else dev_kfree_skb(skb); diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 0c30261..3da356e 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -525,7 +525,7 @@ static int rx_aal0(struct atm_vcc *vcc) DPRINTK(DEV_LABEL "(itf %d): trashing empty cell\n", vcc->dev->number); length = 0; - atomic_inc(&vcc->stats->rx_err); + atomic_inc_unchecked(&vcc->stats->rx_err); } else { length = ATM_CELL_SIZE-1; /* no HEC */ @@ -580,7 +580,7 @@ static int rx_aal5(struct atm_vcc *vcc) size); } eff = length = 0; - atomic_inc(&vcc->stats->rx_err); + atomic_inc_unchecked(&vcc->stats->rx_err); } else { size = (descr & MID_RED_COUNT)*(ATM_CELL_PAYLOAD >> 2); @@ -597,7 +597,7 @@ static int rx_aal5(struct atm_vcc *vcc) "(VCI=%d,length=%ld,size=%ld (descr 0x%lx))\n", vcc->dev->number,vcc->vci,length,size << 2,descr); length = eff = 0; - atomic_inc(&vcc->stats->rx_err); + atomic_inc_unchecked(&vcc->stats->rx_err); } } skb = eff ? 
atm_alloc_charge(vcc,eff << 2,GFP_ATOMIC) : NULL; @@ -770,7 +770,7 @@ rx_dequeued++; vcc->push(vcc,skb); pushed++; } - atomic_inc(&vcc->stats->rx); + atomic_inc_unchecked(&vcc->stats->rx); } wake_up(&eni_dev->rx_wait); } @@ -1227,7 +1227,7 @@ static void dequeue_tx(struct atm_dev *dev) PCI_DMA_TODEVICE); if (vcc->pop) vcc->pop(vcc,skb); else dev_kfree_skb_irq(skb); - atomic_inc(&vcc->stats->tx); + atomic_inc_unchecked(&vcc->stats->tx); wake_up(&eni_dev->tx_wait); dma_complete++; } @@ -1570,7 +1570,7 @@ tx_complete++; /*--------------------------------- entries ---------------------------------*/ -static const char *media_name[] __devinitdata = { +static const char *media_name[] __devinitconst = { "MMF", "SMF", "MMF", "03?", /* 0- 3 */ "UTP", "05?", "06?", "07?", /* 4- 7 */ "TAXI","09?", "10?", "11?", /* 8-11 */ diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index cd5049a..a51209f 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -748,7 +748,7 @@ static void process_txdone_queue (struct fs_dev *dev, struct queue *q) } } - atomic_inc(&ATM_SKB(skb)->vcc->stats->tx); + atomic_inc_unchecked(&ATM_SKB(skb)->vcc->stats->tx); fs_dprintk (FS_DEBUG_TXMEM, "i"); fs_dprintk (FS_DEBUG_ALLOC, "Free t-skb: %p\n", skb); @@ -815,7 +815,7 @@ static void process_incoming (struct fs_dev *dev, struct queue *q) #endif skb_put (skb, qe->p1 & 0xffff); ATM_SKB(skb)->vcc = atm_vcc; - atomic_inc(&atm_vcc->stats->rx); + atomic_inc_unchecked(&atm_vcc->stats->rx); __net_timestamp(skb); fs_dprintk (FS_DEBUG_ALLOC, "Free rec-skb: %p (pushed)\n", skb); atm_vcc->push (atm_vcc, skb); @@ -836,12 +836,12 @@ static void process_incoming (struct fs_dev *dev, struct queue *q) kfree (pe); } if (atm_vcc) - atomic_inc(&atm_vcc->stats->rx_drop); + atomic_inc_unchecked(&atm_vcc->stats->rx_drop); break; case 0x1f: /* Reassembly abort: no buffers. */ /* Silently increment error counter. 
*/ if (atm_vcc) - atomic_inc(&atm_vcc->stats->rx_drop); + atomic_inc_unchecked(&atm_vcc->stats->rx_drop); break; default: /* Hmm. Haven't written the code to handle the others yet... -- REW */ printk (KERN_WARNING "Don't know what to do with RX status %x: %s.\n", diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index f766cc4..a34002e 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -931,9 +931,9 @@ fore200e_tx_irq(struct fore200e* fore200e) #endif /* check error condition */ if (*entry->status & STATUS_ERROR) - atomic_inc(&vcc->stats->tx_err); + atomic_inc_unchecked(&vcc->stats->tx_err); else - atomic_inc(&vcc->stats->tx); + atomic_inc_unchecked(&vcc->stats->tx); } } @@ -1082,7 +1082,7 @@ fore200e_push_rpd(struct fore200e* fore200e, struct atm_vcc* vcc, struct rpd* rp if (skb == NULL) { DPRINTK(2, "unable to alloc new skb, rx PDU length = %d\n", pdu_len); - atomic_inc(&vcc->stats->rx_drop); + atomic_inc_unchecked(&vcc->stats->rx_drop); return -ENOMEM; } @@ -1125,14 +1125,14 @@ fore200e_push_rpd(struct fore200e* fore200e, struct atm_vcc* vcc, struct rpd* rp dev_kfree_skb_any(skb); - atomic_inc(&vcc->stats->rx_drop); + atomic_inc_unchecked(&vcc->stats->rx_drop); return -ENOMEM; } ASSERT(atomic_read(&sk_atm(vcc)->sk_wmem_alloc) >= 0); vcc->push(vcc, skb); - atomic_inc(&vcc->stats->rx); + atomic_inc_unchecked(&vcc->stats->rx); ASSERT(atomic_read(&sk_atm(vcc)->sk_wmem_alloc) >= 0); @@ -1210,7 +1210,7 @@ fore200e_rx_irq(struct fore200e* fore200e) DPRINTK(2, "damaged PDU on %d.%d.%d\n", fore200e->atm_dev->number, entry->rpd->atm_header.vpi, entry->rpd->atm_header.vci); - atomic_inc(&vcc->stats->rx_err); + atomic_inc_unchecked(&vcc->stats->rx_err); } } @@ -1655,7 +1655,7 @@ fore200e_send(struct atm_vcc *vcc, struct sk_buff *skb) goto retry_here; } - atomic_inc(&vcc->stats->tx_err); + atomic_inc_unchecked(&vcc->stats->tx_err); fore200e->tx_sat++; DPRINTK(2, "tx queue of device %s is saturated, PDU dropped - heartbeat is %08x\n", diff --git 
a/drivers/atm/he.c b/drivers/atm/he.c index 7066703..2b130de 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -1769,7 +1769,7 @@ he_service_rbrq(struct he_dev *he_dev, int group) if (RBRQ_HBUF_ERR(he_dev->rbrq_head)) { hprintk("HBUF_ERR! (cid 0x%x)\n", cid); - atomic_inc(&vcc->stats->rx_drop); + atomic_inc_unchecked(&vcc->stats->rx_drop); goto return_host_buffers; } @@ -1802,7 +1802,7 @@ he_service_rbrq(struct he_dev *he_dev, int group) RBRQ_LEN_ERR(he_dev->rbrq_head) ? "LEN_ERR" : "", vcc->vpi, vcc->vci); - atomic_inc(&vcc->stats->rx_err); + atomic_inc_unchecked(&vcc->stats->rx_err); goto return_host_buffers; } @@ -1861,7 +1861,7 @@ he_service_rbrq(struct he_dev *he_dev, int group) vcc->push(vcc, skb); spin_lock(&he_dev->global_lock); - atomic_inc(&vcc->stats->rx); + atomic_inc_unchecked(&vcc->stats->rx); return_host_buffers: ++pdus_assembled; @@ -2206,7 +2206,7 @@ __enqueue_tpd(struct he_dev *he_dev, struct he_tpd *tpd, unsigned cid) tpd->vcc->pop(tpd->vcc, tpd->skb); else dev_kfree_skb_any(tpd->skb); - atomic_inc(&tpd->vcc->stats->tx_err); + atomic_inc_unchecked(&tpd->vcc->stats->tx_err); } pci_pool_free(he_dev->tpd_pool, tpd, TPD_ADDR(tpd->status)); return; @@ -2618,7 +2618,7 @@ he_send(struct atm_vcc *vcc, struct sk_buff *skb) vcc->pop(vcc, skb); else dev_kfree_skb_any(skb); - atomic_inc(&vcc->stats->tx_err); + atomic_inc_unchecked(&vcc->stats->tx_err); return -EINVAL; } @@ -2629,7 +2629,7 @@ he_send(struct atm_vcc *vcc, struct sk_buff *skb) vcc->pop(vcc, skb); else dev_kfree_skb_any(skb); - atomic_inc(&vcc->stats->tx_err); + atomic_inc_unchecked(&vcc->stats->tx_err); return -EINVAL; } #endif @@ -2641,7 +2641,7 @@ he_send(struct atm_vcc *vcc, struct sk_buff *skb) vcc->pop(vcc, skb); else dev_kfree_skb_any(skb); - atomic_inc(&vcc->stats->tx_err); + atomic_inc_unchecked(&vcc->stats->tx_err); spin_unlock_irqrestore(&he_dev->global_lock, flags); return -ENOMEM; } @@ -2683,7 +2683,7 @@ he_send(struct atm_vcc *vcc, struct sk_buff *skb) vcc->pop(vcc, skb); 
else dev_kfree_skb_any(skb); - atomic_inc(&vcc->stats->tx_err); + atomic_inc_unchecked(&vcc->stats->tx_err); spin_unlock_irqrestore(&he_dev->global_lock, flags); return -ENOMEM; } @@ -2714,7 +2714,7 @@ he_send(struct atm_vcc *vcc, struct sk_buff *skb) __enqueue_tpd(he_dev, tpd, cid); spin_unlock_irqrestore(&he_dev->global_lock, flags); - atomic_inc(&vcc->stats->tx); + atomic_inc_unchecked(&vcc->stats->tx); return 0; } diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 4e49021..01b1512 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -1033,7 +1033,7 @@ static void rx_schedule (hrz_dev * dev, int irq) { { struct atm_vcc * vcc = ATM_SKB(skb)->vcc; // VC layer stats - atomic_inc(&vcc->stats->rx); + atomic_inc_unchecked(&vcc->stats->rx); __net_timestamp(skb); // end of our responsability vcc->push (vcc, skb); @@ -1185,7 +1185,7 @@ static void tx_schedule (hrz_dev * const dev, int irq) { dev->tx_iovec = NULL; // VC layer stats - atomic_inc(&ATM_SKB(skb)->vcc->stats->tx); + atomic_inc_unchecked(&ATM_SKB(skb)->vcc->stats->tx); // free the skb hrz_kfree_skb (skb); diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index e33ae00..9deb4ab 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -810,7 +810,7 @@ drain_scq(struct idt77252_dev *card, struct vc_map *vc) else dev_kfree_skb(skb); - atomic_inc(&vcc->stats->tx); + atomic_inc_unchecked(&vcc->stats->tx); } atomic_dec(&scq->used); @@ -1073,13 +1073,13 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) if ((sb = dev_alloc_skb(64)) == NULL) { printk("%s: Can't allocate buffers for aal0.\n", card->name); - atomic_add(i, &vcc->stats->rx_drop); + atomic_add_unchecked(i, &vcc->stats->rx_drop); break; } if (!atm_charge(vcc, sb->truesize)) { RXPRINTK("%s: atm_charge() dropped aal0 packets.\n", card->name); - atomic_add(i - 1, &vcc->stats->rx_drop); + atomic_add_unchecked(i - 1, &vcc->stats->rx_drop); dev_kfree_skb(sb); break; } @@ -1096,7 +1096,7 @@ 
dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) ATM_SKB(sb)->vcc = vcc; __net_timestamp(sb); vcc->push(vcc, sb); - atomic_inc(&vcc->stats->rx); + atomic_inc_unchecked(&vcc->stats->rx); cell += ATM_CELL_PAYLOAD; } @@ -1133,13 +1133,13 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) "(CDC: %08x)\n", card->name, len, rpp->len, readl(SAR_REG_CDC)); recycle_rx_pool_skb(card, rpp); - atomic_inc(&vcc->stats->rx_err); + atomic_inc_unchecked(&vcc->stats->rx_err); return; } if (stat & SAR_RSQE_CRC) { RXPRINTK("%s: AAL5 CRC error.\n", card->name); recycle_rx_pool_skb(card, rpp); - atomic_inc(&vcc->stats->rx_err); + atomic_inc_unchecked(&vcc->stats->rx_err); return; } if (skb_queue_len(&rpp->queue) > 1) { @@ -1150,7 +1150,7 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) RXPRINTK("%s: Can't alloc RX skb.\n", card->name); recycle_rx_pool_skb(card, rpp); - atomic_inc(&vcc->stats->rx_err); + atomic_inc_unchecked(&vcc->stats->rx_err); return; } if (!atm_charge(vcc, skb->truesize)) { @@ -1169,7 +1169,7 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) __net_timestamp(skb); vcc->push(vcc, skb); - atomic_inc(&vcc->stats->rx); + atomic_inc_unchecked(&vcc->stats->rx); return; } @@ -1191,7 +1191,7 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) __net_timestamp(skb); vcc->push(vcc, skb); - atomic_inc(&vcc->stats->rx); + atomic_inc_unchecked(&vcc->stats->rx); if (skb->truesize > SAR_FB_SIZE_3) add_rx_skb(card, 3, SAR_FB_SIZE_3, 1); @@ -1303,14 +1303,14 @@ idt77252_rx_raw(struct idt77252_dev *card) if (vcc->qos.aal != ATM_AAL0) { RPRINTK("%s: raw cell for non AAL0 vc %u.%u\n", card->name, vpi, vci); - atomic_inc(&vcc->stats->rx_drop); + atomic_inc_unchecked(&vcc->stats->rx_drop); goto drop; } if ((sb = dev_alloc_skb(64)) == NULL) { printk("%s: Can't allocate buffers for AAL0.\n", card->name); - atomic_inc(&vcc->stats->rx_err); + atomic_inc_unchecked(&vcc->stats->rx_err); goto drop; } @@ -1329,7 +1329,7 @@ 
idt77252_rx_raw(struct idt77252_dev *card) ATM_SKB(sb)->vcc = vcc; __net_timestamp(sb); vcc->push(vcc, sb); - atomic_inc(&vcc->stats->rx); + atomic_inc_unchecked(&vcc->stats->rx); drop: skb_pull(queue, 64); @@ -1954,13 +1954,13 @@ idt77252_send_skb(struct atm_vcc *vcc, struct sk_buff *skb, int oam) if (vc == NULL) { printk("%s: NULL connection in send().\n", card->name); - atomic_inc(&vcc->stats->tx_err); + atomic_inc_unchecked(&vcc->stats->tx_err); dev_kfree_skb(skb); return -EINVAL; } if (!test_bit(VCF_TX, &vc->flags)) { printk("%s: Trying to transmit on a non-tx VC.\n", card->name); - atomic_inc(&vcc->stats->tx_err); + atomic_inc_unchecked(&vcc->stats->tx_err); dev_kfree_skb(skb); return -EINVAL; } @@ -1972,14 +1972,14 @@ idt77252_send_skb(struct atm_vcc *vcc, struct sk_buff *skb, int oam) break; default: printk("%s: Unsupported AAL: %d\n", card->name, vcc->qos.aal); - atomic_inc(&vcc->stats->tx_err); + atomic_inc_unchecked(&vcc->stats->tx_err); dev_kfree_skb(skb); return -EINVAL; } if (skb_shinfo(skb)->nr_frags != 0) { printk("%s: No scatter-gather yet.\n", card->name); - atomic_inc(&vcc->stats->tx_err); + atomic_inc_unchecked(&vcc->stats->tx_err); dev_kfree_skb(skb); return -EINVAL; } @@ -1987,7 +1987,7 @@ idt77252_send_skb(struct atm_vcc *vcc, struct sk_buff *skb, int oam) err = queue_skb(card, vc, skb, oam); if (err) { - atomic_inc(&vcc->stats->tx_err); + atomic_inc_unchecked(&vcc->stats->tx_err); dev_kfree_skb(skb); return err; } @@ -2010,7 +2010,7 @@ idt77252_send_oam(struct atm_vcc *vcc, void *cell, int flags) skb = dev_alloc_skb(64); if (!skb) { printk("%s: Out of memory in send_oam().\n", card->name); - atomic_inc(&vcc->stats->tx_err); + atomic_inc_unchecked(&vcc->stats->tx_err); return -ENOMEM; } atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index b2c1b37..faa672b 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -1123,7 +1123,7 @@ static int rx_pkt(struct atm_dev *dev) 
status = (u_short) (buf_desc_ptr->desc_mode); if (status & (RX_CER | RX_PTE | RX_OFL)) { - atomic_inc(&vcc->stats->rx_err); + atomic_inc_unchecked(&vcc->stats->rx_err); IF_ERR(printk("IA: bad packet, dropping it");) if (status & RX_CER) { IF_ERR(printk(" cause: packet CRC error\n");) @@ -1146,7 +1146,7 @@ static int rx_pkt(struct atm_dev *dev) len = dma_addr - buf_addr; if (len > iadev->rx_buf_sz) { printk("Over %d bytes sdu received, dropped!!!\n", iadev->rx_buf_sz); - atomic_inc(&vcc->stats->rx_err); + atomic_inc_unchecked(&vcc->stats->rx_err); goto out_free_desc; } @@ -1296,7 +1296,7 @@ static void rx_dle_intr(struct atm_dev *dev) ia_vcc = INPH_IA_VCC(vcc); if (ia_vcc == NULL) { - atomic_inc(&vcc->stats->rx_err); + atomic_inc_unchecked(&vcc->stats->rx_err); dev_kfree_skb_any(skb); atm_return(vcc, atm_guess_pdu2truesize(len)); goto INCR_DLE; @@ -1308,7 +1308,7 @@ static void rx_dle_intr(struct atm_dev *dev) if ((length > iadev->rx_buf_sz) || (length > (skb->len - sizeof(struct cpcs_trailer)))) { - atomic_inc(&vcc->stats->rx_err); + atomic_inc_unchecked(&vcc->stats->rx_err); IF_ERR(printk("rx_dle_intr: Bad AAL5 trailer %d (skb len %d)", length, skb->len);) dev_kfree_skb_any(skb); @@ -1324,7 +1324,7 @@ static void rx_dle_intr(struct atm_dev *dev) IF_RX(printk("rx_dle_intr: skb push");) vcc->push(vcc,skb); - atomic_inc(&vcc->stats->rx); + atomic_inc_unchecked(&vcc->stats->rx); iadev->rx_pkt_cnt++; } INCR_DLE: @@ -2806,15 +2806,15 @@ static int ia_ioctl(struct atm_dev *dev, unsigned int cmd, void __user *arg) { struct k_sonet_stats *stats; stats = &PRIV(_ia_dev[board])->sonet_stats; - printk("section_bip: %d\n", atomic_read(&stats->section_bip)); - printk("line_bip : %d\n", atomic_read(&stats->line_bip)); - printk("path_bip : %d\n", atomic_read(&stats->path_bip)); - printk("line_febe : %d\n", atomic_read(&stats->line_febe)); - printk("path_febe : %d\n", atomic_read(&stats->path_febe)); - printk("corr_hcs : %d\n", atomic_read(&stats->corr_hcs)); - printk("uncorr_hcs : 
%d\n", atomic_read(&stats->uncorr_hcs)); - printk("tx_cells : %d\n", atomic_read(&stats->tx_cells)); - printk("rx_cells : %d\n", atomic_read(&stats->rx_cells)); + printk("section_bip: %d\n", atomic_read_unchecked(&stats->section_bip)); + printk("line_bip : %d\n", atomic_read_unchecked(&stats->line_bip)); + printk("path_bip : %d\n", atomic_read_unchecked(&stats->path_bip)); + printk("line_febe : %d\n", atomic_read_unchecked(&stats->line_febe)); + printk("path_febe : %d\n", atomic_read_unchecked(&stats->path_febe)); + printk("corr_hcs : %d\n", atomic_read_unchecked(&stats->corr_hcs)); + printk("uncorr_hcs : %d\n", atomic_read_unchecked(&stats->uncorr_hcs)); + printk("tx_cells : %d\n", atomic_read_unchecked(&stats->tx_cells)); + printk("rx_cells : %d\n", atomic_read_unchecked(&stats->rx_cells)); } ia_cmds.status = 0; break; @@ -2919,7 +2919,7 @@ static int ia_pkt_tx (struct atm_vcc *vcc, struct sk_buff *skb) { if ((desc == 0) || (desc > iadev->num_tx_desc)) { IF_ERR(printk(DEV_LABEL "invalid desc for send: %d\n", desc);) - atomic_inc(&vcc->stats->tx); + atomic_inc_unchecked(&vcc->stats->tx); if (vcc->pop) vcc->pop(vcc, skb); else @@ -3024,14 +3024,14 @@ static int ia_pkt_tx (struct atm_vcc *vcc, struct sk_buff *skb) { ATM_DESC(skb) = vcc->vci; skb_queue_tail(&iadev->tx_dma_q, skb); - atomic_inc(&vcc->stats->tx); + atomic_inc_unchecked(&vcc->stats->tx); iadev->tx_pkt_cnt++; /* Increment transaction counter */ writel(2, iadev->dma+IPHASE5575_TX_COUNTER); #if 0 /* add flow control logic */ - if (atomic_read(&vcc->stats->tx) % 20 == 0) { + if (atomic_read_unchecked(&vcc->stats->tx) % 20 == 0) { if (iavcc->vc_desc_cnt > 10) { vcc->tx_quota = vcc->tx_quota * 3 / 4; printk("Tx1: vcc->tx_quota = %d \n", (u32)vcc->tx_quota ); diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index cf97c34..8d30655 100644 --- a/drivers/atm/lanai.c +++ b/drivers/atm/lanai.c @@ -1305,7 +1305,7 @@ static void lanai_send_one_aal5(struct lanai_dev *lanai, vcc_tx_add_aal5_trailer(lvcc, skb->len, 
0, 0); lanai_endtx(lanai, lvcc); lanai_free_skb(lvcc->tx.atmvcc, skb); - atomic_inc(&lvcc->tx.atmvcc->stats->tx); + atomic_inc_unchecked(&lvcc->tx.atmvcc->stats->tx); } /* Try to fill the buffer - don't call unless there is backlog */ @@ -1428,7 +1428,7 @@ static void vcc_rx_aal5(struct lanai_vcc *lvcc, int endptr) ATM_SKB(skb)->vcc = lvcc->rx.atmvcc; __net_timestamp(skb); lvcc->rx.atmvcc->push(lvcc->rx.atmvcc, skb); - atomic_inc(&lvcc->rx.atmvcc->stats->rx); + atomic_inc_unchecked(&lvcc->rx.atmvcc->stats->rx); out: lvcc->rx.buf.ptr = end; cardvcc_write(lvcc, endptr, vcc_rxreadptr); @@ -1670,7 +1670,7 @@ static int handle_service(struct lanai_dev *lanai, u32 s) DPRINTK("(itf %d) got RX service entry 0x%X for non-AAL5 " "vcc %d\n", lanai->number, (unsigned int) s, vci); lanai->stats.service_rxnotaal5++; - atomic_inc(&lvcc->rx.atmvcc->stats->rx_err); + atomic_inc_unchecked(&lvcc->rx.atmvcc->stats->rx_err); return 0; } if (likely(!(s & (SERVICE_TRASH | SERVICE_STREAM | SERVICE_CRCERR)))) { @@ -1682,7 +1682,7 @@ static int handle_service(struct lanai_dev *lanai, u32 s) int bytes; read_unlock(&vcc_sklist_lock); DPRINTK("got trashed rx pdu on vci %d\n", vci); - atomic_inc(&lvcc->rx.atmvcc->stats->rx_err); + atomic_inc_unchecked(&lvcc->rx.atmvcc->stats->rx_err); lvcc->stats.x.aal5.service_trash++; bytes = (SERVICE_GET_END(s) * 16) - (((unsigned long) lvcc->rx.buf.ptr) - @@ -1694,7 +1694,7 @@ static int handle_service(struct lanai_dev *lanai, u32 s) } if (s & SERVICE_STREAM) { read_unlock(&vcc_sklist_lock); - atomic_inc(&lvcc->rx.atmvcc->stats->rx_err); + atomic_inc_unchecked(&lvcc->rx.atmvcc->stats->rx_err); lvcc->stats.x.aal5.service_stream++; printk(KERN_ERR DEV_LABEL "(itf %d): Got AAL5 stream " "PDU on VCI %d!\n", lanai->number, vci); @@ -1702,7 +1702,7 @@ static int handle_service(struct lanai_dev *lanai, u32 s) return 0; } DPRINTK("got rx crc error on vci %d\n", vci); - atomic_inc(&lvcc->rx.atmvcc->stats->rx_err); + 
atomic_inc_unchecked(&lvcc->rx.atmvcc->stats->rx_err); lvcc->stats.x.aal5.service_rxcrc++; lvcc->rx.buf.ptr = &lvcc->rx.buf.start[SERVICE_GET_END(s) * 4]; cardvcc_write(lvcc, SERVICE_GET_END(s), vcc_rxreadptr); diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index 3da804b..d3b0eed 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -1723,7 +1723,7 @@ static int ns_send(struct atm_vcc *vcc, struct sk_buff *skb) if ((vc = (vc_map *) vcc->dev_data) == NULL) { printk("nicstar%d: vcc->dev_data == NULL on ns_send().\n", card->index); - atomic_inc(&vcc->stats->tx_err); + atomic_inc_unchecked(&vcc->stats->tx_err); dev_kfree_skb_any(skb); return -EINVAL; } @@ -1731,7 +1731,7 @@ static int ns_send(struct atm_vcc *vcc, struct sk_buff *skb) if (!vc->tx) { printk("nicstar%d: Trying to transmit on a non-tx VC.\n", card->index); - atomic_inc(&vcc->stats->tx_err); + atomic_inc_unchecked(&vcc->stats->tx_err); dev_kfree_skb_any(skb); return -EINVAL; } @@ -1739,7 +1739,7 @@ static int ns_send(struct atm_vcc *vcc, struct sk_buff *skb) if (vcc->qos.aal != ATM_AAL5 && vcc->qos.aal != ATM_AAL0) { printk("nicstar%d: Only AAL0 and AAL5 are supported.\n", card->index); - atomic_inc(&vcc->stats->tx_err); + atomic_inc_unchecked(&vcc->stats->tx_err); dev_kfree_skb_any(skb); return -EINVAL; } @@ -1747,7 +1747,7 @@ static int ns_send(struct atm_vcc *vcc, struct sk_buff *skb) if (skb_shinfo(skb)->nr_frags != 0) { printk("nicstar%d: No scatter-gather yet.\n", card->index); - atomic_inc(&vcc->stats->tx_err); + atomic_inc_unchecked(&vcc->stats->tx_err); dev_kfree_skb_any(skb); return -EINVAL; } @@ -1792,11 +1792,11 @@ static int ns_send(struct atm_vcc *vcc, struct sk_buff *skb) if (push_scqe(card, vc, scq, &scqe, skb) != 0) { - atomic_inc(&vcc->stats->tx_err); + atomic_inc_unchecked(&vcc->stats->tx_err); dev_kfree_skb_any(skb); return -EIO; } - atomic_inc(&vcc->stats->tx); + atomic_inc_unchecked(&vcc->stats->tx); return 0; } @@ -2111,14 +2111,14 @@ static void 
dequeue_rx(ns_dev *card, ns_rsqe *rsqe) { printk("nicstar%d: Can't allocate buffers for aal0.\n", card->index); - atomic_add(i,&vcc->stats->rx_drop); + atomic_add_unchecked(i,&vcc->stats->rx_drop); break; } if (!atm_charge(vcc, sb->truesize)) { RXPRINTK("nicstar%d: atm_charge() dropped aal0 packets.\n", card->index); - atomic_add(i-1,&vcc->stats->rx_drop); /* already increased by 1 */ + atomic_add_unchecked(i-1,&vcc->stats->rx_drop); /* already increased by 1 */ dev_kfree_skb_any(sb); break; } @@ -2133,7 +2133,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) ATM_SKB(sb)->vcc = vcc; __net_timestamp(sb); vcc->push(vcc, sb); - atomic_inc(&vcc->stats->rx); + atomic_inc_unchecked(&vcc->stats->rx); cell += ATM_CELL_PAYLOAD; } @@ -2152,7 +2152,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) if (iovb == NULL) { printk("nicstar%d: Out of iovec buffers.\n", card->index); - atomic_inc(&vcc->stats->rx_drop); + atomic_inc_unchecked(&vcc->stats->rx_drop); recycle_rx_buf(card, skb); return; } @@ -2182,7 +2182,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) else if (NS_SKB(iovb)->iovcnt >= NS_MAX_IOVECS) { printk("nicstar%d: received too big AAL5 SDU.\n", card->index); - atomic_inc(&vcc->stats->rx_err); + atomic_inc_unchecked(&vcc->stats->rx_err); recycle_iovec_rx_bufs(card, (struct iovec *) iovb->data, NS_MAX_IOVECS); NS_SKB(iovb)->iovcnt = 0; iovb->len = 0; @@ -2202,7 +2202,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) printk("nicstar%d: Expected a small buffer, and this is not one.\n", card->index); which_list(card, skb); - atomic_inc(&vcc->stats->rx_err); + atomic_inc_unchecked(&vcc->stats->rx_err); recycle_rx_buf(card, skb); vc->rx_iov = NULL; recycle_iov_buf(card, iovb); @@ -2216,7 +2216,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) printk("nicstar%d: Expected a large buffer, and this is not one.\n", card->index); which_list(card, skb); - atomic_inc(&vcc->stats->rx_err); + atomic_inc_unchecked(&vcc->stats->rx_err); 
recycle_iovec_rx_bufs(card, (struct iovec *) iovb->data, NS_SKB(iovb)->iovcnt); vc->rx_iov = NULL; @@ -2240,7 +2240,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) printk(" - PDU size mismatch.\n"); else printk(".\n"); - atomic_inc(&vcc->stats->rx_err); + atomic_inc_unchecked(&vcc->stats->rx_err); recycle_iovec_rx_bufs(card, (struct iovec *) iovb->data, NS_SKB(iovb)->iovcnt); vc->rx_iov = NULL; @@ -2256,7 +2256,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) if (!atm_charge(vcc, skb->truesize)) { push_rxbufs(card, skb); - atomic_inc(&vcc->stats->rx_drop); + atomic_inc_unchecked(&vcc->stats->rx_drop); } else { @@ -2268,7 +2268,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) ATM_SKB(skb)->vcc = vcc; __net_timestamp(skb); vcc->push(vcc, skb); - atomic_inc(&vcc->stats->rx); + atomic_inc_unchecked(&vcc->stats->rx); } } else if (NS_SKB(iovb)->iovcnt == 2) /* One small plus one large buffer */ @@ -2283,7 +2283,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) if (!atm_charge(vcc, sb->truesize)) { push_rxbufs(card, sb); - atomic_inc(&vcc->stats->rx_drop); + atomic_inc_unchecked(&vcc->stats->rx_drop); } else { @@ -2295,7 +2295,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) ATM_SKB(sb)->vcc = vcc; __net_timestamp(sb); vcc->push(vcc, sb); - atomic_inc(&vcc->stats->rx); + atomic_inc_unchecked(&vcc->stats->rx); } push_rxbufs(card, skb); @@ -2306,7 +2306,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) if (!atm_charge(vcc, skb->truesize)) { push_rxbufs(card, skb); - atomic_inc(&vcc->stats->rx_drop); + atomic_inc_unchecked(&vcc->stats->rx_drop); } else { @@ -2320,7 +2320,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) ATM_SKB(skb)->vcc = vcc; __net_timestamp(skb); vcc->push(vcc, skb); - atomic_inc(&vcc->stats->rx); + atomic_inc_unchecked(&vcc->stats->rx); } push_rxbufs(card, sb); @@ -2342,7 +2342,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) if (hb == NULL) { printk("nicstar%d: Out of huge buffers.\n", 
card->index); - atomic_inc(&vcc->stats->rx_drop); + atomic_inc_unchecked(&vcc->stats->rx_drop); recycle_iovec_rx_bufs(card, (struct iovec *) iovb->data, NS_SKB(iovb)->iovcnt); vc->rx_iov = NULL; @@ -2393,7 +2393,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) } else dev_kfree_skb_any(hb); - atomic_inc(&vcc->stats->rx_drop); + atomic_inc_unchecked(&vcc->stats->rx_drop); } else { @@ -2427,7 +2427,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) #endif /* NS_USE_DESTRUCTORS */ __net_timestamp(hb); vcc->push(vcc, hb); - atomic_inc(&vcc->stats->rx); + atomic_inc_unchecked(&vcc->stats->rx); } } diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index 84c93ff..e6ed269 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -708,7 +708,7 @@ void solos_bh(unsigned long card_arg) } atm_charge(vcc, skb->truesize); vcc->push(vcc, skb); - atomic_inc(&vcc->stats->rx); + atomic_inc_unchecked(&vcc->stats->rx); break; case PKT_STATUS: @@ -914,6 +914,8 @@ static int print_buffer(struct sk_buff *buf) char msg[500]; char item[10]; + pax_track_stack(); + len = buf->len; for (i = 0; i < len; i++){ if(i % 8 == 0) @@ -1023,7 +1025,7 @@ static uint32_t fpga_tx(struct solos_card *card) vcc = SKB_CB(oldskb)->vcc; if (vcc) { - atomic_inc(&vcc->stats->tx); + atomic_inc_unchecked(&vcc->stats->tx); solos_pop(vcc, oldskb); } else dev_kfree_skb_irq(oldskb); diff --git a/drivers/atm/suni.c b/drivers/atm/suni.c index 6dd3f59..ee377f3 100644 --- a/drivers/atm/suni.c +++ b/drivers/atm/suni.c @@ -49,8 +49,8 @@ static DEFINE_SPINLOCK(sunis_lock); #define ADD_LIMITED(s,v) \ - atomic_add((v),&stats->s); \ - if (atomic_read(&stats->s) < 0) atomic_set(&stats->s,INT_MAX); + atomic_add_unchecked((v),&stats->s); \ + if (atomic_read_unchecked(&stats->s) < 0) atomic_set_unchecked(&stats->s,INT_MAX); static void suni_hz(unsigned long from_timer) diff --git a/drivers/atm/uPD98402.c b/drivers/atm/uPD98402.c index fc8cb07..4a80e53 100644 --- a/drivers/atm/uPD98402.c +++ 
b/drivers/atm/uPD98402.c @@ -41,7 +41,7 @@ static int fetch_stats(struct atm_dev *dev,struct sonet_stats __user *arg,int ze struct sonet_stats tmp; int error = 0; - atomic_add(GET(HECCT),&PRIV(dev)->sonet_stats.uncorr_hcs); + atomic_add_unchecked(GET(HECCT),&PRIV(dev)->sonet_stats.uncorr_hcs); sonet_copy_stats(&PRIV(dev)->sonet_stats,&tmp); if (arg) error = copy_to_user(arg,&tmp,sizeof(tmp)); if (zero && !error) { @@ -160,9 +160,9 @@ static int uPD98402_ioctl(struct atm_dev *dev,unsigned int cmd,void __user *arg) #define ADD_LIMITED(s,v) \ - { atomic_add(GET(v),&PRIV(dev)->sonet_stats.s); \ - if (atomic_read(&PRIV(dev)->sonet_stats.s) < 0) \ - atomic_set(&PRIV(dev)->sonet_stats.s,INT_MAX); } + { atomic_add_unchecked(GET(v),&PRIV(dev)->sonet_stats.s); \ + if (atomic_read_unchecked(&PRIV(dev)->sonet_stats.s) < 0) \ + atomic_set_unchecked(&PRIV(dev)->sonet_stats.s,INT_MAX); } static void stat_event(struct atm_dev *dev) @@ -193,7 +193,7 @@ static void uPD98402_int(struct atm_dev *dev) if (reason & uPD98402_INT_PFM) stat_event(dev); if (reason & uPD98402_INT_PCO) { (void) GET(PCOCR); /* clear interrupt cause */ - atomic_add(GET(HECCT), + atomic_add_unchecked(GET(HECCT), &PRIV(dev)->sonet_stats.uncorr_hcs); } if ((reason & uPD98402_INT_RFO) && @@ -221,9 +221,9 @@ static int uPD98402_start(struct atm_dev *dev) PUT(~(uPD98402_INT_PFM | uPD98402_INT_ALM | uPD98402_INT_RFO | uPD98402_INT_LOS),PIMR); /* enable them */ (void) fetch_stats(dev,NULL,1); /* clear kernel counters */ - atomic_set(&PRIV(dev)->sonet_stats.corr_hcs,-1); - atomic_set(&PRIV(dev)->sonet_stats.tx_cells,-1); - atomic_set(&PRIV(dev)->sonet_stats.rx_cells,-1); + atomic_set_unchecked(&PRIV(dev)->sonet_stats.corr_hcs,-1); + atomic_set_unchecked(&PRIV(dev)->sonet_stats.tx_cells,-1); + atomic_set_unchecked(&PRIV(dev)->sonet_stats.rx_cells,-1); return 0; } diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index 2e9635b..32927b4 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -458,7 +458,7 @@ 
printk("dummy: 0x%08lx, 0x%08lx\n",dummy[0],dummy[1]); } if (!size) { dev_kfree_skb_irq(skb); - if (vcc) atomic_inc(&vcc->stats->rx_err); + if (vcc) atomic_inc_unchecked(&vcc->stats->rx_err); continue; } if (!atm_charge(vcc,skb->truesize)) { @@ -468,7 +468,7 @@ printk("dummy: 0x%08lx, 0x%08lx\n",dummy[0],dummy[1]); skb->len = size; ATM_SKB(skb)->vcc = vcc; vcc->push(vcc,skb); - atomic_inc(&vcc->stats->rx); + atomic_inc_unchecked(&vcc->stats->rx); } zout(pos & 0xffff,MTA(mbx)); #if 0 /* probably a stupid idea */ @@ -732,7 +732,7 @@ if (*ZATM_PRV_DSC(skb) != (uPD98401_TXPD_V | uPD98401_TXPD_DP | skb_queue_head(&zatm_vcc->backlog,skb); break; } - atomic_inc(&vcc->stats->tx); + atomic_inc_unchecked(&vcc->stats->tx); wake_up(&zatm_vcc->tx_wait); } diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 63c143e..fece183 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -70,7 +70,7 @@ static ssize_t drv_attr_store(struct kobject *kobj, struct attribute *attr, return ret; } -static struct sysfs_ops driver_sysfs_ops = { +static const struct sysfs_ops driver_sysfs_ops = { .show = drv_attr_show, .store = drv_attr_store, }; @@ -115,7 +115,7 @@ static ssize_t bus_attr_store(struct kobject *kobj, struct attribute *attr, return ret; } -static struct sysfs_ops bus_sysfs_ops = { +static const struct sysfs_ops bus_sysfs_ops = { .show = bus_attr_show, .store = bus_attr_store, }; @@ -154,7 +154,7 @@ static int bus_uevent_filter(struct kset *kset, struct kobject *kobj) return 0; } -static struct kset_uevent_ops bus_uevent_ops = { +static const struct kset_uevent_ops bus_uevent_ops = { .filter = bus_uevent_filter, }; diff --git a/drivers/base/class.c b/drivers/base/class.c index 6e2c3b0..cb61871 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -63,7 +63,7 @@ static void class_release(struct kobject *kobj) kfree(cp); } -static struct sysfs_ops class_sysfs_ops = { +static const struct sysfs_ops class_sysfs_ops = { .show = class_attr_show, .store = 
class_attr_store, }; diff --git a/drivers/base/core.c b/drivers/base/core.c index f33d768..a9358d0 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -100,7 +100,7 @@ static ssize_t dev_attr_store(struct kobject *kobj, struct attribute *attr, return ret; } -static struct sysfs_ops dev_sysfs_ops = { +static const struct sysfs_ops dev_sysfs_ops = { .show = dev_attr_show, .store = dev_attr_store, }; @@ -252,7 +252,7 @@ static int dev_uevent(struct kset *kset, struct kobject *kobj, return retval; } -static struct kset_uevent_ops device_uevent_ops = { +static const struct kset_uevent_ops device_uevent_ops = { .filter = dev_uevent_filter, .name = dev_uevent_name, .uevent = dev_uevent, diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 989429c..2272b00 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -44,7 +44,7 @@ static int memory_uevent(struct kset *kset, struct kobject *obj, struct kobj_uev return retval; } -static struct kset_uevent_ops memory_uevent_ops = { +static const struct kset_uevent_ops memory_uevent_ops = { .name = memory_uevent_name, .uevent = memory_uevent, }; diff --git a/drivers/base/sys.c b/drivers/base/sys.c index 3f202f7..61c4a6f 100644 --- a/drivers/base/sys.c +++ b/drivers/base/sys.c @@ -54,7 +54,7 @@ sysdev_store(struct kobject *kobj, struct attribute *attr, return -EIO; } -static struct sysfs_ops sysfs_ops = { +static const struct sysfs_ops sysfs_ops = { .show = sysdev_show, .store = sysdev_store, }; @@ -104,7 +104,7 @@ static ssize_t sysdev_class_store(struct kobject *kobj, struct attribute *attr, return -EIO; } -static struct sysfs_ops sysfs_class_ops = { +static const struct sysfs_ops sysfs_class_ops = { .show = sysdev_class_show, .store = sysdev_class_store, }; diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index eb4fa19..1954777 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -1973,6 +1973,8 @@ static bool DAC960_V1_ReadDeviceConfiguration(DAC960_Controller_T unsigned 
long flags; int Channel, TargetID; + pax_track_stack(); + if (!init_dma_loaf(Controller->PCIDevice, &local_dma, DAC960_V1_MaxChannels*(sizeof(DAC960_V1_DCDB_T) + sizeof(DAC960_SCSI_Inquiry_T) + diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 68b90d9..7e2e3f3 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1011,6 +1011,8 @@ static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode, int err; u32 cp; + memset(&arg64, 0, sizeof(arg64)); + err = 0; err |= copy_from_user(&arg64.LUN_info, &arg32->LUN_info, @@ -2852,7 +2854,7 @@ static unsigned long pollcomplete(int ctlr) /* Wait (up to 20 seconds) for a command to complete */ for (i = 20 * HZ; i > 0; i--) { - done = hba[ctlr]->access.command_completed(hba[ctlr]); + done = hba[ctlr]->access->command_completed(hba[ctlr]); if (done == FIFO_EMPTY) schedule_timeout_uninterruptible(1); else @@ -2876,7 +2878,7 @@ static int sendcmd_core(ctlr_info_t *h, CommandList_struct *c) resend_cmd1: /* Disable interrupt on the board. */ - h->access.set_intr_mask(h, CCISS_INTR_OFF); + h->access->set_intr_mask(h, CCISS_INTR_OFF); /* Make sure there is room in the command FIFO */ /* Actually it should be completely empty at this time */ @@ -2884,13 +2886,13 @@ resend_cmd1: /* tape side of the driver. 
*/ for (i = 200000; i > 0; i--) { /* if fifo isn't full go */ - if (!(h->access.fifo_full(h))) + if (!(h->access->fifo_full(h))) break; udelay(10); printk(KERN_WARNING "cciss cciss%d: SendCmd FIFO full," " waiting!\n", h->ctlr); } - h->access.submit_command(h, c); /* Send the cmd */ + h->access->submit_command(h, c); /* Send the cmd */ do { complete = pollcomplete(h->ctlr); @@ -3023,7 +3025,7 @@ static void start_io(ctlr_info_t *h) while (!hlist_empty(&h->reqQ)) { c = hlist_entry(h->reqQ.first, CommandList_struct, list); /* can't do anything if fifo is full */ - if ((h->access.fifo_full(h))) { + if ((h->access->fifo_full(h))) { printk(KERN_WARNING "cciss: fifo full\n"); break; } @@ -3033,7 +3035,7 @@ static void start_io(ctlr_info_t *h) h->Qdepth--; /* Tell the controller execute command */ - h->access.submit_command(h, c); + h->access->submit_command(h, c); /* Put job onto the completed Q */ addQ(&h->cmpQ, c); @@ -3393,17 +3395,17 @@ startio: static inline unsigned long get_next_completion(ctlr_info_t *h) { - return h->access.command_completed(h); + return h->access->command_completed(h); } static inline int interrupt_pending(ctlr_info_t *h) { - return h->access.intr_pending(h); + return h->access->intr_pending(h); } static inline long interrupt_not_for_us(ctlr_info_t *h) { - return (((h->access.intr_pending(h) == 0) || + return (((h->access->intr_pending(h) == 0) || (h->interrupts_enabled == 0))); } @@ -3892,7 +3894,7 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev) */ c->max_commands = readl(&(c->cfgtable->CmdsOutMax)); c->product_name = products[prod_index].product_name; - c->access = *(products[prod_index].access); + c->access = products[prod_index].access; c->nr_cmds = c->max_commands - 4; if ((readb(&c->cfgtable->Signature[0]) != 'C') || (readb(&c->cfgtable->Signature[1]) != 'I') || @@ -4291,7 +4293,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, } /* make sure the board interrupts are off */ - 
hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_OFF); + hba[i]->access->set_intr_mask(hba[i], CCISS_INTR_OFF); if (request_irq(hba[i]->intr[SIMPLE_MODE_INT], do_cciss_intr, IRQF_DISABLED | IRQF_SHARED, hba[i]->devname, hba[i])) { printk(KERN_ERR "cciss: Unable to get irq %d for %s\n", @@ -4341,7 +4343,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, cciss_scsi_setup(i); /* Turn the interrupts on so we can service requests */ - hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_ON); + hba[i]->access->set_intr_mask(hba[i], CCISS_INTR_ON); /* Get the firmware version */ inq_buff = kzalloc(sizeof(InquiryData_struct), GFP_KERNEL); diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h index 04d6bf8..36e712d 100644 --- a/drivers/block/cciss.h +++ b/drivers/block/cciss.h @@ -90,7 +90,7 @@ struct ctlr_info // information about each logical volume drive_info_struct *drv[CISS_MAX_LUN]; - struct access_method access; + struct access_method *access; /* queue and queue Info */ struct hlist_head reqQ; diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 6422651..bb1bdef 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -402,7 +402,7 @@ static int __init cpqarray_register_ctlr( int i, struct pci_dev *pdev) if (register_blkdev(COMPAQ_SMART2_MAJOR+i, hba[i]->devname)) { goto Enomem4; } - hba[i]->access.set_intr_mask(hba[i], 0); + hba[i]->access->set_intr_mask(hba[i], 0); if (request_irq(hba[i]->intr, do_ida_intr, IRQF_DISABLED|IRQF_SHARED, hba[i]->devname, hba[i])) { @@ -460,7 +460,7 @@ static int __init cpqarray_register_ctlr( int i, struct pci_dev *pdev) add_timer(&hba[i]->timer); /* Enable IRQ now that spinlock and rate limit timer are set up */ - hba[i]->access.set_intr_mask(hba[i], FIFO_NOT_EMPTY); + hba[i]->access->set_intr_mask(hba[i], FIFO_NOT_EMPTY); for(j=0; jproduct_name = products[i].product_name; - c->access = *(products[i].access); + c->access = products[i].access; break; } } @@ -793,7 +793,7 @@ static int __init 
cpqarray_eisa_detect(void) hba[ctlr]->intr = intr; sprintf(hba[ctlr]->devname, "ida%d", nr_ctlr); hba[ctlr]->product_name = products[j].product_name; - hba[ctlr]->access = *(products[j].access); + hba[ctlr]->access = products[j].access; hba[ctlr]->ctlr = ctlr; hba[ctlr]->board_id = board_id; hba[ctlr]->pci_dev = NULL; /* not PCI */ @@ -896,6 +896,8 @@ static void do_ida_request(struct request_queue *q) struct scatterlist tmp_sg[SG_MAX]; int i, dir, seg; + pax_track_stack(); + if (blk_queue_plugged(q)) goto startio; @@ -968,7 +970,7 @@ static void start_io(ctlr_info_t *h) while((c = h->reqQ) != NULL) { /* Can't do anything if we're busy */ - if (h->access.fifo_full(h) == 0) + if (h->access->fifo_full(h) == 0) return; /* Get the first entry from the request Q */ @@ -976,7 +978,7 @@ static void start_io(ctlr_info_t *h) h->Qdepth--; /* Tell the controller to do our bidding */ - h->access.submit_command(h, c); + h->access->submit_command(h, c); /* Get onto the completion Q */ addQ(&h->cmpQ, c); @@ -1038,7 +1040,7 @@ static irqreturn_t do_ida_intr(int irq, void *dev_id) unsigned long flags; __u32 a,a1; - istat = h->access.intr_pending(h); + istat = h->access->intr_pending(h); /* Is this interrupt for us? */ if (istat == 0) return IRQ_NONE; @@ -1049,7 +1051,7 @@ static irqreturn_t do_ida_intr(int irq, void *dev_id) */ spin_lock_irqsave(IDA_LOCK(h->ctlr), flags); if (istat & FIFO_NOT_EMPTY) { - while((a = h->access.command_completed(h))) { + while((a = h->access->command_completed(h))) { a1 = a; a &= ~3; if ((c = h->cmpQ) == NULL) { @@ -1434,11 +1436,11 @@ static int sendcmd( /* * Disable interrupt */ - info_p->access.set_intr_mask(info_p, 0); + info_p->access->set_intr_mask(info_p, 0); /* Make sure there is room in the command FIFO */ /* Actually it should be completely empty at this time. 
*/ for (i = 200000; i > 0; i--) { - temp = info_p->access.fifo_full(info_p); + temp = info_p->access->fifo_full(info_p); if (temp != 0) { break; } @@ -1451,7 +1453,7 @@ DBG( /* * Send the cmd */ - info_p->access.submit_command(info_p, c); + info_p->access->submit_command(info_p, c); complete = pollcomplete(ctlr); pci_unmap_single(info_p->pci_dev, (dma_addr_t) c->req.sg[0].addr, @@ -1534,9 +1536,9 @@ static int revalidate_allvol(ctlr_info_t *host) * we check the new geometry. Then turn interrupts back on when * we're done. */ - host->access.set_intr_mask(host, 0); + host->access->set_intr_mask(host, 0); getgeometry(ctlr); - host->access.set_intr_mask(host, FIFO_NOT_EMPTY); + host->access->set_intr_mask(host, FIFO_NOT_EMPTY); for(i=0; i 0; i--) { - done = hba[ctlr]->access.command_completed(hba[ctlr]); + done = hba[ctlr]->access->command_completed(hba[ctlr]); if (done == 0) { udelay(10); /* a short fixed delay */ } else diff --git a/drivers/block/cpqarray.h b/drivers/block/cpqarray.h index be73e9d..7fbf140 100644 --- a/drivers/block/cpqarray.h +++ b/drivers/block/cpqarray.h @@ -99,7 +99,7 @@ struct ctlr_info { drv_info_t drv[NWD]; struct proc_dir_entry *proc; - struct access_method access; + struct access_method *access; cmdlist_t *reqQ; cmdlist_t *cmpQ; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 8ec2d70..2804b30 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -282,7 +282,7 @@ static int __do_lo_send_write(struct file *file, mm_segment_t old_fs = get_fs(); set_fs(get_ds()); - bw = file->f_op->write(file, buf, len, &pos); + bw = file->f_op->write(file, (const char __force_user *)buf, len, &pos); set_fs(old_fs); if (likely(bw == len)) return 0; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 26ada47..083c480 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -155,6 +155,8 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size, struct kvec iov; sigset_t blocked, oldset; + 
pax_track_stack(); + if (unlikely(!sock)) { printk(KERN_ERR "%s: Attempted %s on closed socket in sock_xmit\n", lo->disk->disk_name, (send ? "send" : "recv")); @@ -569,6 +571,8 @@ static void do_nbd_request(struct request_queue *q) static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo, unsigned int cmd, unsigned long arg) { + pax_track_stack(); + switch (cmd) { case NBD_DISCONNECT: { struct request sreq; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index a5d585d..d087be3 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -284,7 +284,7 @@ static ssize_t kobj_pkt_store(struct kobject *kobj, return len; } -static struct sysfs_ops kobj_pkt_ops = { +static const struct sysfs_ops kobj_pkt_ops = { .show = kobj_pkt_show, .store = kobj_pkt_store }; diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 6aad99e..89cd142 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -90,7 +90,8 @@ config VT_HW_CONSOLE_BINDING config DEVKMEM bool "/dev/kmem virtual device support" - default y + default n + depends on !GRKERNSEC_KMEM help Say Y here if you want to support the /dev/kmem device. 
The /dev/kmem device is rarely used, but can be used for certain @@ -1114,6 +1115,7 @@ config DEVPORT bool depends on !M68K depends on ISA || PCI + depends on !GRKERNSEC_KMEM default y source "drivers/s390/char/Kconfig" diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c index a96f319..a778a5b 100644 --- a/drivers/char/agp/frontend.c +++ b/drivers/char/agp/frontend.c @@ -824,7 +824,7 @@ static int agpioc_reserve_wrap(struct agp_file_private *priv, void __user *arg) if (copy_from_user(&reserve, arg, sizeof(struct agp_region))) return -EFAULT; - if ((unsigned) reserve.seg_count >= ~0U/sizeof(struct agp_segment)) + if ((unsigned) reserve.seg_count >= ~0U/sizeof(struct agp_segment_priv)) return -EFAULT; client = agp_find_client_by_pid(reserve.pid); diff --git a/drivers/char/briq_panel.c b/drivers/char/briq_panel.c index d8cff90..9628e70 100644 --- a/drivers/char/briq_panel.c +++ b/drivers/char/briq_panel.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,7 @@ static int vfd_is_open; static unsigned char vfd[40]; static int vfd_cursor; static unsigned char ledpb, led; +static DEFINE_MUTEX(vfd_mutex); static void update_vfd(void) { @@ -142,12 +144,15 @@ static ssize_t briq_panel_write(struct file *file, const char __user *buf, size_ if (!vfd_is_open) return -EBUSY; + mutex_lock(&vfd_mutex); for (;;) { char c; if (!indx) break; - if (get_user(c, buf)) + if (get_user(c, buf)) { + mutex_unlock(&vfd_mutex); return -EFAULT; + } if (esc) { set_led(c); esc = 0; @@ -177,6 +182,7 @@ static ssize_t briq_panel_write(struct file *file, const char __user *buf, size_ buf++; } update_vfd(); + mutex_unlock(&vfd_mutex); return len; } diff --git a/drivers/char/genrtc.c b/drivers/char/genrtc.c index 31e7c91..161afc0 100644 --- a/drivers/char/genrtc.c +++ b/drivers/char/genrtc.c @@ -272,6 +272,7 @@ static int gen_rtc_ioctl(struct inode *inode, struct file *file, switch (cmd) { case RTC_PLL_GET: + memset(&pll, 0, sizeof(pll)); 
if (get_rtc_pll(&pll)) return -EINVAL; else diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 006466d..a2bb21c 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -430,7 +430,7 @@ static int hpet_release(struct inode *inode, struct file *file) return 0; } -static int hpet_ioctl_common(struct hpet_dev *, int, unsigned long, int); +static int hpet_ioctl_common(struct hpet_dev *, unsigned int, unsigned long, int); static int hpet_ioctl(struct inode *inode, struct file *file, unsigned int cmd, @@ -565,7 +565,7 @@ static inline unsigned long hpet_time_div(struct hpets *hpets, } static int -hpet_ioctl_common(struct hpet_dev *devp, int cmd, unsigned long arg, int kernel) +hpet_ioctl_common(struct hpet_dev *devp, unsigned int cmd, unsigned long arg, int kernel) { struct hpet_timer __iomem *timer; struct hpet __iomem *hpet; @@ -608,11 +608,11 @@ hpet_ioctl_common(struct hpet_dev *devp, int cmd, unsigned long arg, int kernel) { struct hpet_info info; + memset(&info, 0, sizeof(info)); + if (devp->hd_ireqfreq) info.hi_ireqfreq = hpet_time_div(hpetp, devp->hd_ireqfreq); - else - info.hi_ireqfreq = 0; info.hi_flags = readq(&timer->hpet_config) & Tn_PER_INT_CAP_MASK; info.hi_hpet = hpetp->hp_which; diff --git a/drivers/char/hvc_beat.c b/drivers/char/hvc_beat.c index 0afc8b8..6913fc3 100644 --- a/drivers/char/hvc_beat.c +++ b/drivers/char/hvc_beat.c @@ -84,7 +84,7 @@ static int hvc_beat_put_chars(uint32_t vtermno, const char *buf, int cnt) return cnt; } -static struct hv_ops hvc_beat_get_put_ops = { +static const struct hv_ops hvc_beat_get_put_ops = { .get_chars = hvc_beat_get_chars, .put_chars = hvc_beat_put_chars, }; diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index 98097f2..407dddc 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -125,7 +125,7 @@ static struct hvc_struct *hvc_get_by_index(int index) * console interfaces but can still be used as a tty device. 
This has to be * static because kmalloc will not work during early console init. */ -static struct hv_ops *cons_ops[MAX_NR_HVC_CONSOLES]; +static const struct hv_ops *cons_ops[MAX_NR_HVC_CONSOLES]; static uint32_t vtermnos[MAX_NR_HVC_CONSOLES] = {[0 ... MAX_NR_HVC_CONSOLES - 1] = -1}; @@ -249,7 +249,7 @@ static void destroy_hvc_struct(struct kref *kref) * vty adapters do NOT get an hvc_instantiate() callback since they * appear after early console init. */ -int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops) +int hvc_instantiate(uint32_t vtermno, int index, const struct hv_ops *ops) { struct hvc_struct *hp; @@ -758,7 +758,7 @@ static const struct tty_operations hvc_ops = { }; struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int data, - struct hv_ops *ops, int outbuf_size) + const struct hv_ops *ops, int outbuf_size) { struct hvc_struct *hp; int i; diff --git a/drivers/char/hvc_console.h b/drivers/char/hvc_console.h index 10950ca..ed176c3 100644 --- a/drivers/char/hvc_console.h +++ b/drivers/char/hvc_console.h @@ -55,7 +55,7 @@ struct hvc_struct { int outbuf_size; int n_outbuf; uint32_t vtermno; - struct hv_ops *ops; + const struct hv_ops *ops; int irq_requested; int data; struct winsize ws; @@ -76,11 +76,11 @@ struct hv_ops { }; /* Register a vterm and a slot index for use as a console (console_init) */ -extern int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops); +extern int hvc_instantiate(uint32_t vtermno, int index, const struct hv_ops *ops); /* register a vterm for hvc tty operation (module_init or hotplug add) */ extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int data, - struct hv_ops *ops, int outbuf_size); + const struct hv_ops *ops, int outbuf_size); /* remove a vterm from hvc tty operation (module_exit or hotplug remove) */ extern int hvc_remove(struct hvc_struct *hp); diff --git a/drivers/char/hvc_iseries.c b/drivers/char/hvc_iseries.c index 936d05b..fd02426 100644 --- 
a/drivers/char/hvc_iseries.c +++ b/drivers/char/hvc_iseries.c @@ -197,7 +197,7 @@ done: return sent; } -static struct hv_ops hvc_get_put_ops = { +static const struct hv_ops hvc_get_put_ops = { .get_chars = get_chars, .put_chars = put_chars, .notifier_add = notifier_add_irq, diff --git a/drivers/char/hvc_iucv.c b/drivers/char/hvc_iucv.c index b0e168f..69cda2a 100644 --- a/drivers/char/hvc_iucv.c +++ b/drivers/char/hvc_iucv.c @@ -924,7 +924,7 @@ static int hvc_iucv_pm_restore_thaw(struct device *dev) /* HVC operations */ -static struct hv_ops hvc_iucv_ops = { +static const struct hv_ops hvc_iucv_ops = { .get_chars = hvc_iucv_get_chars, .put_chars = hvc_iucv_put_chars, .notifier_add = hvc_iucv_notifier_add, diff --git a/drivers/char/hvc_rtas.c b/drivers/char/hvc_rtas.c index 88590d0..61c4a61 100644 --- a/drivers/char/hvc_rtas.c +++ b/drivers/char/hvc_rtas.c @@ -71,7 +71,7 @@ static int hvc_rtas_read_console(uint32_t vtermno, char *buf, int count) return i; } -static struct hv_ops hvc_rtas_get_put_ops = { +static const struct hv_ops hvc_rtas_get_put_ops = { .get_chars = hvc_rtas_read_console, .put_chars = hvc_rtas_write_console, }; diff --git a/drivers/char/hvc_udbg.c b/drivers/char/hvc_udbg.c index bd63ba8..b0957e6 100644 --- a/drivers/char/hvc_udbg.c +++ b/drivers/char/hvc_udbg.c @@ -58,7 +58,7 @@ static int hvc_udbg_get(uint32_t vtermno, char *buf, int count) return i; } -static struct hv_ops hvc_udbg_ops = { +static const struct hv_ops hvc_udbg_ops = { .get_chars = hvc_udbg_get, .put_chars = hvc_udbg_put, }; diff --git a/drivers/char/hvc_vio.c b/drivers/char/hvc_vio.c index 10be343..27370e9 100644 --- a/drivers/char/hvc_vio.c +++ b/drivers/char/hvc_vio.c @@ -77,7 +77,7 @@ static int filtered_get_chars(uint32_t vtermno, char *buf, int count) return got; } -static struct hv_ops hvc_get_put_ops = { +static const struct hv_ops hvc_get_put_ops = { .get_chars = filtered_get_chars, .put_chars = hvc_put_chars, .notifier_add = notifier_add_irq, diff --git 
a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c index a6ee32b..94f8c26 100644 --- a/drivers/char/hvc_xen.c +++ b/drivers/char/hvc_xen.c @@ -120,7 +120,7 @@ static int read_console(uint32_t vtermno, char *buf, int len) return recv; } -static struct hv_ops hvc_ops = { +static const struct hv_ops hvc_ops = { .get_chars = read_console, .put_chars = write_console, .notifier_add = notifier_add_irq, diff --git a/drivers/char/hvcs.c b/drivers/char/hvcs.c index 266b858..f3ee0bb 100644 --- a/drivers/char/hvcs.c +++ b/drivers/char/hvcs.c @@ -82,6 +82,7 @@ #include #include #include +#include /* * 1.3.0 -> 1.3.1 In hvcs_open memset(..,0x00,..) instead of memset(..,0x3F,00). @@ -269,7 +270,7 @@ struct hvcs_struct { unsigned int index; struct tty_struct *tty; - int open_count; + local_t open_count; /* * Used to tell the driver kernel_thread what operations need to take @@ -419,7 +420,7 @@ static ssize_t hvcs_vterm_state_store(struct device *dev, struct device_attribut spin_lock_irqsave(&hvcsd->lock, flags); - if (hvcsd->open_count > 0) { + if (local_read(&hvcsd->open_count) > 0) { spin_unlock_irqrestore(&hvcsd->lock, flags); printk(KERN_INFO "HVCS: vterm state unchanged. 
" "The hvcs device node is still in use.\n"); @@ -1135,7 +1136,7 @@ static int hvcs_open(struct tty_struct *tty, struct file *filp) if ((retval = hvcs_partner_connect(hvcsd))) goto error_release; - hvcsd->open_count = 1; + local_set(&hvcsd->open_count, 1); hvcsd->tty = tty; tty->driver_data = hvcsd; @@ -1169,7 +1170,7 @@ fast_open: spin_lock_irqsave(&hvcsd->lock, flags); kref_get(&hvcsd->kref); - hvcsd->open_count++; + local_inc(&hvcsd->open_count); hvcsd->todo_mask |= HVCS_SCHED_READ; spin_unlock_irqrestore(&hvcsd->lock, flags); @@ -1213,7 +1214,7 @@ static void hvcs_close(struct tty_struct *tty, struct file *filp) hvcsd = tty->driver_data; spin_lock_irqsave(&hvcsd->lock, flags); - if (--hvcsd->open_count == 0) { + if (local_dec_and_test(&hvcsd->open_count)) { vio_disable_interrupts(hvcsd->vdev); @@ -1239,10 +1240,10 @@ static void hvcs_close(struct tty_struct *tty, struct file *filp) free_irq(irq, hvcsd); kref_put(&hvcsd->kref, destroy_hvcs_struct); return; - } else if (hvcsd->open_count < 0) { + } else if (local_read(&hvcsd->open_count) < 0) { printk(KERN_ERR "HVCS: vty-server@%X open_count: %d" " is missmanaged.\n", - hvcsd->vdev->unit_address, hvcsd->open_count); + hvcsd->vdev->unit_address, local_read(&hvcsd->open_count)); } spin_unlock_irqrestore(&hvcsd->lock, flags); @@ -1258,7 +1259,7 @@ static void hvcs_hangup(struct tty_struct * tty) spin_lock_irqsave(&hvcsd->lock, flags); /* Preserve this so that we know how many kref refs to put */ - temp_open_count = hvcsd->open_count; + temp_open_count = local_read(&hvcsd->open_count); /* * Don't kref put inside the spinlock because the destruction @@ -1273,7 +1274,7 @@ static void hvcs_hangup(struct tty_struct * tty) hvcsd->tty->driver_data = NULL; hvcsd->tty = NULL; - hvcsd->open_count = 0; + local_set(&hvcsd->open_count, 0); /* This will drop any buffered data on the floor which is OK in a hangup * scenario. */ @@ -1344,7 +1345,7 @@ static int hvcs_write(struct tty_struct *tty, * the middle of a write operation? 
This is a crummy place to do this * but we want to keep it all in the spinlock. */ - if (hvcsd->open_count <= 0) { + if (local_read(&hvcsd->open_count) <= 0) { spin_unlock_irqrestore(&hvcsd->lock, flags); return -ENODEV; } @@ -1418,7 +1419,7 @@ static int hvcs_write_room(struct tty_struct *tty) { struct hvcs_struct *hvcsd = tty->driver_data; - if (!hvcsd || hvcsd->open_count <= 0) + if (!hvcsd || local_read(&hvcsd->open_count) <= 0) return 0; return HVCS_BUFF_LEN - hvcsd->chars_in_buffer; diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index ec5e3f8..02455ba 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -414,7 +414,7 @@ struct ipmi_smi { struct proc_dir_entry *proc_dir; char proc_dir_name[10]; - atomic_t stats[IPMI_NUM_STATS]; + atomic_unchecked_t stats[IPMI_NUM_STATS]; /* * run_to_completion duplicate of smb_info, smi_info @@ -447,9 +447,9 @@ static DEFINE_MUTEX(smi_watchers_mutex); #define ipmi_inc_stat(intf, stat) \ - atomic_inc(&(intf)->stats[IPMI_STAT_ ## stat]) + atomic_inc_unchecked(&(intf)->stats[IPMI_STAT_ ## stat]) #define ipmi_get_stat(intf, stat) \ - ((unsigned int) atomic_read(&(intf)->stats[IPMI_STAT_ ## stat])) + ((unsigned int) atomic_read_unchecked(&(intf)->stats[IPMI_STAT_ ## stat])) static int is_lan_addr(struct ipmi_addr *addr) { @@ -2808,7 +2808,7 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, INIT_LIST_HEAD(&intf->cmd_rcvrs); init_waitqueue_head(&intf->waitq); for (i = 0; i < IPMI_NUM_STATS; i++) - atomic_set(&intf->stats[i], 0); + atomic_set_unchecked(&intf->stats[i], 0); intf->proc_dir = NULL; @@ -4160,6 +4160,8 @@ static void send_panic_events(char *str) struct ipmi_smi_msg smi_msg; struct ipmi_recv_msg recv_msg; + pax_track_stack(); + si = (struct ipmi_system_interface_addr *) &addr; si->addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; si->channel = IPMI_BMC_CHANNEL; diff --git a/drivers/char/ipmi/ipmi_si_intf.c 
b/drivers/char/ipmi/ipmi_si_intf.c index abae8c9..8021979 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -277,7 +277,7 @@ struct smi_info { unsigned char slave_addr; /* Counters and things for the proc filesystem. */ - atomic_t stats[SI_NUM_STATS]; + atomic_unchecked_t stats[SI_NUM_STATS]; struct task_struct *thread; @@ -285,9 +285,9 @@ struct smi_info { }; #define smi_inc_stat(smi, stat) \ - atomic_inc(&(smi)->stats[SI_STAT_ ## stat]) + atomic_inc_unchecked(&(smi)->stats[SI_STAT_ ## stat]) #define smi_get_stat(smi, stat) \ - ((unsigned int) atomic_read(&(smi)->stats[SI_STAT_ ## stat])) + ((unsigned int) atomic_read_unchecked(&(smi)->stats[SI_STAT_ ## stat])) #define SI_MAX_PARMS 4 @@ -2931,7 +2931,7 @@ static int try_smi_init(struct smi_info *new_smi) atomic_set(&new_smi->req_events, 0); new_smi->run_to_completion = 0; for (i = 0; i < SI_NUM_STATS; i++) - atomic_set(&new_smi->stats[i], 0); + atomic_set_unchecked(&new_smi->stats[i], 0); new_smi->interrupt_disabled = 0; atomic_set(&new_smi->stop_operation, 0); diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c index 402838f..55e2200 100644 --- a/drivers/char/istallion.c +++ b/drivers/char/istallion.c @@ -187,7 +187,6 @@ static struct ktermios stli_deftermios = { * re-used for each stats call. 
*/ static comstats_t stli_comstats; -static combrd_t stli_brdstats; static struct asystats stli_cdkstats; /*****************************************************************************/ @@ -4058,6 +4057,7 @@ static int stli_getbrdstats(combrd_t __user *bp) { struct stlibrd *brdp; unsigned int i; + combrd_t stli_brdstats; if (copy_from_user(&stli_brdstats, bp, sizeof(combrd_t))) return -EFAULT; @@ -4269,6 +4269,8 @@ static int stli_getportstruct(struct stliport __user *arg) struct stliport stli_dummyport; struct stliport *portp; + pax_track_stack(); + if (copy_from_user(&stli_dummyport, arg, sizeof(struct stliport))) return -EFAULT; portp = stli_getport(stli_dummyport.brdnr, stli_dummyport.panelnr, @@ -4291,6 +4293,8 @@ static int stli_getbrdstruct(struct stlibrd __user *arg) struct stlibrd stli_dummybrd; struct stlibrd *brdp; + pax_track_stack(); + if (copy_from_user(&stli_dummybrd, arg, sizeof(struct stlibrd))) return -EFAULT; if (stli_dummybrd.brdnr >= STL_MAXBRDS) diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c index 950837c..e55a288 100644 --- a/drivers/char/keyboard.c +++ b/drivers/char/keyboard.c @@ -635,6 +635,16 @@ static void k_spec(struct vc_data *vc, unsigned char value, char up_flag) kbd->kbdmode == VC_MEDIUMRAW) && value != KVAL(K_SAK)) return; /* SAK is allowed even in raw mode */ + +#if defined(CONFIG_GRKERNSEC_PROC) || defined(CONFIG_GRKERNSEC_PROC_MEMMAP) + { + void *func = fn_handler[value]; + if (func == fn_show_state || func == fn_show_ptregs || + func == fn_show_mem) + return; + } +#endif + fn_handler[value](vc); } @@ -1386,7 +1396,7 @@ static const struct input_device_id kbd_ids[] = { .evbit = { BIT_MASK(EV_SND) }, }, - { }, /* Terminating entry */ + { 0 }, /* Terminating entry */ }; MODULE_DEVICE_TABLE(input, kbd_ids); diff --git a/drivers/char/mbcs.c b/drivers/char/mbcs.c index 87c67b4..230527a 100644 --- a/drivers/char/mbcs.c +++ b/drivers/char/mbcs.c @@ -799,7 +799,7 @@ static int mbcs_remove(struct cx_dev *dev) return 0; } 
-static const struct cx_device_id __devinitdata mbcs_id_table[] = { +static const struct cx_device_id __devinitconst mbcs_id_table[] = { { .part_num = MBCS_PART_NUM, .mfg_num = MBCS_MFG_NUM, diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 1270f64..8495f49 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -35,6 +36,10 @@ # include #endif +#if defined(CONFIG_GRKERNSEC) && !defined(CONFIG_GRKERNSEC_NO_RBAC) +extern struct file_operations grsec_fops; +#endif + static inline unsigned long size_inside_page(unsigned long start, unsigned long size) { @@ -102,9 +107,13 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) while (cursor < to) { if (!devmem_is_allowed(pfn)) { +#ifdef CONFIG_GRKERNSEC_KMEM + gr_handle_mem_readwrite(from, to); +#else printk(KERN_INFO "Program %s tried to access /dev/mem between %Lx->%Lx.\n", current->comm, from, to); +#endif return 0; } cursor += PAGE_SIZE; @@ -112,6 +121,11 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) } return 1; } +#elif defined(CONFIG_GRKERNSEC_KMEM) +static inline int range_is_allowed(unsigned long pfn, unsigned long size) +{ + return 0; +} #else static inline int range_is_allowed(unsigned long pfn, unsigned long size) { @@ -155,6 +169,8 @@ static ssize_t read_mem(struct file * file, char __user * buf, #endif while (count > 0) { + char *temp; + /* * Handle first page in case it's not aligned */ @@ -177,11 +193,31 @@ static ssize_t read_mem(struct file * file, char __user * buf, if (!ptr) return -EFAULT; - if (copy_to_user(buf, ptr, sz)) { +#ifdef CONFIG_PAX_USERCOPY + temp = kmalloc(sz, GFP_KERNEL); + if (!temp) { + unxlate_dev_mem_ptr(p, ptr); + return -ENOMEM; + } + memcpy(temp, ptr, sz); +#else + temp = ptr; +#endif + + if (copy_to_user(buf, temp, sz)) { + +#ifdef CONFIG_PAX_USERCOPY + kfree(temp); +#endif + unxlate_dev_mem_ptr(p, ptr); return -EFAULT; } +#ifdef 
CONFIG_PAX_USERCOPY + kfree(temp); +#endif + unxlate_dev_mem_ptr(p, ptr); buf += sz; @@ -419,9 +455,8 @@ static ssize_t read_kmem(struct file *file, char __user *buf, size_t count, loff_t *ppos) { unsigned long p = *ppos; - ssize_t low_count, read, sz; + ssize_t low_count, read, sz, err = 0; char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */ - int err = 0; read = 0; if (p < (unsigned long) high_memory) { @@ -444,6 +479,8 @@ static ssize_t read_kmem(struct file *file, char __user *buf, } #endif while (low_count > 0) { + char *temp; + sz = size_inside_page(p, low_count); /* @@ -453,7 +490,22 @@ static ssize_t read_kmem(struct file *file, char __user *buf, */ kbuf = xlate_dev_kmem_ptr((char *)p); - if (copy_to_user(buf, kbuf, sz)) +#ifdef CONFIG_PAX_USERCOPY + temp = kmalloc(sz, GFP_KERNEL); + if (!temp) + return -ENOMEM; + memcpy(temp, kbuf, sz); +#else + temp = kbuf; +#endif + + err = copy_to_user(buf, temp, sz); + +#ifdef CONFIG_PAX_USERCOPY + kfree(temp); +#endif + + if (err) return -EFAULT; buf += sz; p += sz; @@ -889,6 +941,9 @@ static const struct memdev { #ifdef CONFIG_CRASH_DUMP [12] = { "oldmem", 0, &oldmem_fops, NULL }, #endif +#if defined(CONFIG_GRKERNSEC) && !defined(CONFIG_GRKERNSEC_NO_RBAC) + [13] = { "grsec",S_IRUSR | S_IWUGO, &grsec_fops, NULL }, +#endif }; static int memory_open(struct inode *inode, struct file *filp) diff --git a/drivers/char/pcmcia/ipwireless/tty.c b/drivers/char/pcmcia/ipwireless/tty.c index 674b3ab..a8d1970 100644 --- a/drivers/char/pcmcia/ipwireless/tty.c +++ b/drivers/char/pcmcia/ipwireless/tty.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "tty.h" #include "network.h" @@ -51,7 +52,7 @@ struct ipw_tty { int tty_type; struct ipw_network *network; struct tty_struct *linux_tty; - int open_count; + local_t open_count; unsigned int control_lines; struct mutex ipw_tty_mutex; int tx_bytes_queued; @@ -127,10 +128,10 @@ static int ipw_open(struct tty_struct *linux_tty, struct file *filp) 
mutex_unlock(&tty->ipw_tty_mutex); return -ENODEV; } - if (tty->open_count == 0) + if (local_read(&tty->open_count) == 0) tty->tx_bytes_queued = 0; - tty->open_count++; + local_inc(&tty->open_count); tty->linux_tty = linux_tty; linux_tty->driver_data = tty; @@ -146,9 +147,7 @@ static int ipw_open(struct tty_struct *linux_tty, struct file *filp) static void do_ipw_close(struct ipw_tty *tty) { - tty->open_count--; - - if (tty->open_count == 0) { + if (local_dec_return(&tty->open_count) == 0) { struct tty_struct *linux_tty = tty->linux_tty; if (linux_tty != NULL) { @@ -169,7 +168,7 @@ static void ipw_hangup(struct tty_struct *linux_tty) return; mutex_lock(&tty->ipw_tty_mutex); - if (tty->open_count == 0) { + if (local_read(&tty->open_count) == 0) { mutex_unlock(&tty->ipw_tty_mutex); return; } @@ -198,7 +197,7 @@ void ipwireless_tty_received(struct ipw_tty *tty, unsigned char *data, return; } - if (!tty->open_count) { + if (!local_read(&tty->open_count)) { mutex_unlock(&tty->ipw_tty_mutex); return; } @@ -240,7 +239,7 @@ static int ipw_write(struct tty_struct *linux_tty, return -ENODEV; mutex_lock(&tty->ipw_tty_mutex); - if (!tty->open_count) { + if (!local_read(&tty->open_count)) { mutex_unlock(&tty->ipw_tty_mutex); return -EINVAL; } @@ -280,7 +279,7 @@ static int ipw_write_room(struct tty_struct *linux_tty) if (!tty) return -ENODEV; - if (!tty->open_count) + if (!local_read(&tty->open_count)) return -EINVAL; room = IPWIRELESS_TX_QUEUE_SIZE - tty->tx_bytes_queued; @@ -322,7 +321,7 @@ static int ipw_chars_in_buffer(struct tty_struct *linux_tty) if (!tty) return 0; - if (!tty->open_count) + if (!local_read(&tty->open_count)) return 0; return tty->tx_bytes_queued; @@ -403,7 +402,7 @@ static int ipw_tiocmget(struct tty_struct *linux_tty, struct file *file) if (!tty) return -ENODEV; - if (!tty->open_count) + if (!local_read(&tty->open_count)) return -EINVAL; return get_control_lines(tty); @@ -419,7 +418,7 @@ ipw_tiocmset(struct tty_struct *linux_tty, struct file *file, if 
(!tty) return -ENODEV; - if (!tty->open_count) + if (!local_read(&tty->open_count)) return -EINVAL; return set_control_lines(tty, set, clear); @@ -433,7 +432,7 @@ static int ipw_ioctl(struct tty_struct *linux_tty, struct file *file, if (!tty) return -ENODEV; - if (!tty->open_count) + if (!local_read(&tty->open_count)) return -EINVAL; /* FIXME: Exactly how is the tty object locked here .. */ @@ -591,7 +590,7 @@ void ipwireless_tty_free(struct ipw_tty *tty) against a parallel ioctl etc */ mutex_lock(&ttyj->ipw_tty_mutex); } - while (ttyj->open_count) + while (local_read(&ttyj->open_count)) do_ipw_close(ttyj); ipwireless_disassociate_network_ttys(network, ttyj->channel_idx); diff --git a/drivers/char/pty.c b/drivers/char/pty.c index 62f282e..e45c45c 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -736,8 +736,10 @@ static void __init unix98_pty_init(void) register_sysctl_table(pty_root_table); /* Now create the /dev/ptmx special device */ + pax_open_kernel(); tty_default_fops(&ptmx_fops); - ptmx_fops.open = ptmx_open; + *(void **)&ptmx_fops.open = ptmx_open; + pax_close_kernel(); cdev_init(&ptmx_cdev, &ptmx_fops); if (cdev_add(&ptmx_cdev, MKDEV(TTYAUX_MAJOR, 2), 1) || diff --git a/drivers/char/random.c b/drivers/char/random.c index 3a19e2d..6ed09d3 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -254,8 +254,13 @@ /* * Configuration information */ +#ifdef CONFIG_GRKERNSEC_RANDNET +#define INPUT_POOL_WORDS 512 +#define OUTPUT_POOL_WORDS 128 +#else #define INPUT_POOL_WORDS 128 #define OUTPUT_POOL_WORDS 32 +#endif #define SEC_XFER_SIZE 512 /* @@ -292,10 +297,17 @@ static struct poolinfo { int poolwords; int tap1, tap2, tap3, tap4, tap5; } poolinfo_table[] = { +#ifdef CONFIG_GRKERNSEC_RANDNET + /* x^512 + x^411 + x^308 + x^208 +x^104 + x + 1 -- 225 */ + { 512, 411, 308, 208, 104, 1 }, + /* x^128 + x^103 + x^76 + x^51 + x^25 + x + 1 -- 105 */ + { 128, 103, 76, 51, 25, 1 }, +#else /* x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 -- 105 */ { 128, 103, 
76, 51, 25, 1 }, /* x^32 + x^26 + x^20 + x^14 + x^7 + x + 1 -- 15 */ { 32, 26, 20, 14, 7, 1 }, +#endif #if 0 /* x^2048 + x^1638 + x^1231 + x^819 + x^411 + x + 1 -- 115 */ { 2048, 1638, 1231, 819, 411, 1 }, @@ -1209,7 +1221,7 @@ EXPORT_SYMBOL(generate_random_uuid); #include static int min_read_thresh = 8, min_write_thresh; -static int max_read_thresh = INPUT_POOL_WORDS * 32; +static int max_read_thresh = OUTPUT_POOL_WORDS * 32; static int max_write_thresh = INPUT_POOL_WORDS * 32; static char sysctl_bootid[16]; diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c index 0e29a23..0efc2c2 100644 --- a/drivers/char/rocket.c +++ b/drivers/char/rocket.c @@ -1266,6 +1266,8 @@ static int get_ports(struct r_port *info, struct rocket_ports __user *retports) struct rocket_ports tmp; int board; + pax_track_stack(); + if (!retports) return -EFAULT; memset(&tmp, 0, sizeof (tmp)); diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c index 8c262aa..4d3b058 100644 --- a/drivers/char/sonypi.c +++ b/drivers/char/sonypi.c @@ -55,6 +55,7 @@ #include #include #include +#include #include @@ -491,7 +492,7 @@ static struct sonypi_device { spinlock_t fifo_lock; wait_queue_head_t fifo_proc_list; struct fasync_struct *fifo_async; - int open_count; + local_t open_count; int model; struct input_dev *input_jog_dev; struct input_dev *input_key_dev; @@ -895,7 +896,7 @@ static int sonypi_misc_fasync(int fd, struct file *filp, int on) static int sonypi_misc_release(struct inode *inode, struct file *file) { mutex_lock(&sonypi_device.lock); - sonypi_device.open_count--; + local_dec(&sonypi_device.open_count); mutex_unlock(&sonypi_device.lock); return 0; } @@ -905,9 +906,9 @@ static int sonypi_misc_open(struct inode *inode, struct file *file) lock_kernel(); mutex_lock(&sonypi_device.lock); /* Flush input queue on first open */ - if (!sonypi_device.open_count) + if (!local_read(&sonypi_device.open_count)) kfifo_reset(sonypi_device.fifo); - sonypi_device.open_count++; + 
local_inc(&sonypi_device.open_count); mutex_unlock(&sonypi_device.lock); unlock_kernel(); return 0; diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c index db6dcfa..13834cb 100644 --- a/drivers/char/stallion.c +++ b/drivers/char/stallion.c @@ -2448,6 +2448,8 @@ static int stl_getportstruct(struct stlport __user *arg) struct stlport stl_dummyport; struct stlport *portp; + pax_track_stack(); + if (copy_from_user(&stl_dummyport, arg, sizeof(struct stlport))) return -EFAULT; portp = stl_getport(stl_dummyport.brdnr, stl_dummyport.panelnr, diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index a0789f6..cea3902 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -405,7 +405,7 @@ static ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf, chip->vendor.req_complete_val) goto out_recv; - if ((status == chip->vendor.req_canceled)) { + if (status == chip->vendor.req_canceled) { dev_err(chip->dev, "Operation Canceled\n"); rc = -ECANCELED; goto out; @@ -824,6 +824,8 @@ ssize_t tpm_show_pubek(struct device *dev, struct device_attribute *attr, struct tpm_chip *chip = dev_get_drvdata(dev); + pax_track_stack(); + tpm_cmd.header.in = tpm_readpubek_header; err = transmit_cmd(chip, &tpm_cmd, READ_PUBEK_RESULT_SIZE, "attempting to read the PUBEK"); diff --git a/drivers/char/tpm/tpm_bios.c b/drivers/char/tpm/tpm_bios.c index bf2170f..ce8cab9 100644 --- a/drivers/char/tpm/tpm_bios.c +++ b/drivers/char/tpm/tpm_bios.c @@ -172,7 +172,7 @@ static void *tpm_bios_measurements_start(struct seq_file *m, loff_t *pos) event = addr; if ((event->event_type == 0 && event->event_size == 0) || - ((addr + sizeof(struct tcpa_event) + event->event_size) >= limit)) + (event->event_size >= limit - addr - sizeof(struct tcpa_event))) return NULL; return addr; @@ -197,7 +197,7 @@ static void *tpm_bios_measurements_next(struct seq_file *m, void *v, return NULL; if ((event->event_type == 0 && event->event_size == 0) || - ((v + sizeof(struct tcpa_event) + 
event->event_size) >= limit)) + (event->event_size >= limit - v - sizeof(struct tcpa_event))) return NULL; (*pos)++; @@ -290,7 +290,8 @@ static int tpm_binary_bios_measurements_show(struct seq_file *m, void *v) int i; for (i = 0; i < sizeof(struct tcpa_event) + event->event_size; i++) - seq_putc(m, data[i]); + if (!seq_putc(m, data[i])) + return -EFAULT; return 0; } @@ -409,8 +410,13 @@ static int read_log(struct tpm_bios_log *log) log->bios_event_log_end = log->bios_event_log + len; virt = acpi_os_map_memory(start, len); + if (!virt) { + kfree(log->bios_event_log); + log->bios_event_log = NULL; + return -EFAULT; + } - memcpy(log->bios_event_log, virt, len); + memcpy(log->bios_event_log, (const char __force_kernel *)virt, len); acpi_os_unmap_memory(virt, len); return 0; diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 123cedf..137edef 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -1774,6 +1774,7 @@ got_driver: if (IS_ERR(tty)) { mutex_unlock(&tty_mutex); + tty_driver_kref_put(driver); return PTR_ERR(tty); } } @@ -2603,8 +2604,10 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return retval; } +EXPORT_SYMBOL(tty_ioctl); + #ifdef CONFIG_COMPAT -static long tty_compat_ioctl(struct file *file, unsigned int cmd, +long tty_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct inode *inode = file->f_dentry->d_inode; @@ -2628,6 +2631,8 @@ static long tty_compat_ioctl(struct file *file, unsigned int cmd, return retval; } + +EXPORT_SYMBOL(tty_compat_ioctl); #endif /* @@ -3073,7 +3078,7 @@ EXPORT_SYMBOL_GPL(get_current_tty); void tty_default_fops(struct file_operations *fops) { - *fops = tty_fops; + memcpy((void *)fops, &tty_fops, sizeof(tty_fops)); } /* diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c index d814a3d..b55b9c9 100644 --- a/drivers/char/tty_ldisc.c +++ b/drivers/char/tty_ldisc.c @@ -74,7 +74,7 @@ static void put_ldisc(struct tty_ldisc *ld) if 
(atomic_dec_and_lock(&ld->users, &tty_ldisc_lock)) { struct tty_ldisc_ops *ldo = ld->ops; - ldo->refcount--; + atomic_dec(&ldo->refcount); module_put(ldo->owner); spin_unlock_irqrestore(&tty_ldisc_lock, flags); @@ -109,7 +109,7 @@ int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc) spin_lock_irqsave(&tty_ldisc_lock, flags); tty_ldiscs[disc] = new_ldisc; new_ldisc->num = disc; - new_ldisc->refcount = 0; + atomic_set(&new_ldisc->refcount, 0); spin_unlock_irqrestore(&tty_ldisc_lock, flags); return ret; @@ -137,7 +137,7 @@ int tty_unregister_ldisc(int disc) return -EINVAL; spin_lock_irqsave(&tty_ldisc_lock, flags); - if (tty_ldiscs[disc]->refcount) + if (atomic_read(&tty_ldiscs[disc]->refcount)) ret = -EBUSY; else tty_ldiscs[disc] = NULL; @@ -158,7 +158,7 @@ static struct tty_ldisc_ops *get_ldops(int disc) if (ldops) { ret = ERR_PTR(-EAGAIN); if (try_module_get(ldops->owner)) { - ldops->refcount++; + atomic_inc(&ldops->refcount); ret = ldops; } } @@ -171,7 +171,7 @@ static void put_ldops(struct tty_ldisc_ops *ldops) unsigned long flags; spin_lock_irqsave(&tty_ldisc_lock, flags); - ldops->refcount--; + atomic_dec(&ldops->refcount); module_put(ldops->owner); spin_unlock_irqrestore(&tty_ldisc_lock, flags); } diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index a035ae3..c27fe2c 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -133,7 +133,9 @@ static int get_chars(u32 vtermno, char *buf, int count) * virtqueue, so we let the drivers do some boutique early-output thing. */ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int)) { - virtio_cons.put_chars = put_chars; + pax_open_kernel(); + *(void **)&virtio_cons.put_chars = put_chars; + pax_close_kernel(); return hvc_instantiate(0, 0, &virtio_cons); } @@ -213,11 +215,13 @@ static int __devinit virtcons_probe(struct virtio_device *dev) out_vq = vqs[1]; /* Start using the new console output. 
*/ - virtio_cons.get_chars = get_chars; - virtio_cons.put_chars = put_chars; - virtio_cons.notifier_add = notifier_add_vio; - virtio_cons.notifier_del = notifier_del_vio; - virtio_cons.notifier_hangup = notifier_del_vio; + pax_open_kernel(); + *(void **)&virtio_cons.get_chars = get_chars; + *(void **)&virtio_cons.put_chars = put_chars; + *(void **)&virtio_cons.notifier_add = notifier_add_vio; + *(void **)&virtio_cons.notifier_del = notifier_del_vio; + *(void **)&virtio_cons.notifier_hangup = notifier_del_vio; + pax_close_kernel(); /* The first argument of hvc_alloc() is the virtual console number, so * we use zero. The second argument is the parameter for the diff --git a/drivers/char/vt.c b/drivers/char/vt.c index 0c80c68..53d59c1 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -243,7 +243,7 @@ EXPORT_SYMBOL_GPL(unregister_vt_notifier); static void notify_write(struct vc_data *vc, unsigned int unicode) { - struct vt_notifier_param param = { .vc = vc, unicode = unicode }; + struct vt_notifier_param param = { .vc = vc, .c = unicode }; atomic_notifier_call_chain(&vt_notifier_list, VT_WRITE, &param); } diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c index 6351a26..999af95 100644 --- a/drivers/char/vt_ioctl.c +++ b/drivers/char/vt_ioctl.c @@ -210,9 +210,6 @@ do_kdsk_ioctl(int cmd, struct kbentry __user *user_kbe, int perm, struct kbd_str if (copy_from_user(&tmp, user_kbe, sizeof(struct kbentry))) return -EFAULT; - if (!capable(CAP_SYS_TTY_CONFIG)) - perm = 0; - switch (cmd) { case KDGKBENT: key_map = key_maps[s]; @@ -224,8 +221,12 @@ do_kdsk_ioctl(int cmd, struct kbentry __user *user_kbe, int perm, struct kbd_str val = (i ?
K_HOLE : K_NOSUCHMAP); return put_user(val, &user_kbe->kb_value); case KDSKBENT: + if (!capable(CAP_SYS_TTY_CONFIG)) + perm = 0; + if (!perm) return -EPERM; + if (!i && v == K_NOSUCHMAP) { /* deallocate map */ key_map = key_maps[s]; @@ -325,9 +326,6 @@ do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, int perm) int i, j, k; int ret; - if (!capable(CAP_SYS_TTY_CONFIG)) - perm = 0; - kbs = kmalloc(sizeof(*kbs), GFP_KERNEL); if (!kbs) { ret = -ENOMEM; @@ -361,6 +359,9 @@ do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, int perm) kfree(kbs); return ((p && *p) ? -EOVERFLOW : 0); case KDSKBSENT: + if (!capable(CAP_SYS_TTY_CONFIG)) + perm = 0; + if (!perm) { ret = -EPERM; goto reterr; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c7ae026..1769c1d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -750,7 +750,7 @@ static void cpufreq_sysfs_release(struct kobject *kobj) complete(&policy->kobj_unregister); } -static struct sysfs_ops sysfs_ops = { +static const struct sysfs_ops sysfs_ops = { .show = show, .store = store, }; diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 97b0038..2056670 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -191,7 +191,7 @@ static ssize_t cpuidle_store(struct kobject * kobj, struct attribute * attr, return ret; } -static struct sysfs_ops cpuidle_sysfs_ops = { +static const struct sysfs_ops cpuidle_sysfs_ops = { .show = cpuidle_show, .store = cpuidle_store, }; @@ -277,7 +277,7 @@ static ssize_t cpuidle_state_show(struct kobject * kobj, return ret; } -static struct sysfs_ops cpuidle_state_sysfs_ops = { +static const struct sysfs_ops cpuidle_state_sysfs_ops = { .show = cpuidle_state_show, }; @@ -294,7 +294,7 @@ static struct kobj_type ktype_state_cpuidle = { .release = cpuidle_state_sysfs_release, }; -static void inline cpuidle_free_state_kobj(struct cpuidle_device *device, int i) +static inline void cpuidle_free_state_kobj(struct 
cpuidle_device *device, int i) { kobject_put(&device->kobjs[i]->kobj); wait_for_completion(&device->kobjs[i]->kobj_unregister); diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c index 5f753fc..0377ae9 100644 --- a/drivers/crypto/hifn_795x.c +++ b/drivers/crypto/hifn_795x.c @@ -1655,6 +1655,8 @@ static int hifn_test(struct hifn_device *dev, int encdec, u8 snum) 0xCA, 0x34, 0x2B, 0x2E}; struct scatterlist sg; + pax_track_stack(); + memset(src, 0, sizeof(src)); memset(ctx.key, 0, sizeof(ctx.key)); diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 71e6482..de8d96c 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -108,6 +108,8 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, struct crypto_aes_ctx gen_aes; int cpu; + pax_track_stack(); + if (key_len % 8) { *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index dcc4ab7..cc834bb 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -1146,7 +1146,7 @@ ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page) return entry->show(&chan->common, page); } -struct sysfs_ops ioat_sysfs_ops = { +const struct sysfs_ops ioat_sysfs_ops = { .show = ioat_attr_show, }; diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index bbc3e78..f2db62c 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -347,7 +347,7 @@ bool ioat_cleanup_preamble(struct ioat_chan_common *chan, unsigned long *phys_complete); void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type); void ioat_kobject_del(struct ioatdma_device *device); -extern struct sysfs_ops ioat_sysfs_ops; +extern const struct sysfs_ops ioat_sysfs_ops; extern struct ioat_sysfs_entry ioat_version_attr; extern struct ioat_sysfs_entry ioat_cap_attr; #endif /* IOATDMA_H */ diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 9908c9e..3ceb0e5 
100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -71,10 +71,10 @@ /* provide a lookup table for setting the source address in the base or * extended descriptor of an xor or pq descriptor */ -static const u8 xor_idx_to_desc __read_mostly = 0xd0; -static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 }; -static const u8 pq_idx_to_desc __read_mostly = 0xf8; -static const u8 pq_idx_to_field[] __read_mostly = { 1, 4, 5, 0, 1, 2, 4, 5 }; +static const u8 xor_idx_to_desc = 0xd0; +static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; +static const u8 pq_idx_to_desc = 0xf8; +static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx) { diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 85c464a..afd1e73 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3099,7 +3099,7 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev) * PCI core identifies what devices are on a system during boot, and then * inquiry this table to see if this driver is for a given device found. 
*/ -static const struct pci_device_id amd64_pci_table[] __devinitdata = { +static const struct pci_device_id amd64_pci_table[] __devinitconst = { { .vendor = PCI_VENDOR_ID_AMD, .device = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL, diff --git a/drivers/edac/amd76x_edac.c b/drivers/edac/amd76x_edac.c index 2b95f1a..4f52793 100644 --- a/drivers/edac/amd76x_edac.c +++ b/drivers/edac/amd76x_edac.c @@ -322,7 +322,7 @@ static void __devexit amd76x_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id amd76x_pci_tbl[] __devinitdata = { +static const struct pci_device_id amd76x_pci_tbl[] __devinitconst = { { PCI_VEND_DEV(AMD, FE_GATE_700C), PCI_ANY_ID, PCI_ANY_ID, 0, 0, AMD762}, diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c index d205d49..74c9672 100644 --- a/drivers/edac/e752x_edac.c +++ b/drivers/edac/e752x_edac.c @@ -1282,7 +1282,7 @@ static void __devexit e752x_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id e752x_pci_tbl[] __devinitdata = { +static const struct pci_device_id e752x_pci_tbl[] __devinitconst = { { PCI_VEND_DEV(INTEL, 7520_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, E7520}, diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c index c7d11cc..c59c1ca 100644 --- a/drivers/edac/e7xxx_edac.c +++ b/drivers/edac/e7xxx_edac.c @@ -526,7 +526,7 @@ static void __devexit e7xxx_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id e7xxx_pci_tbl[] __devinitdata = { +static const struct pci_device_id e7xxx_pci_tbl[] __devinitconst = { { PCI_VEND_DEV(INTEL, 7205_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, E7205}, diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c index 5376457..5fdedbc 100644 --- a/drivers/edac/edac_device_sysfs.c +++ b/drivers/edac/edac_device_sysfs.c @@ -137,7 +137,7 @@ static ssize_t edac_dev_ctl_info_store(struct kobject *kobj, } /* edac_dev file operations for an 'ctl_info' */ -static struct sysfs_ops 
device_ctl_info_ops = { +static const struct sysfs_ops device_ctl_info_ops = { .show = edac_dev_ctl_info_show, .store = edac_dev_ctl_info_store }; @@ -373,7 +373,7 @@ static ssize_t edac_dev_instance_store(struct kobject *kobj, } /* edac_dev file operations for an 'instance' */ -static struct sysfs_ops device_instance_ops = { +static const struct sysfs_ops device_instance_ops = { .show = edac_dev_instance_show, .store = edac_dev_instance_store }; @@ -476,7 +476,7 @@ static ssize_t edac_dev_block_store(struct kobject *kobj, } /* edac_dev file operations for a 'block' */ -static struct sysfs_ops device_block_ops = { +static const struct sysfs_ops device_block_ops = { .show = edac_dev_block_show, .store = edac_dev_block_store }; diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index e1d4ce0..88840e9 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -245,7 +245,7 @@ static ssize_t csrowdev_store(struct kobject *kobj, struct attribute *attr, return -EIO; } -static struct sysfs_ops csrowfs_ops = { +static const struct sysfs_ops csrowfs_ops = { .show = csrowdev_show, .store = csrowdev_store }; @@ -575,7 +575,7 @@ static ssize_t mcidev_store(struct kobject *kobj, struct attribute *attr, } /* Intermediate show/store table */ -static struct sysfs_ops mci_ops = { +static const struct sysfs_ops mci_ops = { .show = mcidev_show, .store = mcidev_store }; diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index 422728c..d8d9c88 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -25,8 +25,8 @@ static int edac_pci_log_pe = 1; /* log PCI parity errors */ static int edac_pci_log_npe = 1; /* log PCI non-parity error errors */ static int edac_pci_poll_msec = 1000; /* one second workq period */ -static atomic_t pci_parity_count = ATOMIC_INIT(0); -static atomic_t pci_nonparity_count = ATOMIC_INIT(0); +static atomic_unchecked_t pci_parity_count = ATOMIC_INIT(0); +static 
atomic_unchecked_t pci_nonparity_count = ATOMIC_INIT(0); static struct kobject *edac_pci_top_main_kobj; static atomic_t edac_pci_sysfs_refcount = ATOMIC_INIT(0); @@ -121,7 +121,7 @@ static ssize_t edac_pci_instance_store(struct kobject *kobj, } /* fs_ops table */ -static struct sysfs_ops pci_instance_ops = { +static const struct sysfs_ops pci_instance_ops = { .show = edac_pci_instance_show, .store = edac_pci_instance_store }; @@ -261,7 +261,7 @@ static ssize_t edac_pci_dev_store(struct kobject *kobj, return -EIO; } -static struct sysfs_ops edac_pci_sysfs_ops = { +static const struct sysfs_ops edac_pci_sysfs_ops = { .show = edac_pci_dev_show, .store = edac_pci_dev_store }; @@ -579,7 +579,7 @@ static void edac_pci_dev_parity_test(struct pci_dev *dev) edac_printk(KERN_CRIT, EDAC_PCI, "Signaled System Error on %s\n", pci_name(dev)); - atomic_inc(&pci_nonparity_count); + atomic_inc_unchecked(&pci_nonparity_count); } if (status & (PCI_STATUS_PARITY)) { @@ -587,7 +587,7 @@ static void edac_pci_dev_parity_test(struct pci_dev *dev) "Master Data Parity Error on %s\n", pci_name(dev)); - atomic_inc(&pci_parity_count); + atomic_inc_unchecked(&pci_parity_count); } if (status & (PCI_STATUS_DETECTED_PARITY)) { @@ -595,7 +595,7 @@ static void edac_pci_dev_parity_test(struct pci_dev *dev) "Detected Parity Error on %s\n", pci_name(dev)); - atomic_inc(&pci_parity_count); + atomic_inc_unchecked(&pci_parity_count); } } @@ -616,7 +616,7 @@ static void edac_pci_dev_parity_test(struct pci_dev *dev) edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " "Signaled System Error on %s\n", pci_name(dev)); - atomic_inc(&pci_nonparity_count); + atomic_inc_unchecked(&pci_nonparity_count); } if (status & (PCI_STATUS_PARITY)) { @@ -624,7 +624,7 @@ static void edac_pci_dev_parity_test(struct pci_dev *dev) "Master Data Parity Error on " "%s\n", pci_name(dev)); - atomic_inc(&pci_parity_count); + atomic_inc_unchecked(&pci_parity_count); } if (status & (PCI_STATUS_DETECTED_PARITY)) { @@ -632,7 +632,7 @@ static void 
edac_pci_dev_parity_test(struct pci_dev *dev) "Detected Parity Error on %s\n", pci_name(dev)); - atomic_inc(&pci_parity_count); + atomic_inc_unchecked(&pci_parity_count); } } } @@ -674,7 +674,7 @@ void edac_pci_do_parity_check(void) if (!check_pci_errors) return; - before_count = atomic_read(&pci_parity_count); + before_count = atomic_read_unchecked(&pci_parity_count); /* scan all PCI devices looking for a Parity Error on devices and * bridges. @@ -686,7 +686,7 @@ void edac_pci_do_parity_check(void) /* Only if operator has selected panic on PCI Error */ if (edac_pci_get_panic_on_pe()) { /* If the count is different 'after' from 'before' */ - if (before_count != atomic_read(&pci_parity_count)) + if (before_count != atomic_read_unchecked(&pci_parity_count)) panic("EDAC: PCI Parity Error"); } } diff --git a/drivers/edac/i3000_edac.c b/drivers/edac/i3000_edac.c index 6c9a0f2..9c1cf7e 100644 --- a/drivers/edac/i3000_edac.c +++ b/drivers/edac/i3000_edac.c @@ -471,7 +471,7 @@ static void __devexit i3000_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id i3000_pci_tbl[] __devinitdata = { +static const struct pci_device_id i3000_pci_tbl[] __devinitconst = { { PCI_VEND_DEV(INTEL, 3000_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I3000}, diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c index fde4db9..fe108f9 100644 --- a/drivers/edac/i3200_edac.c +++ b/drivers/edac/i3200_edac.c @@ -444,7 +444,7 @@ static void __devexit i3200_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id i3200_pci_tbl[] __devinitdata = { +static const struct pci_device_id i3200_pci_tbl[] __devinitconst = { { PCI_VEND_DEV(INTEL, 3200_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I3200}, diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index adc10a2..57d4ccf 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c @@ -1516,7 +1516,7 @@ static void __devexit i5000_remove_one(struct pci_dev *pdev) * * The 
"E500P" device is the first device supported. */ -static const struct pci_device_id i5000_pci_tbl[] __devinitdata = { +static const struct pci_device_id i5000_pci_tbl[] __devinitconst = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I5000_DEV16), .driver_data = I5000P}, diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index 22db05a..b2b5503 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -944,7 +944,7 @@ static void __devexit i5100_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id i5100_pci_tbl[] __devinitdata = { +static const struct pci_device_id i5100_pci_tbl[] __devinitconst = { /* Device 16, Function 0, Channel 0 Memory Map, Error Flag/Mask, ... */ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5100_16) }, { 0, } diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c index f99d106..f050710 100644 --- a/drivers/edac/i5400_edac.c +++ b/drivers/edac/i5400_edac.c @@ -1383,7 +1383,7 @@ static void __devexit i5400_remove_one(struct pci_dev *pdev) * * The "E500P" device is the first device supported. */ -static const struct pci_device_id i5400_pci_tbl[] __devinitdata = { +static const struct pci_device_id i5400_pci_tbl[] __devinitconst = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_ERR)}, {0,} /* 0 terminated list. 
*/ }; diff --git a/drivers/edac/i82443bxgx_edac.c b/drivers/edac/i82443bxgx_edac.c index 577760a..9ce16ce 100644 --- a/drivers/edac/i82443bxgx_edac.c +++ b/drivers/edac/i82443bxgx_edac.c @@ -381,7 +381,7 @@ static void __devexit i82443bxgx_edacmc_remove_one(struct pci_dev *pdev) EXPORT_SYMBOL_GPL(i82443bxgx_edacmc_remove_one); -static const struct pci_device_id i82443bxgx_pci_tbl[] __devinitdata = { +static const struct pci_device_id i82443bxgx_pci_tbl[] __devinitconst = { {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_0)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_2)}, {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0)}, diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c index c0088ba..64a7b98 100644 --- a/drivers/edac/i82860_edac.c +++ b/drivers/edac/i82860_edac.c @@ -271,7 +271,7 @@ static void __devexit i82860_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id i82860_pci_tbl[] __devinitdata = { +static const struct pci_device_id i82860_pci_tbl[] __devinitconst = { { PCI_VEND_DEV(INTEL, 82860_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I82860}, diff --git a/drivers/edac/i82875p_edac.c b/drivers/edac/i82875p_edac.c index b2d83b9..a34357b 100644 --- a/drivers/edac/i82875p_edac.c +++ b/drivers/edac/i82875p_edac.c @@ -512,7 +512,7 @@ static void __devexit i82875p_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id i82875p_pci_tbl[] __devinitdata = { +static const struct pci_device_id i82875p_pci_tbl[] __devinitconst = { { PCI_VEND_DEV(INTEL, 82875_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I82875P}, diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c index 2eed3ea..87bbbd1 100644 --- a/drivers/edac/i82975x_edac.c +++ b/drivers/edac/i82975x_edac.c @@ -586,7 +586,7 @@ static void __devexit i82975x_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id i82975x_pci_tbl[] __devinitdata = { +static 
const struct pci_device_id i82975x_pci_tbl[] __devinitconst = { { PCI_VEND_DEV(INTEL, 82975_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, I82975X diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c index 9900675..78ac2b6 100644 --- a/drivers/edac/r82600_edac.c +++ b/drivers/edac/r82600_edac.c @@ -374,7 +374,7 @@ static void __devexit r82600_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id r82600_pci_tbl[] __devinitdata = { +static const struct pci_device_id r82600_pci_tbl[] __devinitconst = { { PCI_DEVICE(PCI_VENDOR_ID_RADISYS, R82600_BRIDGE_ID) }, diff --git a/drivers/edac/x38_edac.c b/drivers/edac/x38_edac.c index d4ec605..4cfec4e 100644 --- a/drivers/edac/x38_edac.c +++ b/drivers/edac/x38_edac.c @@ -441,7 +441,7 @@ static void __devexit x38_remove_one(struct pci_dev *pdev) edac_mc_free(mci); } -static const struct pci_device_id x38_pci_tbl[] __devinitdata = { +static const struct pci_device_id x38_pci_tbl[] __devinitconst = { { PCI_VEND_DEV(INTEL, X38_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0, X38}, diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 3fc2ceb..daf098f 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -558,7 +558,7 @@ void fw_card_release(struct kref *kref) void fw_core_remove_card(struct fw_card *card) { - struct fw_card_driver dummy_driver = dummy_driver_template; + fw_card_driver_no_const dummy_driver = dummy_driver_template; card->driver->update_phy_reg(card, 4, PHY_LINK_ACTIVE | PHY_CONTENDER, 0); diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 4560d8f..36db24a 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -1141,8 +1141,7 @@ static int init_iso_resource(struct client *client, int ret; if ((request->channels == 0 && request->bandwidth == 0) || - request->bandwidth > BANDWIDTH_AVAILABLE_INITIAL || - request->bandwidth < 0) + request->bandwidth > BANDWIDTH_AVAILABLE_INITIAL) return 
-EINVAL; r = kmalloc(sizeof(*r), GFP_KERNEL); diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index da628c7..cf54a2c 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -36,6 +36,7 @@ #include #include #include +#include #include @@ -344,6 +345,8 @@ int fw_run_transaction(struct fw_card *card, int tcode, int destination_id, struct transaction_callback_data d; struct fw_transaction t; + pax_track_stack(); + init_completion(&d.done); d.payload = payload; fw_send_request(card, &t, tcode, destination_id, generation, speed, diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h index 7ff6e75..a2965d9 100644 --- a/drivers/firewire/core.h +++ b/drivers/firewire/core.h @@ -86,6 +86,7 @@ struct fw_card_driver { int (*stop_iso)(struct fw_iso_context *ctx); }; +typedef struct fw_card_driver __no_const fw_card_driver_no_const; void fw_card_initialize(struct fw_card *card, const struct fw_card_driver *driver, struct device *device); diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 3a2ccb0..82fd7c4 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -391,11 +391,6 @@ void __init dmi_scan_machine(void) } } else { - /* - * no iounmap() for that ioremap(); it would be a no-op, but - * it's so early in setup that sucker gets confused into doing - * what it shouldn't if we actually call it. 
- */ p = dmi_ioremap(0xF0000, 0x10000); if (p == NULL) goto error; @@ -667,7 +662,7 @@ int dmi_walk(void (*decode)(const struct dmi_header *, void *), if (buf == NULL) return -1; - dmi_table(buf, dmi_len, dmi_num, decode, private_data); + dmi_table((char __force_kernel *)buf, dmi_len, dmi_num, decode, private_data); iounmap(buf); return 0; diff --git a/drivers/firmware/edd.c b/drivers/firmware/edd.c index 9e4f59d..110e24e 100644 --- a/drivers/firmware/edd.c +++ b/drivers/firmware/edd.c @@ -122,7 +122,7 @@ edd_attr_show(struct kobject * kobj, struct attribute *attr, char *buf) return ret; } -static struct sysfs_ops edd_attr_ops = { +static const struct sysfs_ops edd_attr_ops = { .show = edd_attr_show, }; diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index f4f709d..082f06e 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -362,7 +362,7 @@ static ssize_t efivar_attr_store(struct kobject *kobj, struct attribute *attr, return ret; } -static struct sysfs_ops efivar_attr_ops = { +static const struct sysfs_ops efivar_attr_ops = { .show = efivar_attr_show, .store = efivar_attr_store, }; diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c index 051d1eb..0a5d4e7 100644 --- a/drivers/firmware/iscsi_ibft.c +++ b/drivers/firmware/iscsi_ibft.c @@ -525,7 +525,7 @@ static ssize_t ibft_show_attribute(struct kobject *kobj, return ret; } -static struct sysfs_ops ibft_attr_ops = { +static const struct sysfs_ops ibft_attr_ops = { .show = ibft_show_attribute, }; diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c index 56f9234..8c58c7b 100644 --- a/drivers/firmware/memmap.c +++ b/drivers/firmware/memmap.c @@ -74,7 +74,7 @@ static struct attribute *def_attrs[] = { NULL }; -static struct sysfs_ops memmap_attr_ops = { +static const struct sysfs_ops memmap_attr_ops = { .show = memmap_attr_show, }; diff --git a/drivers/gpio/vr41xx_giu.c b/drivers/gpio/vr41xx_giu.c index b16c9a8..2af7d3f 100644 --- 
a/drivers/gpio/vr41xx_giu.c +++ b/drivers/gpio/vr41xx_giu.c @@ -204,7 +204,7 @@ static int giu_get_irq(unsigned int irq) printk(KERN_ERR "spurious GIU interrupt: %04x(%04x),%04x(%04x)\n", maskl, pendl, maskh, pendh); - atomic_inc(&irq_err_count); + atomic_inc_unchecked(&irq_err_count); return -EINVAL; } diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index bea6efc..3dc0f42 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -1323,7 +1323,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, */ if ((out_resp->count_modes >= mode_count) && mode_count) { copied = 0; - mode_ptr = (struct drm_mode_modeinfo *)(unsigned long)out_resp->modes_ptr; + mode_ptr = (struct drm_mode_modeinfo __user *)(unsigned long)out_resp->modes_ptr; list_for_each_entry(mode, &connector->modes, head) { drm_crtc_convert_to_umode(&u_mode, mode); if (copy_to_user(mode_ptr + copied, @@ -1338,8 +1338,8 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, if ((out_resp->count_props >= props_count) && props_count) { copied = 0; - prop_ptr = (uint32_t *)(unsigned long)(out_resp->props_ptr); - prop_values = (uint64_t *)(unsigned long)(out_resp->prop_values_ptr); + prop_ptr = (uint32_t __user *)(unsigned long)(out_resp->props_ptr); + prop_values = (uint64_t __user *)(unsigned long)(out_resp->prop_values_ptr); for (i = 0; i < DRM_CONNECTOR_MAX_PROPERTY; i++) { if (connector->property_ids[i] != 0) { if (put_user(connector->property_ids[i], @@ -1361,7 +1361,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, if ((out_resp->count_encoders >= encoders_count) && encoders_count) { copied = 0; - encoder_ptr = (uint32_t *)(unsigned long)(out_resp->encoders_ptr); + encoder_ptr = (uint32_t __user *)(unsigned long)(out_resp->encoders_ptr); for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) { if (connector->encoder_ids[i] != 0) { if (put_user(connector->encoder_ids[i], @@ -1513,7 +1513,7 @@ int drm_mode_setcrtc(struct drm_device *dev, 
void *data, } for (i = 0; i < crtc_req->count_connectors; i++) { - set_connectors_ptr = (uint32_t *)(unsigned long)crtc_req->set_connectors_ptr; + set_connectors_ptr = (uint32_t __user *)(unsigned long)crtc_req->set_connectors_ptr; if (get_user(out_id, &set_connectors_ptr[i])) { ret = -EFAULT; goto out; @@ -2118,7 +2118,7 @@ int drm_mode_getproperty_ioctl(struct drm_device *dev, out_resp->flags = property->flags; if ((out_resp->count_values >= value_count) && value_count) { - values_ptr = (uint64_t *)(unsigned long)out_resp->values_ptr; + values_ptr = (uint64_t __user *)(unsigned long)out_resp->values_ptr; for (i = 0; i < value_count; i++) { if (copy_to_user(values_ptr + i, &property->values[i], sizeof(uint64_t))) { ret = -EFAULT; @@ -2131,7 +2131,7 @@ int drm_mode_getproperty_ioctl(struct drm_device *dev, if (property->flags & DRM_MODE_PROP_ENUM) { if ((out_resp->count_enum_blobs >= enum_count) && enum_count) { copied = 0; - enum_ptr = (struct drm_mode_property_enum *)(unsigned long)out_resp->enum_blob_ptr; + enum_ptr = (struct drm_mode_property_enum __user *)(unsigned long)out_resp->enum_blob_ptr; list_for_each_entry(prop_enum, &property->enum_blob_list, head) { if (copy_to_user(&enum_ptr[copied].value, &prop_enum->value, sizeof(uint64_t))) { @@ -2154,7 +2154,7 @@ int drm_mode_getproperty_ioctl(struct drm_device *dev, if ((out_resp->count_enum_blobs >= blob_count) && blob_count) { copied = 0; blob_id_ptr = (uint32_t *)(unsigned long)out_resp->enum_blob_ptr; - blob_length_ptr = (uint32_t *)(unsigned long)out_resp->values_ptr; + blob_length_ptr = (uint32_t __user *)(unsigned long)out_resp->values_ptr; list_for_each_entry(prop_blob, &property->enum_blob_list, head) { if (put_user(prop_blob->base.id, blob_id_ptr + copied)) { @@ -2226,7 +2226,7 @@ int drm_mode_getblob_ioctl(struct drm_device *dev, blob = obj_to_blob(obj); if (out_resp->length == blob->length) { - blob_ptr = (void *)(unsigned long)out_resp->data; + blob_ptr = (void __user *)(unsigned 
long)out_resp->data; if (copy_to_user(blob_ptr, blob->data, blob->length)){ ret = -EFAULT; goto done; diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index 1b8745d..92fdbf6 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -573,7 +573,7 @@ static bool drm_encoder_crtc_ok(struct drm_encoder *encoder, struct drm_crtc *tmp; int crtc_mask = 1; - WARN(!crtc, "checking null crtc?"); + BUG_ON(!crtc); dev = crtc->dev; @@ -642,6 +642,8 @@ bool drm_crtc_helper_set_mode(struct drm_crtc *crtc, adjusted_mode = drm_mode_duplicate(dev, mode); + pax_track_stack(); + crtc->enabled = drm_helper_crtc_in_use(crtc); if (!crtc->enabled) diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 0e27d98..dec8768 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -417,7 +417,7 @@ int drm_ioctl(struct inode *inode, struct file *filp, char *kdata = NULL; atomic_inc(&dev->ioctl_count); - atomic_inc(&dev->counts[_DRM_STAT_IOCTLS]); + atomic_inc_unchecked(&dev->counts[_DRM_STAT_IOCTLS]); ++file_priv->ioctl_count; DRM_DEBUG("pid=%d, cmd=0x%02x, nr=0x%02x, dev 0x%lx, auth=%d\n", diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c index ba14553..182d0bb 100644 --- a/drivers/gpu/drm/drm_fops.c +++ b/drivers/gpu/drm/drm_fops.c @@ -66,7 +66,7 @@ static int drm_setup(struct drm_device * dev) } for (i = 0; i < ARRAY_SIZE(dev->counts); i++) - atomic_set(&dev->counts[i], 0); + atomic_set_unchecked(&dev->counts[i], 0); dev->sigdata.lock = NULL; @@ -130,9 +130,9 @@ int drm_open(struct inode *inode, struct file *filp) retcode = drm_open_helper(inode, filp, dev); if (!retcode) { - atomic_inc(&dev->counts[_DRM_STAT_OPENS]); + atomic_inc_unchecked(&dev->counts[_DRM_STAT_OPENS]); spin_lock(&dev->count_lock); - if (!dev->open_count++) { + if (local_inc_return(&dev->open_count) == 1) { spin_unlock(&dev->count_lock); retcode = drm_setup(dev); goto out; @@ -435,7 +435,7 @@ int 
drm_release(struct inode *inode, struct file *filp) lock_kernel(); - DRM_DEBUG("open_count = %d\n", dev->open_count); + DRM_DEBUG("open_count = %d\n", local_read(&dev->open_count)); if (dev->driver->preclose) dev->driver->preclose(dev, file_priv); @@ -447,7 +447,7 @@ int drm_release(struct inode *inode, struct file *filp) DRM_DEBUG("pid = %d, device = 0x%lx, open_count = %d\n", task_pid_nr(current), (long)old_encode_dev(file_priv->minor->device), - dev->open_count); + local_read(&dev->open_count)); /* if the master has gone away we can't do anything with the lock */ if (file_priv->minor->master) @@ -524,9 +524,9 @@ int drm_release(struct inode *inode, struct file *filp) * End inline drm_release */ - atomic_inc(&dev->counts[_DRM_STAT_CLOSES]); + atomic_inc_unchecked(&dev->counts[_DRM_STAT_CLOSES]); spin_lock(&dev->count_lock); - if (!--dev->open_count) { + if (local_dec_and_test(&dev->open_count)) { if (atomic_read(&dev->ioctl_count)) { DRM_ERROR("Device busy: %d\n", atomic_read(&dev->ioctl_count)); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 8bf3770..79422805 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -83,11 +83,11 @@ drm_gem_init(struct drm_device *dev) spin_lock_init(&dev->object_name_lock); idr_init(&dev->object_name_idr); atomic_set(&dev->object_count, 0); - atomic_set(&dev->object_memory, 0); + atomic_set_unchecked(&dev->object_memory, 0); atomic_set(&dev->pin_count, 0); - atomic_set(&dev->pin_memory, 0); + atomic_set_unchecked(&dev->pin_memory, 0); atomic_set(&dev->gtt_count, 0); - atomic_set(&dev->gtt_memory, 0); + atomic_set_unchecked(&dev->gtt_memory, 0); mm = kzalloc(sizeof(struct drm_gem_mm), GFP_KERNEL); if (!mm) { @@ -150,7 +150,7 @@ drm_gem_object_alloc(struct drm_device *dev, size_t size) goto fput; } atomic_inc(&dev->object_count); - atomic_add(obj->size, &dev->object_memory); + atomic_add_unchecked(obj->size, &dev->object_memory); return obj; fput: fput(obj->filp); @@ -429,7 +429,7 @@ 
drm_gem_object_free(struct kref *kref) fput(obj->filp); atomic_dec(&dev->object_count); - atomic_sub(obj->size, &dev->object_memory); + atomic_sub_unchecked(obj->size, &dev->object_memory); kfree(obj); } EXPORT_SYMBOL(drm_gem_object_free); diff --git a/drivers/gpu/drm/drm_info.c b/drivers/gpu/drm/drm_info.c index f0f6c6b..34af322 100644 --- a/drivers/gpu/drm/drm_info.c +++ b/drivers/gpu/drm/drm_info.c @@ -75,10 +75,14 @@ int drm_vm_info(struct seq_file *m, void *data) struct drm_local_map *map; struct drm_map_list *r_list; - /* Hardcoded from _DRM_FRAME_BUFFER, - _DRM_REGISTERS, _DRM_SHM, _DRM_AGP, and - _DRM_SCATTER_GATHER and _DRM_CONSISTENT */ - const char *types[] = { "FB", "REG", "SHM", "AGP", "SG", "PCI" }; + static const char * const types[] = { + [_DRM_FRAME_BUFFER] = "FB", + [_DRM_REGISTERS] = "REG", + [_DRM_SHM] = "SHM", + [_DRM_AGP] = "AGP", + [_DRM_SCATTER_GATHER] = "SG", + [_DRM_CONSISTENT] = "PCI", + [_DRM_GEM] = "GEM" }; const char *type; int i; @@ -89,7 +93,7 @@ int drm_vm_info(struct seq_file *m, void *data) map = r_list->map; if (!map) continue; - if (map->type < 0 || map->type > 5) + if (map->type >= ARRAY_SIZE(types)) type = "??"; else type = types[map->type]; @@ -265,10 +269,10 @@ int drm_gem_object_info(struct seq_file *m, void* data) struct drm_device *dev = node->minor->dev; seq_printf(m, "%d objects\n", atomic_read(&dev->object_count)); - seq_printf(m, "%d object bytes\n", atomic_read(&dev->object_memory)); + seq_printf(m, "%d object bytes\n", atomic_read_unchecked(&dev->object_memory)); seq_printf(m, "%d pinned\n", atomic_read(&dev->pin_count)); - seq_printf(m, "%d pin bytes\n", atomic_read(&dev->pin_memory)); - seq_printf(m, "%d gtt bytes\n", atomic_read(&dev->gtt_memory)); + seq_printf(m, "%d pin bytes\n", atomic_read_unchecked(&dev->pin_memory)); + seq_printf(m, "%d gtt bytes\n", atomic_read_unchecked(&dev->gtt_memory)); seq_printf(m, "%d gtt total\n", dev->gtt_total); return 0; } @@ -288,7 +292,11 @@ int drm_vma_info(struct seq_file 
*m, void *data) mutex_lock(&dev->struct_mutex); seq_printf(m, "vma use count: %d, high_memory = %p, 0x%08llx\n", atomic_read(&dev->vma_count), +#ifdef CONFIG_GRKERNSEC_HIDESYM + NULL, 0); +#else high_memory, (u64)virt_to_phys(high_memory)); +#endif list_for_each_entry(pt, &dev->vmalist, head) { vma = pt->vma; @@ -296,14 +304,23 @@ int drm_vma_info(struct seq_file *m, void *data) continue; seq_printf(m, "\n%5d 0x%08lx-0x%08lx %c%c%c%c%c%c 0x%08lx000", - pt->pid, vma->vm_start, vma->vm_end, + pt->pid, +#ifdef CONFIG_GRKERNSEC_HIDESYM + 0, 0, +#else + vma->vm_start, vma->vm_end, +#endif vma->vm_flags & VM_READ ? 'r' : '-', vma->vm_flags & VM_WRITE ? 'w' : '-', vma->vm_flags & VM_EXEC ? 'x' : '-', vma->vm_flags & VM_MAYSHARE ? 's' : 'p', vma->vm_flags & VM_LOCKED ? 'l' : '-', vma->vm_flags & VM_IO ? 'i' : '-', +#ifdef CONFIG_GRKERNSEC_HIDESYM + 0); +#else vma->vm_pgoff); +#endif #if defined(__i386__) pgprot = pgprot_val(vma->vm_page_prot); diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c index 282d9fd..71e5f11 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c @@ -463,7 +463,7 @@ static int compat_drm_infobufs(struct file *file, unsigned int cmd, request = compat_alloc_user_space(nbytes); if (!access_ok(VERIFY_WRITE, request, nbytes)) return -EFAULT; - list = (struct drm_buf_desc *) (request + 1); + list = (struct drm_buf_desc __user *) (request + 1); if (__put_user(count, &request->count) || __put_user(list, &request->list)) @@ -525,7 +525,7 @@ static int compat_drm_mapbufs(struct file *file, unsigned int cmd, request = compat_alloc_user_space(nbytes); if (!access_ok(VERIFY_WRITE, request, nbytes)) return -EFAULT; - list = (struct drm_buf_pub *) (request + 1); + list = (struct drm_buf_pub __user *) (request + 1); if (__put_user(count, &request->count) || __put_user(list, &request->list)) diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 9b9ff46..4ea724c 100644 --- a/drivers/gpu/drm/drm_ioctl.c 
+++ b/drivers/gpu/drm/drm_ioctl.c @@ -283,7 +283,7 @@ int drm_getstats(struct drm_device *dev, void *data, stats->data[i].value = (file_priv->master->lock.hw_lock ? file_priv->master->lock.hw_lock->lock : 0); else - stats->data[i].value = atomic_read(&dev->counts[i]); + stats->data[i].value = atomic_read_unchecked(&dev->counts[i]); stats->data[i].type = dev->types[i]; } diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c index e2f70a5..c703e86 100644 --- a/drivers/gpu/drm/drm_lock.c +++ b/drivers/gpu/drm/drm_lock.c @@ -87,7 +87,7 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv) if (drm_lock_take(&master->lock, lock->context)) { master->lock.file_priv = file_priv; master->lock.lock_time = jiffies; - atomic_inc(&dev->counts[_DRM_STAT_LOCKS]); + atomic_inc_unchecked(&dev->counts[_DRM_STAT_LOCKS]); break; /* Got lock */ } @@ -165,7 +165,7 @@ int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv) return -EINVAL; } - atomic_inc(&dev->counts[_DRM_STAT_UNLOCKS]); + atomic_inc_unchecked(&dev->counts[_DRM_STAT_UNLOCKS]); /* kernel_context_switch isn't used by any of the x86 drm * modules but is required by the Sparc driver. 
diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c index 7d1d88c..b9131b2 100644 --- a/drivers/gpu/drm/i810/i810_dma.c +++ b/drivers/gpu/drm/i810/i810_dma.c @@ -952,8 +952,8 @@ static int i810_dma_vertex(struct drm_device *dev, void *data, dma->buflist[vertex->idx], vertex->discard, vertex->used); - atomic_add(vertex->used, &dev->counts[_DRM_STAT_SECONDARY]); - atomic_inc(&dev->counts[_DRM_STAT_DMA]); + atomic_add_unchecked(vertex->used, &dev->counts[_DRM_STAT_SECONDARY]); + atomic_inc_unchecked(&dev->counts[_DRM_STAT_DMA]); sarea_priv->last_enqueue = dev_priv->counter - 1; sarea_priv->last_dispatch = (int)hw_status[5]; @@ -1115,8 +1115,8 @@ static int i810_dma_mc(struct drm_device *dev, void *data, i810_dma_dispatch_mc(dev, dma->buflist[mc->idx], mc->used, mc->last_render); - atomic_add(mc->used, &dev->counts[_DRM_STAT_SECONDARY]); - atomic_inc(&dev->counts[_DRM_STAT_DMA]); + atomic_add_unchecked(mc->used, &dev->counts[_DRM_STAT_SECONDARY]); + atomic_inc_unchecked(&dev->counts[_DRM_STAT_DMA]); sarea_priv->last_enqueue = dev_priv->counter - 1; sarea_priv->last_dispatch = (int)hw_status[5]; diff --git a/drivers/gpu/drm/i810/i810_drv.h b/drivers/gpu/drm/i810/i810_drv.h index 21e2691..7321edd 100644 --- a/drivers/gpu/drm/i810/i810_drv.h +++ b/drivers/gpu/drm/i810/i810_drv.h @@ -108,8 +108,8 @@ typedef struct drm_i810_private { int page_flipping; wait_queue_head_t irq_queue; - atomic_t irq_received; - atomic_t irq_emitted; + atomic_unchecked_t irq_received; + atomic_unchecked_t irq_emitted; int front_offset; } drm_i810_private_t; diff --git a/drivers/gpu/drm/i830/i830_drv.h b/drivers/gpu/drm/i830/i830_drv.h index da82afe..48a45de 100644 --- a/drivers/gpu/drm/i830/i830_drv.h +++ b/drivers/gpu/drm/i830/i830_drv.h @@ -115,8 +115,8 @@ typedef struct drm_i830_private { int page_flipping; wait_queue_head_t irq_queue; - atomic_t irq_received; - atomic_t irq_emitted; + atomic_unchecked_t irq_received; + atomic_unchecked_t irq_emitted; int 
use_mi_batchbuffer_start; diff --git a/drivers/gpu/drm/i830/i830_irq.c b/drivers/gpu/drm/i830/i830_irq.c index 91ec2bb..6f21fab 100644 --- a/drivers/gpu/drm/i830/i830_irq.c +++ b/drivers/gpu/drm/i830/i830_irq.c @@ -47,7 +47,7 @@ irqreturn_t i830_driver_irq_handler(DRM_IRQ_ARGS) I830_WRITE16(I830REG_INT_IDENTITY_R, temp); - atomic_inc(&dev_priv->irq_received); + atomic_inc_unchecked(&dev_priv->irq_received); wake_up_interruptible(&dev_priv->irq_queue); return IRQ_HANDLED; @@ -60,14 +60,14 @@ static int i830_emit_irq(struct drm_device * dev) DRM_DEBUG("%s\n", __func__); - atomic_inc(&dev_priv->irq_emitted); + atomic_inc_unchecked(&dev_priv->irq_emitted); BEGIN_LP_RING(2); OUT_RING(0); OUT_RING(GFX_OP_USER_INTERRUPT); ADVANCE_LP_RING(); - return atomic_read(&dev_priv->irq_emitted); + return atomic_read_unchecked(&dev_priv->irq_emitted); } static int i830_wait_irq(struct drm_device * dev, int irq_nr) @@ -79,7 +79,7 @@ static int i830_wait_irq(struct drm_device * dev, int irq_nr) DRM_DEBUG("%s\n", __func__); - if (atomic_read(&dev_priv->irq_received) >= irq_nr) + if (atomic_read_unchecked(&dev_priv->irq_received) >= irq_nr) return 0; dev_priv->sarea_priv->perf_boxes |= I830_BOX_WAIT; @@ -88,7 +88,7 @@ static int i830_wait_irq(struct drm_device * dev, int irq_nr) for (;;) { __set_current_state(TASK_INTERRUPTIBLE); - if (atomic_read(&dev_priv->irq_received) >= irq_nr) + if (atomic_read_unchecked(&dev_priv->irq_received) >= irq_nr) break; if ((signed)(end - jiffies) <= 0) { DRM_ERROR("timeout iir %x imr %x ier %x hwstam %x\n", @@ -163,8 +163,8 @@ void i830_driver_irq_preinstall(struct drm_device * dev) I830_WRITE16(I830REG_HWSTAM, 0xffff); I830_WRITE16(I830REG_INT_MASK_R, 0x0); I830_WRITE16(I830REG_INT_ENABLE_R, 0x0); - atomic_set(&dev_priv->irq_received, 0); - atomic_set(&dev_priv->irq_emitted, 0); + atomic_set_unchecked(&dev_priv->irq_received, 0); + atomic_set_unchecked(&dev_priv->irq_emitted, 0); init_waitqueue_head(&dev_priv->irq_queue); } diff --git 
a/drivers/gpu/drm/i915/dvo.h b/drivers/gpu/drm/i915/dvo.h index 288fc50..c6092055 100644 --- a/drivers/gpu/drm/i915/dvo.h +++ b/drivers/gpu/drm/i915/dvo.h @@ -135,23 +135,23 @@ struct intel_dvo_dev_ops { * * \return singly-linked list of modes or NULL if no modes found. */ - struct drm_display_mode *(*get_modes)(struct intel_dvo_device *dvo); + struct drm_display_mode *(* const get_modes)(struct intel_dvo_device *dvo); /** * Clean up driver-specific bits of the output */ - void (*destroy) (struct intel_dvo_device *dvo); + void (* const destroy) (struct intel_dvo_device *dvo); /** * Debugging hook to dump device registers to log file */ - void (*dump_regs)(struct intel_dvo_device *dvo); + void (* const dump_regs)(struct intel_dvo_device *dvo); }; -extern struct intel_dvo_dev_ops sil164_ops; -extern struct intel_dvo_dev_ops ch7xxx_ops; -extern struct intel_dvo_dev_ops ivch_ops; -extern struct intel_dvo_dev_ops tfp410_ops; -extern struct intel_dvo_dev_ops ch7017_ops; +extern const struct intel_dvo_dev_ops sil164_ops; +extern const struct intel_dvo_dev_ops ch7xxx_ops; +extern const struct intel_dvo_dev_ops ivch_ops; +extern const struct intel_dvo_dev_ops tfp410_ops; +extern const struct intel_dvo_dev_ops ch7017_ops; #endif /* _INTEL_DVO_H */ diff --git a/drivers/gpu/drm/i915/dvo_ch7017.c b/drivers/gpu/drm/i915/dvo_ch7017.c index 621815b..499d82e 100644 --- a/drivers/gpu/drm/i915/dvo_ch7017.c +++ b/drivers/gpu/drm/i915/dvo_ch7017.c @@ -443,7 +443,7 @@ static void ch7017_destroy(struct intel_dvo_device *dvo) } } -struct intel_dvo_dev_ops ch7017_ops = { +const struct intel_dvo_dev_ops ch7017_ops = { .init = ch7017_init, .detect = ch7017_detect, .mode_valid = ch7017_mode_valid, diff --git a/drivers/gpu/drm/i915/dvo_ch7xxx.c b/drivers/gpu/drm/i915/dvo_ch7xxx.c index a9b8962..ac769ba 100644 --- a/drivers/gpu/drm/i915/dvo_ch7xxx.c +++ b/drivers/gpu/drm/i915/dvo_ch7xxx.c @@ -356,7 +356,7 @@ static void ch7xxx_destroy(struct intel_dvo_device *dvo) } } -struct intel_dvo_dev_ops 
ch7xxx_ops = { +const struct intel_dvo_dev_ops ch7xxx_ops = { .init = ch7xxx_init, .detect = ch7xxx_detect, .mode_valid = ch7xxx_mode_valid, diff --git a/drivers/gpu/drm/i915/dvo_ivch.c b/drivers/gpu/drm/i915/dvo_ivch.c index aa176f9..ed2930c 100644 --- a/drivers/gpu/drm/i915/dvo_ivch.c +++ b/drivers/gpu/drm/i915/dvo_ivch.c @@ -430,7 +430,7 @@ static void ivch_destroy(struct intel_dvo_device *dvo) } } -struct intel_dvo_dev_ops ivch_ops= { +const struct intel_dvo_dev_ops ivch_ops= { .init = ivch_init, .dpms = ivch_dpms, .save = ivch_save, diff --git a/drivers/gpu/drm/i915/dvo_sil164.c b/drivers/gpu/drm/i915/dvo_sil164.c index e1c1f73..7dbebcf 100644 --- a/drivers/gpu/drm/i915/dvo_sil164.c +++ b/drivers/gpu/drm/i915/dvo_sil164.c @@ -290,7 +290,7 @@ static void sil164_destroy(struct intel_dvo_device *dvo) } } -struct intel_dvo_dev_ops sil164_ops = { +const struct intel_dvo_dev_ops sil164_ops = { .init = sil164_init, .detect = sil164_detect, .mode_valid = sil164_mode_valid, diff --git a/drivers/gpu/drm/i915/dvo_tfp410.c b/drivers/gpu/drm/i915/dvo_tfp410.c index 16dce84..7e1b6f8 100644 --- a/drivers/gpu/drm/i915/dvo_tfp410.c +++ b/drivers/gpu/drm/i915/dvo_tfp410.c @@ -323,7 +323,7 @@ static void tfp410_destroy(struct intel_dvo_device *dvo) } } -struct intel_dvo_dev_ops tfp410_ops = { +const struct intel_dvo_dev_ops tfp410_ops = { .init = tfp410_init, .detect = tfp410_detect, .mode_valid = tfp410_mode_valid, diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 7e859d6..7d1cf2b 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -192,7 +192,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data) I915_READ(GTIMR)); } seq_printf(m, "Interrupts received: %d\n", - atomic_read(&dev_priv->irq_received)); + atomic_read_unchecked(&dev_priv->irq_received)); if (dev_priv->hw_status_page != NULL) { seq_printf(m, "Current sequence: %d\n", i915_get_gem_seqno(dev)); diff --git 
a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 5449239..7e4f68d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -285,7 +285,7 @@ i915_pci_resume(struct pci_dev *pdev) return i915_resume(dev); } -static struct vm_operations_struct i915_gem_vm_ops = { +static const struct vm_operations_struct i915_gem_vm_ops = { .fault = i915_gem_fault, .open = drm_gem_vm_open, .close = drm_gem_vm_close, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 97163f7..c24c7c7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -168,7 +168,7 @@ struct drm_i915_display_funcs { /* display clock increase/decrease */ /* pll clock increase/decrease */ /* clock gating init */ -}; +} __no_const; typedef struct drm_i915_private { struct drm_device *dev; @@ -197,7 +197,7 @@ typedef struct drm_i915_private { int page_flipping; wait_queue_head_t irq_queue; - atomic_t irq_received; + atomic_unchecked_t irq_received; /** Protects user_irq_refcount and irq_mask_reg */ spinlock_t user_irq_lock; /** Refcount for i915_user_irq_get() versus i915_user_irq_put(). 
*/ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 27a3074..eb3f959 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -102,7 +102,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, args->aper_size = dev->gtt_total; args->aper_available_size = (args->aper_size - - atomic_read(&dev->pin_memory)); + atomic_read_unchecked(&dev->pin_memory)); return 0; } @@ -2058,7 +2058,7 @@ i915_gem_object_unbind(struct drm_gem_object *obj) if (obj_priv->gtt_space) { atomic_dec(&dev->gtt_count); - atomic_sub(obj->size, &dev->gtt_memory); + atomic_sub_unchecked(obj->size, &dev->gtt_memory); drm_mm_put_block(obj_priv->gtt_space); obj_priv->gtt_space = NULL; @@ -2701,7 +2701,7 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) goto search_free; } atomic_inc(&dev->gtt_count); - atomic_add(obj->size, &dev->gtt_memory); + atomic_add_unchecked(obj->size, &dev->gtt_memory); /* Assert that the object is not currently in any GPU domain. 
As it * wasn't in the GTT, there shouldn't be any way it could have been in @@ -3755,9 +3755,9 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, "%d/%d gtt bytes\n", atomic_read(&dev->object_count), atomic_read(&dev->pin_count), - atomic_read(&dev->object_memory), - atomic_read(&dev->pin_memory), - atomic_read(&dev->gtt_memory), + atomic_read_unchecked(&dev->object_memory), + atomic_read_unchecked(&dev->pin_memory), + atomic_read_unchecked(&dev->gtt_memory), dev->gtt_total); } goto err; @@ -3989,7 +3989,7 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) */ if (obj_priv->pin_count == 1) { atomic_inc(&dev->pin_count); - atomic_add(obj->size, &dev->pin_memory); + atomic_add_unchecked(obj->size, &dev->pin_memory); if (!obj_priv->active && (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0 && !list_empty(&obj_priv->list)) @@ -4022,7 +4022,7 @@ i915_gem_object_unpin(struct drm_gem_object *obj) list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list); atomic_dec(&dev->pin_count); - atomic_sub(obj->size, &dev->pin_memory); + atomic_sub_unchecked(obj->size, &dev->pin_memory); } i915_verify_inactive(dev, __FILE__, __LINE__); } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 63f28ad..f5469da 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -528,7 +528,7 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) int irq_received; int ret = IRQ_NONE; - atomic_inc(&dev_priv->irq_received); + atomic_inc_unchecked(&dev_priv->irq_received); if (IS_IGDNG(dev)) return igdng_irq_handler(dev); @@ -1021,7 +1021,7 @@ void i915_driver_irq_preinstall(struct drm_device * dev) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; - atomic_set(&dev_priv->irq_received, 0); + atomic_set_unchecked(&dev_priv->irq_received, 0); INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func); INIT_WORK(&dev_priv->error_work, i915_error_work_func); diff --git 
a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 5d9c6a7..d1b0e29 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -2795,7 +2795,9 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) sdvo_priv->slave_addr = intel_sdvo_get_slave_addr(dev, output_device); /* Save the bit-banging i2c functionality for use by the DDC wrapper */ - intel_sdvo_i2c_bit_algo.functionality = intel_output->i2c_bus->algo->functionality; + pax_open_kernel(); + *(void **)&intel_sdvo_i2c_bit_algo.functionality = intel_output->i2c_bus->algo->functionality; + pax_close_kernel(); /* Read the regs to test if we can talk to the device */ for (i = 0; i < 0x40; i++) { diff --git a/drivers/gpu/drm/mga/mga_drv.h b/drivers/gpu/drm/mga/mga_drv.h index be6c6b9..8615d9c 100644 --- a/drivers/gpu/drm/mga/mga_drv.h +++ b/drivers/gpu/drm/mga/mga_drv.h @@ -120,9 +120,9 @@ typedef struct drm_mga_private { u32 clear_cmd; u32 maccess; - atomic_t vbl_received; /**< Number of vblanks received. */ + atomic_unchecked_t vbl_received; /**< Number of vblanks received. 
*/ wait_queue_head_t fence_queue; - atomic_t last_fence_retired; + atomic_unchecked_t last_fence_retired; u32 next_fence_to_post; unsigned int fb_cpp; diff --git a/drivers/gpu/drm/mga/mga_irq.c b/drivers/gpu/drm/mga/mga_irq.c index daa6041..a28a5da 100644 --- a/drivers/gpu/drm/mga/mga_irq.c +++ b/drivers/gpu/drm/mga/mga_irq.c @@ -44,7 +44,7 @@ u32 mga_get_vblank_counter(struct drm_device *dev, int crtc) if (crtc != 0) return 0; - return atomic_read(&dev_priv->vbl_received); + return atomic_read_unchecked(&dev_priv->vbl_received); } @@ -60,7 +60,7 @@ irqreturn_t mga_driver_irq_handler(DRM_IRQ_ARGS) /* VBLANK interrupt */ if (status & MGA_VLINEPEN) { MGA_WRITE(MGA_ICLEAR, MGA_VLINEICLR); - atomic_inc(&dev_priv->vbl_received); + atomic_inc_unchecked(&dev_priv->vbl_received); drm_handle_vblank(dev, 0); handled = 1; } @@ -80,7 +80,7 @@ irqreturn_t mga_driver_irq_handler(DRM_IRQ_ARGS) MGA_WRITE(MGA_PRIMEND, prim_end); } - atomic_inc(&dev_priv->last_fence_retired); + atomic_inc_unchecked(&dev_priv->last_fence_retired); DRM_WAKEUP(&dev_priv->fence_queue); handled = 1; } @@ -131,7 +131,7 @@ int mga_driver_fence_wait(struct drm_device * dev, unsigned int *sequence) * using fences. */ DRM_WAIT_ON(ret, dev_priv->fence_queue, 3 * DRM_HZ, - (((cur_fence = atomic_read(&dev_priv->last_fence_retired)) + (((cur_fence = atomic_read_unchecked(&dev_priv->last_fence_retired)) - *sequence) <= (1 << 23))); *sequence = cur_fence; diff --git a/drivers/gpu/drm/r128/r128_cce.c b/drivers/gpu/drm/r128/r128_cce.c index 4c39a40..b22a9ea 100644 --- a/drivers/gpu/drm/r128/r128_cce.c +++ b/drivers/gpu/drm/r128/r128_cce.c @@ -377,7 +377,7 @@ static int r128_do_init_cce(struct drm_device * dev, drm_r128_init_t * init) /* GH: Simple idle check. 
*/ - atomic_set(&dev_priv->idle_count, 0); + atomic_set_unchecked(&dev_priv->idle_count, 0); /* We don't support anything other than bus-mastering ring mode, * but the ring can be in either AGP or PCI space for the ring diff --git a/drivers/gpu/drm/r128/r128_drv.h b/drivers/gpu/drm/r128/r128_drv.h index 3c60829..4faf484 100644 --- a/drivers/gpu/drm/r128/r128_drv.h +++ b/drivers/gpu/drm/r128/r128_drv.h @@ -90,14 +90,14 @@ typedef struct drm_r128_private { int is_pci; unsigned long cce_buffers_offset; - atomic_t idle_count; + atomic_unchecked_t idle_count; int page_flipping; int current_page; u32 crtc_offset; u32 crtc_offset_cntl; - atomic_t vbl_received; + atomic_unchecked_t vbl_received; u32 color_fmt; unsigned int front_offset; diff --git a/drivers/gpu/drm/r128/r128_irq.c b/drivers/gpu/drm/r128/r128_irq.c index 69810fb..97bf17a 100644 --- a/drivers/gpu/drm/r128/r128_irq.c +++ b/drivers/gpu/drm/r128/r128_irq.c @@ -42,7 +42,7 @@ u32 r128_get_vblank_counter(struct drm_device *dev, int crtc) if (crtc != 0) return 0; - return atomic_read(&dev_priv->vbl_received); + return atomic_read_unchecked(&dev_priv->vbl_received); } irqreturn_t r128_driver_irq_handler(DRM_IRQ_ARGS) @@ -56,7 +56,7 @@ irqreturn_t r128_driver_irq_handler(DRM_IRQ_ARGS) /* VBLANK interrupt */ if (status & R128_CRTC_VBLANK_INT) { R128_WRITE(R128_GEN_INT_STATUS, R128_CRTC_VBLANK_INT_AK); - atomic_inc(&dev_priv->vbl_received); + atomic_inc_unchecked(&dev_priv->vbl_received); drm_handle_vblank(dev, 0); return IRQ_HANDLED; } diff --git a/drivers/gpu/drm/r128/r128_state.c b/drivers/gpu/drm/r128/r128_state.c index af2665c..51922d2 100644 --- a/drivers/gpu/drm/r128/r128_state.c +++ b/drivers/gpu/drm/r128/r128_state.c @@ -323,10 +323,10 @@ static void r128_clear_box(drm_r128_private_t * dev_priv, static void r128_cce_performance_boxes(drm_r128_private_t * dev_priv) { - if (atomic_read(&dev_priv->idle_count) == 0) { + if (atomic_read_unchecked(&dev_priv->idle_count) == 0) { r128_clear_box(dev_priv, 64, 4, 8, 8, 
0, 255, 0); } else { - atomic_set(&dev_priv->idle_count, 0); + atomic_set_unchecked(&dev_priv->idle_count, 0); } } diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c index dd72b91..8644b3c 100644 --- a/drivers/gpu/drm/radeon/atom.c +++ b/drivers/gpu/drm/radeon/atom.c @@ -1115,6 +1115,8 @@ struct atom_context *atom_parse(struct card_info *card, void *bios) char name[512]; int i; + pax_track_stack(); + ctx->card = card; ctx->bios = bios; diff --git a/drivers/gpu/drm/radeon/mkregtable.c b/drivers/gpu/drm/radeon/mkregtable.c index 0d79577..efaa7a5 100644 --- a/drivers/gpu/drm/radeon/mkregtable.c +++ b/drivers/gpu/drm/radeon/mkregtable.c @@ -637,14 +637,14 @@ static int parser_auth(struct table *t, const char *filename) regex_t mask_rex; regmatch_t match[4]; char buf[1024]; - size_t end; + long end; int len; int done = 0; int r; unsigned o; struct offset *offset; char last_reg_s[10]; - int last_reg; + unsigned long last_reg; if (regcomp (&mask_rex, "(0x[0-9a-fA-F]*) *([_a-zA-Z0-9]*)", REG_EXTENDED)) { diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 6735213..38c2c67 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -149,7 +149,7 @@ int radeon_pm_init(struct radeon_device *rdev); */ struct radeon_fence_driver { uint32_t scratch_reg; - atomic_t seq; + atomic_unchecked_t seq; uint32_t last_seq; unsigned long count_timeout; wait_queue_head_t queue; @@ -640,7 +640,7 @@ struct radeon_asic { uint32_t offset, uint32_t obj_size); int (*clear_surface_reg)(struct radeon_device *rdev, int reg); void (*bandwidth_update)(struct radeon_device *rdev); -}; +} __no_const; /* * Asic structures diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 4e928b9..d8b6008 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -275,6 +275,8 @@ bool radeon_get_atom_connector_info_from_object_table(struct 
drm_device *dev) bool linkb; struct radeon_i2c_bus_rec ddc_bus; + pax_track_stack(); + atom_parse_data_header(ctx, index, &size, &frev, &crev, &data_offset); if (data_offset == 0) @@ -520,13 +522,13 @@ static uint16_t atombios_get_connector_object_id(struct drm_device *dev, } } -struct bios_connector { +static struct bios_connector { bool valid; uint16_t line_mux; uint16_t devices; int connector_type; struct radeon_i2c_bus_rec ddc_bus; -}; +} bios_connectors[ATOM_MAX_SUPPORTED_DEVICE]; bool radeon_get_atom_connector_info_from_supported_devices_table(struct drm_device @@ -542,7 +544,6 @@ bool radeon_get_atom_connector_info_from_supported_devices_table(struct uint8_t dac; union atom_supported_devices *supported_devices; int i, j; - struct bios_connector bios_connectors[ATOM_MAX_SUPPORTED_DEVICE]; atom_parse_data_header(ctx, index, &size, &frev, &crev, &data_offset); diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 083a181..ccccae0 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -482,7 +482,7 @@ void radeon_compute_pll(struct radeon_pll *pll, if (flags & RADEON_PLL_PREFER_CLOSEST_LOWER) { error = freq - current_freq; - error = error < 0 ? 0xffffffff : error; + error = (int32_t)error < 0 ? 
0xffffffff : error; } else error = abs(current_freq - freq); vco_diff = abs(vco - best_vco); diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h index 76e4070..193fa7f 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.h +++ b/drivers/gpu/drm/radeon/radeon_drv.h @@ -253,7 +253,7 @@ typedef struct drm_radeon_private { /* SW interrupt */ wait_queue_head_t swi_queue; - atomic_t swi_emitted; + atomic_unchecked_t swi_emitted; int vblank_crtc; uint32_t irq_enable_reg; uint32_t r500_disp_irq_reg; diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 3beb26d..6ce9c4a 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -47,7 +47,7 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence) write_unlock_irqrestore(&rdev->fence_drv.lock, irq_flags); return 0; } - fence->seq = atomic_add_return(1, &rdev->fence_drv.seq); + fence->seq = atomic_add_return_unchecked(1, &rdev->fence_drv.seq); if (!rdev->cp.ready) { /* FIXME: cp is not running assume everythings is done right * away @@ -364,7 +364,7 @@ int radeon_fence_driver_init(struct radeon_device *rdev) return r; } WREG32(rdev->fence_drv.scratch_reg, 0); - atomic_set(&rdev->fence_drv.seq, 0); + atomic_set_unchecked(&rdev->fence_drv.seq, 0); INIT_LIST_HEAD(&rdev->fence_drv.created); INIT_LIST_HEAD(&rdev->fence_drv.emited); INIT_LIST_HEAD(&rdev->fence_drv.signaled); diff --git a/drivers/gpu/drm/radeon/radeon_ioc32.c b/drivers/gpu/drm/radeon/radeon_ioc32.c index a1bf11d..4a123c0 100644 --- a/drivers/gpu/drm/radeon/radeon_ioc32.c +++ b/drivers/gpu/drm/radeon/radeon_ioc32.c @@ -368,7 +368,7 @@ static int compat_radeon_cp_setparam(struct file *file, unsigned int cmd, request = compat_alloc_user_space(sizeof(*request)); if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) || __put_user(req32.param, &request->param) - || __put_user((void __user *)(unsigned long)req32.value, + || 
__put_user((unsigned long)req32.value, &request->value)) return -EFAULT; diff --git a/drivers/gpu/drm/radeon/radeon_irq.c b/drivers/gpu/drm/radeon/radeon_irq.c index b79ecc4..8dab92d 100644 --- a/drivers/gpu/drm/radeon/radeon_irq.c +++ b/drivers/gpu/drm/radeon/radeon_irq.c @@ -225,8 +225,8 @@ static int radeon_emit_irq(struct drm_device * dev) unsigned int ret; RING_LOCALS; - atomic_inc(&dev_priv->swi_emitted); - ret = atomic_read(&dev_priv->swi_emitted); + atomic_inc_unchecked(&dev_priv->swi_emitted); + ret = atomic_read_unchecked(&dev_priv->swi_emitted); BEGIN_RING(4); OUT_RING_REG(RADEON_LAST_SWI_REG, ret); @@ -352,7 +352,7 @@ int radeon_driver_irq_postinstall(struct drm_device *dev) drm_radeon_private_t *dev_priv = (drm_radeon_private_t *) dev->dev_private; - atomic_set(&dev_priv->swi_emitted, 0); + atomic_set_unchecked(&dev_priv->swi_emitted, 0); DRM_INIT_WAITQUEUE(&dev_priv->swi_queue); dev->max_vblank_count = 0x001fffff; diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c index 4747910..48ca4b3 100644 --- a/drivers/gpu/drm/radeon/radeon_state.c +++ b/drivers/gpu/drm/radeon/radeon_state.c @@ -3021,7 +3021,7 @@ static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_fil { drm_radeon_private_t *dev_priv = dev->dev_private; drm_radeon_getparam_t *param = data; - int value; + int value = 0; DRM_DEBUG("pid=%d\n", DRM_CURRENTPID); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 1381e06..0e53b17 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -535,27 +535,10 @@ void radeon_ttm_fini(struct radeon_device *rdev) DRM_INFO("radeon: ttm finalized\n"); } -static struct vm_operations_struct radeon_ttm_vm_ops; -static const struct vm_operations_struct *ttm_vm_ops = NULL; - -static int radeon_ttm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct ttm_buffer_object *bo; - int r; - - bo = (struct 
ttm_buffer_object *)vma->vm_private_data; - if (bo == NULL) { - return VM_FAULT_NOPAGE; - } - r = ttm_vm_ops->fault(vma, vmf); - return r; -} - int radeon_mmap(struct file *filp, struct vm_area_struct *vma) { struct drm_file *file_priv; struct radeon_device *rdev; - int r; if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET)) { return drm_mmap(filp, vma); @@ -563,20 +546,9 @@ int radeon_mmap(struct file *filp, struct vm_area_struct *vma) file_priv = (struct drm_file *)filp->private_data; rdev = file_priv->minor->dev->dev_private; - if (rdev == NULL) { + if (!rdev) return -EINVAL; - } - r = ttm_bo_mmap(filp, vma, &rdev->mman.bdev); - if (unlikely(r != 0)) { - return r; - } - if (unlikely(ttm_vm_ops == NULL)) { - ttm_vm_ops = vma->vm_ops; - radeon_ttm_vm_ops = *ttm_vm_ops; - radeon_ttm_vm_ops.fault = &radeon_ttm_fault; - } - vma->vm_ops = &radeon_ttm_vm_ops; - return 0; + return ttm_bo_mmap(filp, vma, &rdev->mman.bdev); } diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c index b12ff76..0bd0c6e 100644 --- a/drivers/gpu/drm/radeon/rs690.c +++ b/drivers/gpu/drm/radeon/rs690.c @@ -302,9 +302,11 @@ void rs690_crtc_bandwidth_compute(struct radeon_device *rdev, if (rdev->pm.max_bandwidth.full > rdev->pm.sideport_bandwidth.full && rdev->pm.sideport_bandwidth.full) rdev->pm.max_bandwidth = rdev->pm.sideport_bandwidth; - read_delay_latency.full = rfixed_const(370 * 800 * 1000); + read_delay_latency.full = rfixed_const(800 * 1000); read_delay_latency.full = rfixed_div(read_delay_latency, rdev->pm.igp_sideport_mclk); + a.full = rfixed_const(370); + read_delay_latency.full = rfixed_mul(read_delay_latency, a); } else { if (rdev->pm.max_bandwidth.full > rdev->pm.k8_bandwidth.full && rdev->pm.k8_bandwidth.full) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 0ed436e..e6e7ce3 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -67,7 +67,7 @@ static struct attribute *ttm_bo_global_attrs[] = { NULL }; 
-static struct sysfs_ops ttm_bo_global_ops = { +static const struct sysfs_ops ttm_bo_global_ops = { .show = &ttm_bo_global_show }; diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 1c040d0..f9e4af8 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -73,7 +73,7 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct ttm_buffer_object *bo = (struct ttm_buffer_object *) vma->vm_private_data; - struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_device *bdev; unsigned long bus_base; unsigned long bus_offset; unsigned long bus_size; @@ -88,6 +88,10 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) unsigned long address = (unsigned long)vmf->virtual_address; int retval = VM_FAULT_NOPAGE; + if (!bo) + return VM_FAULT_NOPAGE; + bdev = bo->bdev; + /* * Work around locking order reversal in fault / nopfn * between mmap_sem and bo_reserve: Perform a trylock operation diff --git a/drivers/gpu/drm/ttm/ttm_global.c b/drivers/gpu/drm/ttm/ttm_global.c index b170071..28ae90e 100644 --- a/drivers/gpu/drm/ttm/ttm_global.c +++ b/drivers/gpu/drm/ttm/ttm_global.c @@ -36,7 +36,7 @@ struct ttm_global_item { struct mutex mutex; void *object; - int refcount; + atomic_t refcount; }; static struct ttm_global_item glob[TTM_GLOBAL_NUM]; @@ -49,7 +49,7 @@ void ttm_global_init(void) struct ttm_global_item *item = &glob[i]; mutex_init(&item->mutex); item->object = NULL; - item->refcount = 0; + atomic_set(&item->refcount, 0); } } @@ -59,7 +59,7 @@ void ttm_global_release(void) for (i = 0; i < TTM_GLOBAL_NUM; ++i) { struct ttm_global_item *item = &glob[i]; BUG_ON(item->object != NULL); - BUG_ON(item->refcount != 0); + BUG_ON(atomic_read(&item->refcount) != 0); } } @@ -70,7 +70,7 @@ int ttm_global_item_ref(struct ttm_global_reference *ref) void *object; mutex_lock(&item->mutex); - if (item->refcount == 0) { + if (atomic_read(&item->refcount) == 0) { item->object = 
kzalloc(ref->size, GFP_KERNEL); if (unlikely(item->object == NULL)) { ret = -ENOMEM; @@ -83,7 +83,7 @@ int ttm_global_item_ref(struct ttm_global_reference *ref) goto out_err; } - ++item->refcount; + atomic_inc(&item->refcount); ref->object = item->object; object = item->object; mutex_unlock(&item->mutex); @@ -100,9 +100,9 @@ void ttm_global_item_unref(struct ttm_global_reference *ref) struct ttm_global_item *item = &glob[ref->global_type]; mutex_lock(&item->mutex); - BUG_ON(item->refcount == 0); + BUG_ON(atomic_read(&item->refcount) == 0); BUG_ON(ref->object != item->object); - if (--item->refcount == 0) { + if (atomic_dec_and_test(&item->refcount)) { ref->release(ref); item->object = NULL; } diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c index 072c281..d8ef483 100644 --- a/drivers/gpu/drm/ttm/ttm_memory.c +++ b/drivers/gpu/drm/ttm/ttm_memory.c @@ -152,7 +152,7 @@ static struct attribute *ttm_mem_zone_attrs[] = { NULL }; -static struct sysfs_ops ttm_mem_zone_ops = { +static const struct sysfs_ops ttm_mem_zone_ops = { .show = &ttm_mem_zone_show, .store = &ttm_mem_zone_store }; diff --git a/drivers/gpu/drm/via/via_drv.h b/drivers/gpu/drm/via/via_drv.h index cafcb84..b8e66cc 100644 --- a/drivers/gpu/drm/via/via_drv.h +++ b/drivers/gpu/drm/via/via_drv.h @@ -51,7 +51,7 @@ typedef struct drm_via_ring_buffer { typedef uint32_t maskarray_t[5]; typedef struct drm_via_irq { - atomic_t irq_received; + atomic_unchecked_t irq_received; uint32_t pending_mask; uint32_t enable_mask; wait_queue_head_t irq_queue; @@ -75,7 +75,7 @@ typedef struct drm_via_private { struct timeval last_vblank; int last_vblank_valid; unsigned usec_per_vblank; - atomic_t vbl_received; + atomic_unchecked_t vbl_received; drm_via_state_t hc_state; char pci_buf[VIA_PCI_BUF_SIZE]; const uint32_t *fire_offsets[VIA_FIRE_BUF_SIZE]; diff --git a/drivers/gpu/drm/via/via_irq.c b/drivers/gpu/drm/via/via_irq.c index 5935b88..127a8a6 100644 --- a/drivers/gpu/drm/via/via_irq.c +++ 
b/drivers/gpu/drm/via/via_irq.c @@ -102,7 +102,7 @@ u32 via_get_vblank_counter(struct drm_device *dev, int crtc) if (crtc != 0) return 0; - return atomic_read(&dev_priv->vbl_received); + return atomic_read_unchecked(&dev_priv->vbl_received); } irqreturn_t via_driver_irq_handler(DRM_IRQ_ARGS) @@ -117,8 +117,8 @@ irqreturn_t via_driver_irq_handler(DRM_IRQ_ARGS) status = VIA_READ(VIA_REG_INTERRUPT); if (status & VIA_IRQ_VBLANK_PENDING) { - atomic_inc(&dev_priv->vbl_received); - if (!(atomic_read(&dev_priv->vbl_received) & 0x0F)) { + atomic_inc_unchecked(&dev_priv->vbl_received); + if (!(atomic_read_unchecked(&dev_priv->vbl_received) & 0x0F)) { do_gettimeofday(&cur_vblank); if (dev_priv->last_vblank_valid) { dev_priv->usec_per_vblank = @@ -128,7 +128,7 @@ irqreturn_t via_driver_irq_handler(DRM_IRQ_ARGS) dev_priv->last_vblank = cur_vblank; dev_priv->last_vblank_valid = 1; } - if (!(atomic_read(&dev_priv->vbl_received) & 0xFF)) { + if (!(atomic_read_unchecked(&dev_priv->vbl_received) & 0xFF)) { DRM_DEBUG("US per vblank is: %u\n", dev_priv->usec_per_vblank); } @@ -138,7 +138,7 @@ irqreturn_t via_driver_irq_handler(DRM_IRQ_ARGS) for (i = 0; i < dev_priv->num_irqs; ++i) { if (status & cur_irq->pending_mask) { - atomic_inc(&cur_irq->irq_received); + atomic_inc_unchecked(&cur_irq->irq_received); DRM_WAKEUP(&cur_irq->irq_queue); handled = 1; if (dev_priv->irq_map[drm_via_irq_dma0_td] == i) { @@ -244,11 +244,11 @@ via_driver_irq_wait(struct drm_device * dev, unsigned int irq, int force_sequenc DRM_WAIT_ON(ret, cur_irq->irq_queue, 3 * DRM_HZ, ((VIA_READ(masks[irq][2]) & masks[irq][3]) == masks[irq][4])); - cur_irq_sequence = atomic_read(&cur_irq->irq_received); + cur_irq_sequence = atomic_read_unchecked(&cur_irq->irq_received); } else { DRM_WAIT_ON(ret, cur_irq->irq_queue, 3 * DRM_HZ, (((cur_irq_sequence = - atomic_read(&cur_irq->irq_received)) - + atomic_read_unchecked(&cur_irq->irq_received)) - *sequence) <= (1 << 23))); } *sequence = cur_irq_sequence; @@ -286,7 +286,7 @@ void 
via_driver_irq_preinstall(struct drm_device * dev) } for (i = 0; i < dev_priv->num_irqs; ++i) { - atomic_set(&cur_irq->irq_received, 0); + atomic_set_unchecked(&cur_irq->irq_received, 0); cur_irq->enable_mask = dev_priv->irq_masks[i][0]; cur_irq->pending_mask = dev_priv->irq_masks[i][1]; DRM_INIT_WAITQUEUE(&cur_irq->irq_queue); @@ -368,7 +368,7 @@ int via_wait_irq(struct drm_device *dev, void *data, struct drm_file *file_priv) switch (irqwait->request.type & ~VIA_IRQ_FLAGS_MASK) { case VIA_IRQ_RELATIVE: irqwait->request.sequence += - atomic_read(&cur_irq->irq_received); + atomic_read_unchecked(&cur_irq->irq_received); irqwait->request.type &= ~_DRM_VBLANK_RELATIVE; case VIA_IRQ_ABSOLUTE: break; diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c index aa8688d..6a0140c 100644 --- a/drivers/gpu/vga/vgaarb.c +++ b/drivers/gpu/vga/vgaarb.c @@ -894,14 +894,20 @@ static ssize_t vga_arb_write(struct file *file, const char __user * buf, uc = &priv->cards[i]; } - if (!uc) - return -EINVAL; + if (!uc) { + ret_val = -EINVAL; + goto done; + } - if (io_state & VGA_RSRC_LEGACY_IO && uc->io_cnt == 0) - return -EINVAL; + if (io_state & VGA_RSRC_LEGACY_IO && uc->io_cnt == 0) { + ret_val = -EINVAL; + goto done; + } - if (io_state & VGA_RSRC_LEGACY_MEM && uc->mem_cnt == 0) - return -EINVAL; + if (io_state & VGA_RSRC_LEGACY_MEM && uc->mem_cnt == 0) { + ret_val = -EINVAL; + goto done; + } vga_put(pdev, io_state); diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 11f8069..4783396 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1752,7 +1752,7 @@ static bool hid_ignore(struct hid_device *hdev) int hid_add_device(struct hid_device *hdev) { - static atomic_t id = ATOMIC_INIT(0); + static atomic_unchecked_t id = ATOMIC_INIT(0); int ret; if (WARN_ON(hdev->status & HID_STAT_ADDED)) @@ -1766,7 +1766,7 @@ int hid_add_device(struct hid_device *hdev) /* XXX hack, any other cleaner solution after the driver core * is converted to allow more than 20 
bytes as the device name? */ dev_set_name(&hdev->dev, "%04X:%04X:%04X.%04X", hdev->bus, - hdev->vendor, hdev->product, atomic_inc_return(&id)); + hdev->vendor, hdev->product, atomic_inc_return_unchecked(&id)); ret = device_add(&hdev->dev); if (!ret) diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index 8b6ee24..70f657d 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -617,7 +617,7 @@ static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return put_user(HID_VERSION, (int __user *)arg); case HIDIOCAPPLICATION: - if (arg < 0 || arg >= hid->maxapplication) + if (arg >= hid->maxapplication) return -EINVAL; for (i = 0; i < hid->maxcollection; i++) diff --git a/drivers/hwmon/lis3lv02d.c b/drivers/hwmon/lis3lv02d.c index 5d5ed69..f40533e 100644 --- a/drivers/hwmon/lis3lv02d.c +++ b/drivers/hwmon/lis3lv02d.c @@ -146,7 +146,7 @@ static irqreturn_t lis302dl_interrupt(int irq, void *dummy) * the lid is closed. This leads to interrupts as soon as a little move * is done. 
*/ - atomic_inc(&lis3_dev.count); + atomic_inc_unchecked(&lis3_dev.count); wake_up_interruptible(&lis3_dev.misc_wait); kill_fasync(&lis3_dev.async_queue, SIGIO, POLL_IN); @@ -160,7 +160,7 @@ static int lis3lv02d_misc_open(struct inode *inode, struct file *file) if (test_and_set_bit(0, &lis3_dev.misc_opened)) return -EBUSY; /* already open */ - atomic_set(&lis3_dev.count, 0); + atomic_set_unchecked(&lis3_dev.count, 0); /* * The sensor can generate interrupts for free-fall and direction @@ -206,7 +206,7 @@ static ssize_t lis3lv02d_misc_read(struct file *file, char __user *buf, add_wait_queue(&lis3_dev.misc_wait, &wait); while (true) { set_current_state(TASK_INTERRUPTIBLE); - data = atomic_xchg(&lis3_dev.count, 0); + data = atomic_xchg_unchecked(&lis3_dev.count, 0); if (data) break; @@ -244,7 +244,7 @@ out: static unsigned int lis3lv02d_misc_poll(struct file *file, poll_table *wait) { poll_wait(file, &lis3_dev.misc_wait, wait); - if (atomic_read(&lis3_dev.count)) + if (atomic_read_unchecked(&lis3_dev.count)) return POLLIN | POLLRDNORM; return 0; } diff --git a/drivers/hwmon/lis3lv02d.h b/drivers/hwmon/lis3lv02d.h index 7cdd76f..fe0efdf 100644 --- a/drivers/hwmon/lis3lv02d.h +++ b/drivers/hwmon/lis3lv02d.h @@ -201,7 +201,7 @@ struct lis3lv02d { struct input_polled_dev *idev; /* input device */ struct platform_device *pdev; /* platform device */ - atomic_t count; /* interrupt count after last read */ + atomic_unchecked_t count; /* interrupt count after last read */ int xcalib; /* calibrated null value for x */ int ycalib; /* calibrated null value for y */ int zcalib; /* calibrated null value for z */ diff --git a/drivers/hwmon/sht15.c b/drivers/hwmon/sht15.c index 2040507..706ec1e 100644 --- a/drivers/hwmon/sht15.c +++ b/drivers/hwmon/sht15.c @@ -112,7 +112,7 @@ struct sht15_data { int supply_uV; int supply_uV_valid; struct work_struct update_supply_work; - atomic_t interrupt_handled; + atomic_unchecked_t interrupt_handled; }; /** @@ -245,13 +245,13 @@ static inline int 
sht15_update_single_val(struct sht15_data *data, return ret; gpio_direction_input(data->pdata->gpio_data); - atomic_set(&data->interrupt_handled, 0); + atomic_set_unchecked(&data->interrupt_handled, 0); enable_irq(gpio_to_irq(data->pdata->gpio_data)); if (gpio_get_value(data->pdata->gpio_data) == 0) { disable_irq_nosync(gpio_to_irq(data->pdata->gpio_data)); /* Only relevant if the interrupt hasn't occured. */ - if (!atomic_read(&data->interrupt_handled)) + if (!atomic_read_unchecked(&data->interrupt_handled)) schedule_work(&data->read_work); } ret = wait_event_timeout(data->wait_queue, @@ -398,7 +398,7 @@ static irqreturn_t sht15_interrupt_fired(int irq, void *d) struct sht15_data *data = d; /* First disable the interrupt */ disable_irq_nosync(irq); - atomic_inc(&data->interrupt_handled); + atomic_inc_unchecked(&data->interrupt_handled); /* Then schedule a reading work struct */ if (data->flag != SHT15_READING_NOTHING) schedule_work(&data->read_work); @@ -449,11 +449,11 @@ static void sht15_bh_read_data(struct work_struct *work_s) here as could have gone low in meantime s