diff --git a/Documentation/dontdiff b/Documentation/dontdiff index dfa6fc6..ccbfbf3 100644 --- a/Documentation/dontdiff +++ b/Documentation/dontdiff @@ -2,9 +2,11 @@ *.aux *.bin *.bz2 +*.c.[012]*.* *.cis *.cpio *.csp +*.dbg *.dsp *.dvi *.elf @@ -14,6 +16,7 @@ *.gcov *.gen.S *.gif +*.gmo *.grep *.grp *.gz @@ -48,14 +51,17 @@ *.tab.h *.tex *.ver +*.vim *.xml *.xz *_MODULES +*_reg_safe.h *_vga16.c *~ \#*# *.9 -.* +.[^g]* +.gen* .*.d .mm 53c700_d.h @@ -70,9 +76,11 @@ Kerntypes Module.markers Module.symvers PENDING +PERF* SCCS System.map* TAGS +TRACEEVENT-CFLAGS aconf af_names.h aic7*reg.h* @@ -81,6 +89,7 @@ aic7*seq.h* aicasm aicdb.h* altivec*.c +ashldi3.S asm-offsets.h asm_offsets.h autoconf.h* @@ -93,19 +102,24 @@ bounds.h bsetup btfixupprep build +builtin-policy.h bvmlinux bzImage* capability_names.h capflags.c classlist.h* +clut_vga16.c +common-cmds.h comp*.log compile.h* conf config config-* config_data.h* +config.c config.mak config.mak.autogen +config.tmp conmakehash consolemap_deftbl.c* cpustr.h @@ -116,9 +130,11 @@ devlist.h* dnotify_test docproc dslm +dtc-lexer.lex.c elf2ecoff elfconfig.h* evergreen_reg_safe.h +exception_policy.conf fixdep flask.h fore200e_mkfirm @@ -126,12 +142,15 @@ fore200e_pca_fw.c* gconf gconf.glade.h gen-devlist +gen-kdb_cmds.c gen_crc32table gen_init_cpio generated genheaders genksyms *_gray256.c +hash +hid-example hpet_example hugepage-mmap hugepage-shm @@ -146,7 +165,7 @@ int32.c int4.c int8.c kallsyms -kconfig +kern_constants.h keywords.c ksym.c* ksym.h* @@ -154,7 +173,7 @@ kxgettext lkc_defs.h lex.c lex.*.c -linux +lib1funcs.S logo_*.c logo_*_clut224.c logo_*_mono.c @@ -166,14 +185,15 @@ machtypes.h map map_hugetlb maui_boot.h -media mconf +mdp miboot* mk_elfconfig mkboot mkbugboot mkcpustr mkdep +mkpiggy mkprep mkregtable mktables @@ -208,7 +228,10 @@ r200_reg_safe.h r300_reg_safe.h r420_reg_safe.h r600_reg_safe.h +randomize_layout_hash.h +randomize_layout_seed.h recordmcount +regdb.c relocs rlim_names.h rn50_reg_safe.h @@ -218,7 +241,10 @@ series setup setup.bin setup.elf +signing_key* +size_overflow_hash.h sImage +slabinfo sm_tbl* split-include syscalltab.h @@ -229,6 +255,7 @@ tftpboot.img timeconst.h times.h* trix_boot.h +user_constants.h utsrelease.h* vdso-syms.lds vdso.lds @@ -246,7 +273,9 @@ vmlinux vmlinux-* vmlinux.aout vmlinux.bin.all +vmlinux.bin.bz2 vmlinux.lds +vmlinux.relocs vmlinuz voffset.h vsyscall.lds @@ -254,9 +283,12 @@ vsyscall_32.lds wanxlfw.inc uImage unifdef +utsrelease.h wakeup.bin wakeup.elf wakeup.lds +x509* zImage* zconf.hash.c +zconf.lex.c zoffset.h diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index f0001eb..8f5703b 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -859,6 +859,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted. gpt [EFI] Forces disk with valid GPT signature but invalid Protective MBR to be treated as GPT. + grsec_proc_gid= [GRKERNSEC_PROC_USERGROUP] Chooses GID to + ignore grsecurity's /proc restrictions + + grsec_sysfs_restrict= Format: 0 | 1 + Default: 1 + Disables GRKERNSEC_SYSFS_RESTRICT if enabled in config + hashdist= [KNL,NUMA] Large hashes allocated during boot are distributed across NUMA nodes. Defaults on for 64-bit NUMA, off otherwise. @@ -1963,6 +1970,27 @@ bytes respectively. Such letter suffixes can also be entirely omitted. the specified number of seconds. This is to be used if your oopses keep scrolling off the screen. + pax_nouderef [X86] disables UDEREF. 
Most likely needed under certain + virtualization environments that don't cope well with the + expand down segment used by UDEREF on X86-32 or the frequent + page table updates on X86-64. + + pax_sanitize_slab= + Format: { 0 | 1 | off | fast | full } + Options '0' and '1' are only provided for backward + compatibility, 'off' or 'fast' should be used instead. + 0|off : disable slab object sanitization + 1|fast: enable slab object sanitization excluding + whitelisted slabs (default) + full : sanitize all slabs, even the whitelisted ones + + pax_softmode= 0/1 to disable/enable PaX softmode at boot time. + + pax_extra_latent_entropy + Enable a very simple form of latent entropy extraction + from the first 4GB of memory as the bootmem allocator + passes the memory pages to the buddy allocator. + + pcbit= [HW,ISDN] pcd. [PARIDE] diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 88fd7f5..b318a78 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt @@ -163,16 +163,22 @@ This value can be used to query and set the core dump mode for setuid or otherwise protected/tainted binaries. The modes are 0 - (default) - traditional behaviour. Any process which has changed - privilege levels or is execute only will not be dumped + privilege levels or is execute only will not be dumped. 1 - (debug) - all processes dump core when possible. The core dump is owned by the current user and no security is applied. This is intended for system debugging situations only. Ptrace is unchecked. + This is insecure as it allows regular users to examine the memory + contents of privileged processes. 2 - (suidsafe) - any binary which normally would not be dumped is dumped - readable by root only. This allows the end user to remove - such a dump but not access it directly. For security reasons - core dumps in this mode will not overwrite one another or - other files. This mode is appropriate when administrators are - attempting to debug problems in a normal environment. + anyway, but only if the "core_pattern" kernel sysctl is set to + either a pipe handler or a fully qualified path. (For more details + on this limitation, see CVE-2006-2451.) This mode is appropriate + when administrators are attempting to debug problems in a normal + environment, and either have a core dump pipe handler that knows + to treat privileged core dumps with care, or a specific directory + defined for catching core dumps. If a core dump happens without + a pipe handler or fully qualified path, a message will be emitted + to syslog warning about the lack of a correct setting.
============================================================== diff --git a/Documentation/vserver/debug.txt b/Documentation/vserver/debug.txt new file mode 100644 index 0000000..4464a0f --- /dev/null +++ b/Documentation/vserver/debug.txt @@ -0,0 +1,154 @@ + +debug_cvirt: + + 2 4 "vx_map_tgid: %p/%llx: %d -> %d" + "vx_rmap_tgid: %p/%llx: %d -> %d" + +debug_dlim: + + 0 1 "ALLOC (%p,#%d)%c inode (%d)" + "FREE (%p,#%d)%c inode" + 1 2 "ALLOC (%p,#%d)%c %lld bytes (%d)" + "FREE (%p,#%d)%c %lld bytes" + 2 4 "ADJUST: %lld,%lld on %ld,%ld [mult=%d]" + 3 8 "ext3_has_free_blocks(%p): %lu<%lu+1, %c, %u!=%u r=%d" + "ext3_has_free_blocks(%p): free=%lu, root=%lu" + "rcu_free_dl_info(%p)" + 4 10 "alloc_dl_info(%p,%d) = %p" + "dealloc_dl_info(%p)" + "get_dl_info(%p[#%d.%d])" + "put_dl_info(%p[#%d.%d])" + 5 20 "alloc_dl_info(%p,%d)*" + 6 40 "__hash_dl_info: %p[#%d]" + "__unhash_dl_info: %p[#%d]" + 7 80 "locate_dl_info(%p,#%d) = %p" + +debug_misc: + + 0 1 "destroy_dqhash: %p [#0x%08x] c=%d" + "new_dqhash: %p [#0x%08x]" + "vroot[%d]_clr_dev: dev=%p[%lu,%d:%d]" + "vroot[%d]_get_real_bdev: dev=%p[%lu,%d:%d]" + "vroot[%d]_set_dev: dev=%p[%lu,%d:%d]" + "vroot_get_real_bdev not set" + 1 2 "cow_break_link(»%s«)" + "temp copy »%s«" + 2 4 "dentry_open(new): %p" + "dentry_open(old): %p" + "lookup_create(new): %p" + "old path »%s«" + "path_lookup(old): %d" + "vfs_create(new): %d" + "vfs_rename: %d" + "vfs_sendfile: %d" + 3 8 "fput(new_file=%p[#%d])" + "fput(old_file=%p[#%d])" + 4 10 "vx_info_kill(%p[#%d],%d,%d) = %d" + "vx_info_kill(%p[#%d],%d,%d)*" + 5 20 "vs_reboot(%p[#%d],%d)" + 6 40 "dropping task %p[#%u,%u] for %p[#%u,%u]" + +debug_net: + + 2 4 "nx_addr_conflict(%p,%p) %d.%d,%d.%d" + 3 8 "inet_bind(%p) %d.%d.%d.%d, %d.%d.%d.%d, %d.%d.%d.%d" + "inet_bind(%p)* %p,%p;%lx %d.%d.%d.%d" + 4 10 "ip_route_connect(%p) %p,%p;%lx" + 5 20 "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx" + 6 40 "sk,egf: %p [#%d] (from %d)" + "sk,egn: %p [#%d] (from %d)" + "sk,req: %p [#%d] (from %d)" + "sk: %p [#%d] (from %d)" + "tw: %p [#%d] (from %d)" + 7 80 "__sock_recvmsg: %p[%p,%p,%p;%d]:%d/%d" + "__sock_sendmsg: %p[%p,%p,%p;%d]:%d/%d" + +debug_nid: + + 0 1 "__lookup_nx_info(#%u): %p[#%u]" + "alloc_nx_info(%d) = %p" + "create_nx_info(%d) (dynamic rejected)" + "create_nx_info(%d) = %p (already there)" + "create_nx_info(%d) = %p (new)" + "dealloc_nx_info(%p)" + 1 2 "alloc_nx_info(%d)*" + "create_nx_info(%d)*" + 2 4 "get_nx_info(%p[#%d.%d])" + "put_nx_info(%p[#%d.%d])" + 3 8 "claim_nx_info(%p[#%d.%d.%d]) %p" + "clr_nx_info(%p[#%d.%d])" + "init_nx_info(%p[#%d.%d])" + "release_nx_info(%p[#%d.%d.%d]) %p" + "set_nx_info(%p[#%d.%d])" + 4 10 "__hash_nx_info: %p[#%d]" + "__nx_dynamic_id: [#%d]" + "__unhash_nx_info: %p[#%d.%d.%d]" + 5 20 "moved task %p into nxi:%p[#%d]" + "nx_migrate_task(%p,%p[#%d.%d.%d])" + "task_get_nx_info(%p)" + 6 40 "nx_clear_persistent(%p[#%d])" + +debug_quota: + + 0 1 "quota_sync_dqh(%p,%d) discard inode %p" + 1 2 "quota_sync_dqh(%p,%d)" + "sync_dquots(%p,%d)" + "sync_dquots_dqh(%p,%d)" + 3 8 "do_quotactl(%p,%d,cmd=%d,id=%d,%p)" + +debug_switch: + + 0 1 "vc: VCMD_%02d_%d[%d], %d,%p [%d,%d,%x,%x]" + 1 2 "vc: VCMD_%02d_%d[%d] = %08lx(%ld) [%d,%d]" + 4 10 "%s: (%s %s) returned %s with %d" + +debug_tag: + + 7 80 "dx_parse_tag(»%s«): %d:#%d" + "dx_propagate_tag(%p[#%lu.%d]): %d,%d" + +debug_xid: + + 0 1 "__lookup_vx_info(#%u): %p[#%u]" + "alloc_vx_info(%d) = %p" + "alloc_vx_info(%d)*" + "create_vx_info(%d) (dynamic rejected)" + "create_vx_info(%d) = %p (already there)" + "create_vx_info(%d) = %p (new)" + 
"dealloc_vx_info(%p)" + "loc_vx_info(%d) = %p (found)" + "loc_vx_info(%d) = %p (new)" + "loc_vx_info(%d) = %p (not available)" + 1 2 "create_vx_info(%d)*" + "loc_vx_info(%d)*" + 2 4 "get_vx_info(%p[#%d.%d])" + "put_vx_info(%p[#%d.%d])" + 3 8 "claim_vx_info(%p[#%d.%d.%d]) %p" + "clr_vx_info(%p[#%d.%d])" + "init_vx_info(%p[#%d.%d])" + "release_vx_info(%p[#%d.%d.%d]) %p" + "set_vx_info(%p[#%d.%d])" + 4 10 "__hash_vx_info: %p[#%d]" + "__unhash_vx_info: %p[#%d.%d.%d]" + "__vx_dynamic_id: [#%d]" + 5 20 "enter_vx_info(%p[#%d],%p) %p[#%d,%p]" + "leave_vx_info(%p[#%d,%p]) %p[#%d,%p]" + "moved task %p into vxi:%p[#%d]" + "task_get_vx_info(%p)" + "vx_migrate_task(%p,%p[#%d.%d])" + 6 40 "vx_clear_persistent(%p[#%d])" + "vx_exit_init(%p[#%d],%p[#%d,%d,%d])" + "vx_set_init(%p[#%d],%p[#%d,%d,%d])" + "vx_set_persistent(%p[#%d])" + "vx_set_reaper(%p[#%d],%p[#%d,%d])" + 7 80 "vx_child_reaper(%p[#%u,%u]) = %p[#%u,%u]" + + +debug_limit: + + n 2^n "vx_acc_cres[%5d,%s,%2d]: %5d%s" + "vx_cres_avail[%5d,%s,%2d]: %5ld > %5d + %5d" + + m 2^m "vx_acc_page[%5d,%s,%2d]: %5d%s" + "vx_acc_pages[%5d,%s,%2d]: %5d += %5d" + "vx_pages_avail[%5d,%s,%2d]: %5ld > %5d + %5d" diff --git a/Makefile b/Makefile index 8071888..3889f72 100644 --- a/Makefile +++ b/Makefile @@ -245,8 +245,9 @@ CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ HOSTCC = gcc HOSTCXX = g++ -HOSTCFLAGS = -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -HOSTCXXFLAGS = -O2 +HOSTCFLAGS = -Wall -W -Wmissing-prototypes -Wstrict-prototypes -Wno-unused-parameter -Wno-missing-field-initializers -O2 -fomit-frame-pointer -fno-delete-null-pointer-checks +HOSTCFLAGS += $(call cc-option, -Wno-empty-body) +HOSTCXXFLAGS = -O2 -Wall -W -Wno-array-bounds # Decide whether to build built-in, modular, or both. # Normally, just do built-in. 
@@ -312,9 +313,15 @@ endif # If the user is running make -s (silent mode), suppress echoing of # commands +ifneq ($(filter 4.%,$(MAKE_VERSION)),) # make-4 +ifneq ($(filter %s ,$(firstword x$(MAKEFLAGS))),) + quiet=silent_ +endif +else # make-3.8x ifneq ($(findstring s,$(MAKEFLAGS)),) quiet=silent_ endif +endif export quiet Q KBUILD_VERBOSE @@ -407,8 +414,8 @@ export RCS_TAR_IGNORE := --exclude SCCS --exclude BitKeeper --exclude .svn --exc # Rules shared between *config targets and build targets # Basic helpers built in scripts/ -PHONY += scripts_basic -scripts_basic: +PHONY += scripts_basic gcc-plugins +scripts_basic: gcc-plugins $(Q)$(MAKE) $(build)=scripts/basic $(Q)rm -f .tmp_quiet_recordmcount @@ -564,6 +571,75 @@ else KBUILD_CFLAGS += -O2 endif +# Tell gcc to never replace conditional load with a non-conditional one +KBUILD_CFLAGS += $(call cc-option,--param=allow-store-data-races=0) + +ifndef DISABLE_PAX_PLUGINS +ifeq ($(call cc-ifversion, -ge, 0408, y), y) +PLUGINCC := $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-plugin.sh "$(HOSTCXX)" "$(HOSTCXX)" "$(CC)") +else +PLUGINCC := $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-plugin.sh "$(HOSTCC)" "$(HOSTCXX)" "$(CC)") +endif +ifneq ($(PLUGINCC),) +ifdef CONFIG_PAX_CONSTIFY_PLUGIN +CONSTIFY_PLUGIN_CFLAGS := -fplugin=$(objtree)/tools/gcc/constify_plugin.so -DCONSTIFY_PLUGIN +endif +ifdef CONFIG_PAX_MEMORY_STACKLEAK +STACKLEAK_PLUGIN_CFLAGS := -fplugin=$(objtree)/tools/gcc/stackleak_plugin.so -DSTACKLEAK_PLUGIN +STACKLEAK_PLUGIN_CFLAGS += -fplugin-arg-stackleak_plugin-track-lowest-sp=100 +endif +ifdef CONFIG_KALLOCSTAT_PLUGIN +KALLOCSTAT_PLUGIN_CFLAGS := -fplugin=$(objtree)/tools/gcc/kallocstat_plugin.so +endif +ifdef CONFIG_PAX_KERNEXEC_PLUGIN +KERNEXEC_PLUGIN_CFLAGS := -fplugin=$(objtree)/tools/gcc/kernexec_plugin.so +KERNEXEC_PLUGIN_CFLAGS += -fplugin-arg-kernexec_plugin-method=$(CONFIG_PAX_KERNEXEC_PLUGIN_METHOD) -DKERNEXEC_PLUGIN +KERNEXEC_PLUGIN_AFLAGS := -DKERNEXEC_PLUGIN +endif +ifdef CONFIG_GRKERNSEC_RANDSTRUCT +RANDSTRUCT_PLUGIN_CFLAGS := -fplugin=$(objtree)/tools/gcc/randomize_layout_plugin.so -DRANDSTRUCT_PLUGIN +ifdef CONFIG_GRKERNSEC_RANDSTRUCT_PERFORMANCE +RANDSTRUCT_PLUGIN_CFLAGS += -fplugin-arg-randomize_layout_plugin-performance-mode +endif +endif +ifdef CONFIG_CHECKER_PLUGIN +ifeq ($(call cc-ifversion, -ge, 0406, y), y) +CHECKER_PLUGIN_CFLAGS := -fplugin=$(objtree)/tools/gcc/checker_plugin.so -DCHECKER_PLUGIN +endif +endif +COLORIZE_PLUGIN_CFLAGS := -fplugin=$(objtree)/tools/gcc/colorize_plugin.so +ifdef CONFIG_PAX_SIZE_OVERFLOW +SIZE_OVERFLOW_PLUGIN_CFLAGS := -fplugin=$(objtree)/tools/gcc/size_overflow_plugin/size_overflow_plugin.so -DSIZE_OVERFLOW_PLUGIN +endif +ifdef CONFIG_PAX_LATENT_ENTROPY +LATENT_ENTROPY_PLUGIN_CFLAGS := -fplugin=$(objtree)/tools/gcc/latent_entropy_plugin.so -DLATENT_ENTROPY_PLUGIN +endif +ifdef CONFIG_PAX_MEMORY_STRUCTLEAK +STRUCTLEAK_PLUGIN_CFLAGS := -fplugin=$(objtree)/tools/gcc/structleak_plugin.so -DSTRUCTLEAK_PLUGIN +endif +GCC_PLUGINS_CFLAGS := $(CONSTIFY_PLUGIN_CFLAGS) $(STACKLEAK_PLUGIN_CFLAGS) $(KALLOCSTAT_PLUGIN_CFLAGS) +GCC_PLUGINS_CFLAGS += $(KERNEXEC_PLUGIN_CFLAGS) $(CHECKER_PLUGIN_CFLAGS) $(COLORIZE_PLUGIN_CFLAGS) +GCC_PLUGINS_CFLAGS += $(SIZE_OVERFLOW_PLUGIN_CFLAGS) $(LATENT_ENTROPY_PLUGIN_CFLAGS) $(STRUCTLEAK_PLUGIN_CFLAGS) +GCC_PLUGINS_CFLAGS += $(RANDSTRUCT_PLUGIN_CFLAGS) +GCC_PLUGINS_AFLAGS := $(KERNEXEC_PLUGIN_AFLAGS) +export PLUGINCC CONSTIFY_PLUGIN LATENT_ENTROPY_PLUGIN_CFLAGS +ifeq ($(KBUILD_EXTMOD),) +gcc-plugins: + $(Q)$(MAKE) $(build)=tools/gcc +else +gcc-plugins: ; 
+endif +else +gcc-plugins: +ifeq ($(call cc-ifversion, -ge, 0405, y), y) + $(error Your gcc installation does not support plugins. If the necessary headers for plugin support are missing, they should be installed. On Debian, apt-get install gcc--plugin-dev. If you choose to ignore this error and lessen the improvements provided by this patch, re-run make with the DISABLE_PAX_PLUGINS=y argument.)) +else + $(Q)echo "warning, your gcc version does not support plugins, you should upgrade it to gcc 4.5 at least" +endif + $(Q)echo "PAX_MEMORY_STACKLEAK, constification, PAX_LATENT_ENTROPY and other features will be less secure. PAX_SIZE_OVERFLOW will not be active." +endif +endif + include $(srctree)/arch/$(SRCARCH)/Makefile ifneq ($(CONFIG_FRAME_WARN),0) @@ -596,7 +672,7 @@ KBUILD_CFLAGS += $(call cc-option, -fno-var-tracking-assignments) ifdef CONFIG_DEBUG_INFO KBUILD_CFLAGS += -g -KBUILD_AFLAGS += -gdwarf-2 +KBUILD_AFLAGS += -Wa,--gdwarf-2 endif ifdef CONFIG_DEBUG_INFO_REDUCED @@ -710,7 +786,7 @@ export mod_strip_cmd ifeq ($(KBUILD_EXTMOD),) -core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ +core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ grsecurity/ vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ @@ -934,6 +1010,8 @@ vmlinux.o: $(modpost-init) $(vmlinux-main) FORCE # The actual objects are generated when descending, # make sure no implicit rule kicks in +$(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): KBUILD_CFLAGS += $(GCC_PLUGINS_CFLAGS) +$(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): KBUILD_AFLAGS += $(GCC_PLUGINS_AFLAGS) $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ; # Handle descending into subdirectories listed in $(vmlinux-dirs) @@ -943,7 +1021,7 @@ $(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ; # Error messages still appears in the original language PHONY += $(vmlinux-dirs) -$(vmlinux-dirs): prepare scripts +$(vmlinux-dirs): gcc-plugins prepare scripts $(Q)$(MAKE) $(build)=$@ # Store (new) KERNELRELASE string in include/config/kernel.release @@ -983,10 +1061,13 @@ prepare1: prepare2 include/linux/version.h include/generated/utsrelease.h \ archprepare: archscripts prepare1 scripts_basic +prepare0: KBUILD_CFLAGS += $(GCC_PLUGINS_CFLAGS) +prepare0: KBUILD_AFLAGS += $(GCC_PLUGINS_AFLAGS) prepare0: archprepare FORCE $(Q)$(MAKE) $(build)=. # All the preparing.. +prepare: KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS)) prepare: prepare0 # Generate some files @@ -1091,6 +1172,8 @@ all: modules # using awk while concatenating to the final file. PHONY += modules +modules: KBUILD_CFLAGS += $(GCC_PLUGINS_CFLAGS) +modules: KBUILD_AFLAGS += $(GCC_PLUGINS_AFLAGS) modules: $(vmlinux-dirs) $(if $(KBUILD_BUILTIN),vmlinux) modules.builtin $(Q)$(AWK) '!x[$$0]++' $(vmlinux-dirs:%=$(objtree)/%/modules.order) > $(objtree)/modules.order @$(kecho) ' Building modules, stage 2.'; @@ -1106,7 +1189,7 @@ modules.builtin: $(vmlinux-dirs:%=%/modules.builtin) # Target to prepare building external modules PHONY += modules_prepare -modules_prepare: prepare scripts +modules_prepare: gcc-plugins prepare scripts # Target to install modules PHONY += modules_install @@ -1130,7 +1213,7 @@ _modinst_: # boot a modules.dep even before / is mounted read-write. However the # boot script depmod is the master version. 
PHONY += _modinst_post -_modinst_post: _modinst_ +_modinst_post: include/config/kernel.release _modinst_ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.fwinst obj=firmware __fw_modinst $(call cmd,depmod) @@ -1166,6 +1249,9 @@ MRPROPER_DIRS += include/config usr/include include/generated \ arch/*/include/generated MRPROPER_FILES += .config .config.old .version .old_version \ include/linux/version.h \ + tools/gcc/size_overflow_plugin/size_overflow_hash_aux.h \ + tools/gcc/size_overflow_plugin/size_overflow_hash.h \ + tools/gcc/randomize_layout_seed.h \ Module.symvers tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS # clean - Delete most, but leave enough to build external modules @@ -1202,7 +1288,7 @@ distclean: mrproper @find $(srctree) $(RCS_FIND_IGNORE) \ \( -name '*.orig' -o -name '*.rej' -o -name '*~' \ -o -name '*.bak' -o -name '#*#' -o -name '.*.orig' \ - -o -name '.*.rej' \ + -o -name '.*.rej' -o -name '*.so' \ -o -name '*%' -o -name '.*.cmd' -o -name 'core' \) \ -type f -print | xargs rm -f @@ -1256,6 +1342,7 @@ help: @echo ' gtags - Generate GNU GLOBAL index' @echo ' kernelrelease - Output the release version string' @echo ' kernelversion - Output the version stored in Makefile' + @echo ' image_name - Output the image name' @echo ' headers_install - Install sanitised kernel headers to INSTALL_HDR_PATH'; \ echo ' (default: $(INSTALL_HDR_PATH))'; \ echo '' @@ -1363,6 +1450,8 @@ PHONY += $(module-dirs) modules $(module-dirs): crmodverdir $(objtree)/Module.symvers $(Q)$(MAKE) $(build)=$(patsubst _module_%,%,$@) +modules: KBUILD_CFLAGS += $(GCC_PLUGINS_CFLAGS) +modules: KBUILD_AFLAGS += $(GCC_PLUGINS_AFLAGS) modules: $(module-dirs) @$(kecho) ' Building modules, stage 2.'; $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost @@ -1449,7 +1538,7 @@ export_report: endif #ifeq ($(config-targets),1) endif #ifeq ($(mixed-targets),1) -PHONY += checkstack kernelrelease kernelversion +PHONY += checkstack kernelrelease kernelversion image_name # UML needs a little special treatment here. It wants to use the host # toolchain, so needs $(SUBARCH) passed to checkstack.pl. 
Everyone @@ -1470,6 +1559,18 @@ kernelrelease: kernelversion: @echo $(KERNELVERSION) +image_name: + @echo $(KBUILD_IMAGE) + +# Clear a bunch of variables before executing the submake +tools/: FORCE + $(Q)mkdir -p $(objtree)/tools + $(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(objtree) subdir=tools -C $(src)/tools/ + +tools/%: FORCE + $(Q)mkdir -p $(objtree)/tools + $(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(objtree) subdir=tools -C $(src)/tools/ $* + # Single targets # --------------------------------------------------------------------------- # Single targets are compatible with: @@ -1489,17 +1590,21 @@ else target-dir = $(if $(KBUILD_EXTMOD),$(dir $<),$(dir $@)) endif -%.s: %.c prepare scripts FORCE +%.s: KBUILD_CFLAGS += $(GCC_PLUGINS_CFLAGS) +%.s: KBUILD_AFLAGS += $(GCC_PLUGINS_AFLAGS) +%.s: %.c gcc-plugins prepare scripts FORCE $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) %.i: %.c prepare scripts FORCE $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) -%.o: %.c prepare scripts FORCE +%.o: KBUILD_CFLAGS += $(GCC_PLUGINS_CFLAGS) +%.o: KBUILD_AFLAGS += $(GCC_PLUGINS_AFLAGS) +%.o: %.c gcc-plugins prepare scripts FORCE $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) %.lst: %.c prepare scripts FORCE $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) -%.s: %.S prepare scripts FORCE +%.s: %.S gcc-plugins prepare scripts FORCE $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) -%.o: %.S prepare scripts FORCE +%.o: %.S gcc-plugins prepare scripts FORCE $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) %.symtypes: %.c prepare scripts FORCE $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@) @@ -1509,11 +1614,15 @@ endif $(cmd_crmodverdir) $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \ $(build)=$(build-dir) -%/: prepare scripts FORCE +%/: KBUILD_CFLAGS += $(GCC_PLUGINS_CFLAGS) +%/: KBUILD_AFLAGS += $(GCC_PLUGINS_AFLAGS) +%/: gcc-plugins prepare scripts FORCE $(cmd_crmodverdir) $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \ $(build)=$(build-dir) -%.ko: prepare scripts FORCE +%.ko: KBUILD_CFLAGS += $(GCC_PLUGINS_CFLAGS) +%.ko: KBUILD_AFLAGS += $(GCC_PLUGINS_AFLAGS) +%.ko: gcc-plugins prepare scripts FORCE $(cmd_crmodverdir) $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \ $(build)=$(build-dir) $(@:.ko=.o) diff --git a/arch/Kconfig b/arch/Kconfig index 4b0669c..7389b35 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -181,4 +181,28 @@ config HAVE_RCU_TABLE_FREE config ARCH_HAVE_NMI_SAFE_CMPXCHG bool +config HAVE_ARCH_SECCOMP_FILTER + bool + help + An arch should select this symbol if it provides all of these things: + - syscall_get_arch() + - syscall_get_arguments() + - syscall_rollback() + - syscall_set_return_value() + - SIGSYS siginfo_t support + - uses __secure_computing_int() or secure_computing() + - secure_computing is called from a ptrace_event()-safe context + - secure_computing return value is checked and a return value of -1 + results in the system call being skipped immediately. + +config SECCOMP_FILTER + def_bool y + depends on HAVE_ARCH_SECCOMP_FILTER && SECCOMP && NET + help + Enable tasks to build secure computing environments defined + in terms of Berkeley Packet Filter programs which implement + task-defined system call filtering polices. + + See Documentation/prctl/seccomp_filter.txt for details. 
+ source "kernel/gcov/Kconfig" diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 3d74801..5eab5aa 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -665,6 +665,8 @@ config DUMMY_CONSOLE depends on VGA_HOSE default y +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 6f1aca7..fa956e0 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -250,6 +250,16 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) #define atomic_dec(v) atomic_sub(1,(v)) #define atomic64_dec(v) atomic64_sub(1,(v)) +#define atomic64_read_unchecked(v) atomic64_read(v) +#define atomic64_set_unchecked(v, i) atomic64_set((v), (i)) +#define atomic64_add_unchecked(a, v) atomic64_add((a), (v)) +#define atomic64_add_return_unchecked(a, v) atomic64_add_return((a), (v)) +#define atomic64_sub_unchecked(a, v) atomic64_sub((a), (v)) +#define atomic64_inc_unchecked(v) atomic64_inc(v) +#define atomic64_inc_return_unchecked(v) atomic64_inc_return(v) +#define atomic64_dec_unchecked(v) atomic64_dec(v) +#define atomic64_cmpxchg_unchecked(v, o, n) atomic64_cmpxchg((v), (o), (n)) + #define smp_mb__before_atomic_dec() smp_mb() #define smp_mb__after_atomic_dec() smp_mb() #define smp_mb__before_atomic_inc() smp_mb() diff --git a/arch/alpha/include/asm/cache.h b/arch/alpha/include/asm/cache.h index ad368a9..fbe0f25 100644 --- a/arch/alpha/include/asm/cache.h +++ b/arch/alpha/include/asm/cache.h @@ -4,19 +4,19 @@ #ifndef __ARCH_ALPHA_CACHE_H #define __ARCH_ALPHA_CACHE_H +#include /* Bytes per L1 (data) cache line. */ #if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_EV6) -# define L1_CACHE_BYTES 64 # define L1_CACHE_SHIFT 6 #else /* Both EV4 and EV5 are write-through, read-allocate, direct-mapped, physical. */ -# define L1_CACHE_BYTES 32 # define L1_CACHE_SHIFT 5 #endif +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) #define SMP_CACHE_BYTES L1_CACHE_BYTES #endif diff --git a/arch/alpha/include/asm/elf.h b/arch/alpha/include/asm/elf.h index da5449e..7418343 100644 --- a/arch/alpha/include/asm/elf.h +++ b/arch/alpha/include/asm/elf.h @@ -90,6 +90,13 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE (current->personality & ADDR_LIMIT_32BIT ? 0x10000 : 0x120000000UL) + +#define PAX_DELTA_MMAP_LEN (current->personality & ADDR_LIMIT_32BIT ? 14 : 28) +#define PAX_DELTA_STACK_LEN (current->personality & ADDR_LIMIT_32BIT ? 14 : 19) +#endif + /* $0 is set by ld.so to a pointer to a function which might be registered using atexit. 
This provides a mean for the dynamic linker to call DT_FINI functions for shared libraries that have diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h index bc2a0da..8ad11ee 100644 --- a/arch/alpha/include/asm/pgalloc.h +++ b/arch/alpha/include/asm/pgalloc.h @@ -29,6 +29,12 @@ pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) pgd_set(pgd, pmd); } +static inline void +pgd_populate_kernel(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) +{ + pgd_populate(mm, pgd, pmd); +} + extern pgd_t *pgd_alloc(struct mm_struct *mm); static inline void diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index de98a73..bd4f1f8 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -101,6 +101,17 @@ struct vm_area_struct; #define PAGE_SHARED __pgprot(_PAGE_VALID | __ACCESS_BITS) #define PAGE_COPY __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW) #define PAGE_READONLY __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW) + +#ifdef CONFIG_PAX_PAGEEXEC +# define PAGE_SHARED_NOEXEC __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOE) +# define PAGE_COPY_NOEXEC __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW | _PAGE_FOE) +# define PAGE_READONLY_NOEXEC __pgprot(_PAGE_VALID | __ACCESS_BITS | _PAGE_FOW | _PAGE_FOE) +#else +# define PAGE_SHARED_NOEXEC PAGE_SHARED +# define PAGE_COPY_NOEXEC PAGE_COPY +# define PAGE_READONLY_NOEXEC PAGE_READONLY +#endif + #define PAGE_KERNEL __pgprot(_PAGE_VALID | _PAGE_ASM | _PAGE_KRE | _PAGE_KWE) #define _PAGE_NORMAL(x) __pgprot(_PAGE_VALID | __ACCESS_BITS | (x)) diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index 6d159ce..eecae79 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -860,24 +860,15 @@ sys_getxgid: .globl sys_getxpid .ent sys_getxpid sys_getxpid: + lda $sp, -16($sp) + stq $26, 0($sp) .prologue 0 - ldq $2, TI_TASK($8) - /* See linux/kernel/timer.c sys_getppid for discussion - about this loop. */ - ldq $3, TASK_GROUP_LEADER($2) - ldq $4, TASK_REAL_PARENT($3) - ldl $0, TASK_TGID($2) -1: ldl $1, TASK_TGID($4) -#ifdef CONFIG_SMP - mov $4, $5 - mb - ldq $3, TASK_GROUP_LEADER($2) - ldq $4, TASK_REAL_PARENT($3) - cmpeq $4, $5, $5 - beq $5, 1b -#endif - stq $1, 80($sp) + lda $16, 96($sp) + jsr $26, do_getxpid + ldq $26, 0($sp) + + lda $sp, 16($sp) ret .end sys_getxpid diff --git a/arch/alpha/kernel/module.c b/arch/alpha/kernel/module.c index 2fd00b7..cfd5069 100644 --- a/arch/alpha/kernel/module.c +++ b/arch/alpha/kernel/module.c @@ -160,7 +160,7 @@ apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab, /* The small sections were sorted to the end of the segment. The following should definitely cover them. */ - gp = (u64)me->module_core + me->core_size - 0x8000; + gp = (u64)me->module_core_rw + me->core_size_rw - 0x8000; got = sechdrs[me->arch.gotsecindex].sh_addr; for (i = 0; i < n; i++) { diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 01e8715..6a5a03b 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1138,16 +1138,16 @@ SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p) generic version except that we know how to honor ADDR_LIMIT_32BIT. 
*/ static unsigned long -arch_get_unmapped_area_1(unsigned long addr, unsigned long len, - unsigned long limit) +arch_get_unmapped_area_1(struct file *filp, unsigned long addr, unsigned long len, + unsigned long limit, unsigned long flags) { struct vm_area_struct *vma = find_vma(current->mm, addr); - + unsigned long offset = gr_rand_threadstack_offset(current->mm, filp, flags); while (1) { /* At this point: (!vma || addr < vma->vm_end). */ if (limit - len < addr) return -ENOMEM; - if (!vma || addr + len <= vma->vm_start) + if (check_heap_stack_gap(vma, &addr, len, offset)) return addr; addr = vma->vm_end; vma = vma->vm_next; @@ -1183,20 +1183,24 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, merely specific addresses, but regions of memory -- perhaps this feature should be incorporated into all ports? */ +#ifdef CONFIG_PAX_RANDMMAP + if (!(current->mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + if (addr) { - addr = arch_get_unmapped_area_1 (PAGE_ALIGN(addr), len, limit); + addr = arch_get_unmapped_area_1 (filp, PAGE_ALIGN(addr), len, limit, flags); if (addr != (unsigned long) -ENOMEM) return addr; } /* Next, try allocating at TASK_UNMAPPED_BASE. */ - addr = arch_get_unmapped_area_1 (PAGE_ALIGN(TASK_UNMAPPED_BASE), - len, limit); + addr = arch_get_unmapped_area_1 (filp, PAGE_ALIGN(current->mm->mmap_base), len, limit, flags); + if (addr != (unsigned long) -ENOMEM) return addr; /* Finally, try allocating in low memory. */ - addr = arch_get_unmapped_area_1 (PAGE_SIZE, len, limit); + addr = arch_get_unmapped_area_1 (filp, PAGE_SIZE, len, limit, flags); return addr; } diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index e2af5eb..5002cd0 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index e534e1c..df045e4 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -446,7 +446,7 @@ sys_call_table: .quad sys_stat64 /* 425 */ .quad sys_lstat64 .quad sys_fstat64 - .quad sys_ni_syscall /* sys_vserver */ + .quad sys_vserver /* sys_vserver */ .quad sys_ni_syscall /* sys_mbind */ .quad sys_ni_syscall /* sys_get_mempolicy */ .quad sys_ni_syscall /* sys_set_mempolicy */ diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 0414e02..05e55d3 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -183,7 +183,8 @@ die_if_kernel(char * str, struct pt_regs *regs, long err, unsigned long *r9_15) #ifdef CONFIG_SMP printk("CPU %d ", hard_smp_processor_id()); #endif - printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err); + printk("%s(%d[#%u]): %s %ld\n", current->comm, + task_pid_nr(current), current->xid, str, err); dik_show_regs(regs, r9_15); add_taint(TAINT_DIE); dik_show_trace((unsigned long *)(regs+1)); diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index e576b91..9b43be9 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -54,6 +54,124 @@ __load_new_mm_context(struct mm_struct *next_mm) __reload_thread(pcb); } +#ifdef CONFIG_PAX_PAGEEXEC +/* + * PaX: decide what to do with offenders (regs->pc = fault address) + * + * returns 1 when task should be killed + * 2 when patched PLT trampoline was detected + * 3 when unpatched PLT trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_EMUPLT + int err; + + do { /* PaX: patched PLT emulation #1 */ + unsigned 
int ldah, ldq, jmp; + + err = get_user(ldah, (unsigned int *)regs->pc); + err |= get_user(ldq, (unsigned int *)(regs->pc+4)); + err |= get_user(jmp, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((ldah & 0xFFFF0000U) == 0x277B0000U && + (ldq & 0xFFFF0000U) == 0xA77B0000U && + jmp == 0x6BFB0000U) + { + unsigned long r27, addr; + unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16; + unsigned long addrl = ldq | 0xFFFFFFFFFFFF0000UL; + + addr = regs->r27 + ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL); + err = get_user(r27, (unsigned long *)addr); + if (err) + break; + + regs->r27 = r27; + regs->pc = r27; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #2 */ + unsigned int ldah, lda, br; + + err = get_user(ldah, (unsigned int *)regs->pc); + err |= get_user(lda, (unsigned int *)(regs->pc+4)); + err |= get_user(br, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((ldah & 0xFFFF0000U) == 0x277B0000U && + (lda & 0xFFFF0000U) == 0xA77B0000U && + (br & 0xFFE00000U) == 0xC3E00000U) + { + unsigned long addr = br | 0xFFFFFFFFFFE00000UL; + unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16; + unsigned long addrl = lda | 0xFFFFFFFFFFFF0000UL; + + regs->r27 += ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL); + regs->pc += 12 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); + return 2; + } + } while (0); + + do { /* PaX: unpatched PLT emulation */ + unsigned int br; + + err = get_user(br, (unsigned int *)regs->pc); + + if (!err && (br & 0xFFE00000U) == 0xC3800000U) { + unsigned int br2, ldq, nop, jmp; + unsigned long addr = br | 0xFFFFFFFFFFE00000UL, resolver; + + addr = regs->pc + 4 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); + err = get_user(br2, (unsigned int *)addr); + err |= get_user(ldq, (unsigned int *)(addr+4)); + err |= get_user(nop, (unsigned int *)(addr+8)); + err |= get_user(jmp, (unsigned int *)(addr+12)); + err |= get_user(resolver, (unsigned long *)(addr+16)); + + if (err) + break; + + if (br2 == 0xC3600000U && + ldq == 0xA77B000CU && + nop == 0x47FF041FU && + jmp == 0x6B7B0000U) + { + regs->r28 = regs->pc+4; + regs->r27 = addr+16; + regs->pc = resolver; + return 3; + } + } + } while (0); +#endif + + return 1; +} + +void pax_report_insns(struct pt_regs *regs, void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int *)pc+i)) + printk(KERN_CONT "???????? "); + else + printk(KERN_CONT "%08x ", c); + } + printk("\n"); +} +#endif /* * This routine handles page faults. 
It determines the address, @@ -131,8 +249,29 @@ do_page_fault(unsigned long address, unsigned long mmcsr, good_area: si_code = SEGV_ACCERR; if (cause < 0) { - if (!(vma->vm_flags & VM_EXEC)) + if (!(vma->vm_flags & VM_EXEC)) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (!(mm->pax_flags & MF_PAX_PAGEEXEC) || address != regs->pc) + goto bad_area; + + up_read(&mm->mmap_sem); + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 2: + case 3: + return; +#endif + + } + pax_report_fault(regs, (void *)regs->pc, (void *)rdusp()); + do_group_exit(SIGKILL); +#else goto bad_area; +#endif + + } } else if (!cause) { /* Allow reads even for write-only mappings */ if (!(vma->vm_flags & (VM_READ | VM_WRITE))) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 082bd36..67d7d02 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2013,6 +2013,7 @@ config XIP_PHYS_ADDR config KEXEC bool "Kexec system call (EXPERIMENTAL)" depends on EXPERIMENTAL + depends on !GRKERNSEC_KMEM help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot @@ -2253,6 +2254,8 @@ source "fs/Kconfig" source "arch/arm/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index b7c5d5d..4b0c4ed 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -231,7 +231,7 @@ */ #ifdef CONFIG_THUMB2_KERNEL - .macro usraccoff, instr, reg, ptr, inc, off, cond, abort, t=T() + .macro usraccoff, instr, reg, ptr, inc, off, cond, abort, t=TUSER() 9999: .if \inc == 1 \instr\cond\()b\()\t\().w \reg, [\ptr, #\off] @@ -271,7 +271,7 @@ #else /* !CONFIG_THUMB2_KERNEL */ - .macro usracc, instr, reg, ptr, inc, cond, rept, abort, t=T() + .macro usracc, instr, reg, ptr, inc, cond, rept, abort, t=TUSER() .rept \rept 9999: .if \inc == 1 diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index 86976d0..15420e6 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -15,6 +15,10 @@ #include #include +#ifdef CONFIG_GENERIC_ATOMIC64 +#include +#endif + #define ATOMIC_INIT(i) { (i) } #ifdef __KERNEL__ @@ -25,7 +29,15 @@ * atomic_set() is the clrex or dummy strex done on every exception return. 
*/ #define atomic_read(v) (*(volatile int *)&(v)->counter) +static inline int atomic_read_unchecked(const atomic_unchecked_t *v) +{ + return *(const volatile int *)&v->counter; +} #define atomic_set(v,i) (((v)->counter) = (i)) +static inline void atomic_set_unchecked(atomic_unchecked_t *v, int i) +{ + v->counter = i; +} #if __LINUX_ARM_ARCH__ >= 6 @@ -40,6 +52,35 @@ static inline void atomic_add(int i, atomic_t *v) int result; __asm__ __volatile__("@ atomic_add\n" +"1: ldrex %1, [%3]\n" +" adds %0, %1, %4\n" + +#ifdef CONFIG_PAX_REFCOUNT +" bvc 3f\n" +"2: bkpt 0xf103\n" +"3:\n" +#endif + +" strex %1, %0, [%3]\n" +" teq %1, #0\n" +" bne 1b" + +#ifdef CONFIG_PAX_REFCOUNT +"\n4:\n" + _ASM_EXTABLE(2b, 4b) +#endif + + : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) + : "r" (&v->counter), "Ir" (i) + : "cc"); +} + +static inline void atomic_add_unchecked(int i, atomic_unchecked_t *v) +{ + unsigned long tmp; + int result; + + __asm__ __volatile__("@ atomic_add_unchecked\n" "1: ldrex %0, [%3]\n" " add %0, %0, %4\n" " strex %1, %0, [%3]\n" @@ -58,6 +99,42 @@ static inline int atomic_add_return(int i, atomic_t *v) smp_mb(); __asm__ __volatile__("@ atomic_add_return\n" +"1: ldrex %1, [%3]\n" +" adds %0, %1, %4\n" + +#ifdef CONFIG_PAX_REFCOUNT +" bvc 3f\n" +" mov %0, %1\n" +"2: bkpt 0xf103\n" +"3:\n" +#endif + +" strex %1, %0, [%3]\n" +" teq %1, #0\n" +" bne 1b" + +#ifdef CONFIG_PAX_REFCOUNT +"\n4:\n" + _ASM_EXTABLE(2b, 4b) +#endif + + : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) + : "r" (&v->counter), "Ir" (i) + : "cc"); + + smp_mb(); + + return result; +} + +static inline int atomic_add_return_unchecked(int i, atomic_unchecked_t *v) +{ + unsigned long tmp; + int result; + + smp_mb(); + + __asm__ __volatile__("@ atomic_add_return_unchecked\n" "1: ldrex %0, [%3]\n" " add %0, %0, %4\n" " strex %1, %0, [%3]\n" @@ -78,6 +155,35 @@ static inline void atomic_sub(int i, atomic_t *v) int result; __asm__ __volatile__("@ atomic_sub\n" +"1: ldrex %1, [%3]\n" +" subs %0, %1, %4\n" + +#ifdef CONFIG_PAX_REFCOUNT +" bvc 3f\n" +"2: bkpt 0xf103\n" +"3:\n" +#endif + +" strex %1, %0, [%3]\n" +" teq %1, #0\n" +" bne 1b" + +#ifdef CONFIG_PAX_REFCOUNT +"\n4:\n" + _ASM_EXTABLE(2b, 4b) +#endif + + : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) + : "r" (&v->counter), "Ir" (i) + : "cc"); +} + +static inline void atomic_sub_unchecked(int i, atomic_unchecked_t *v) +{ + unsigned long tmp; + int result; + + __asm__ __volatile__("@ atomic_sub_unchecked\n" "1: ldrex %0, [%3]\n" " sub %0, %0, %4\n" " strex %1, %0, [%3]\n" @@ -96,11 +202,25 @@ static inline int atomic_sub_return(int i, atomic_t *v) smp_mb(); __asm__ __volatile__("@ atomic_sub_return\n" -"1: ldrex %0, [%3]\n" -" sub %0, %0, %4\n" +"1: ldrex %1, [%3]\n" +" subs %0, %1, %4\n" + +#ifdef CONFIG_PAX_REFCOUNT +" bvc 3f\n" +" mov %0, %1\n" +"2: bkpt 0xf103\n" +"3:\n" +#endif + " strex %1, %0, [%3]\n" " teq %1, #0\n" " bne 1b" + +#ifdef CONFIG_PAX_REFCOUNT +"\n4:\n" + _ASM_EXTABLE(2b, 4b) +#endif + : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) : "r" (&v->counter), "Ir" (i) : "cc"); @@ -132,6 +252,28 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) return oldval; } +static inline int atomic_cmpxchg_unchecked(atomic_unchecked_t *ptr, int old, int new) +{ + unsigned long oldval, res; + + smp_mb(); + + do { + __asm__ __volatile__("@ atomic_cmpxchg_unchecked\n" + "ldrex %1, [%3]\n" + "mov %0, #0\n" + "teq %1, %4\n" + "strexeq %0, %5, [%3]\n" + : "=&r" (res), "=&r" (oldval), "+Qo" (ptr->counter) + : "r" (&ptr->counter), "Ir" (old), "r" (new) + : 
"cc"); + } while (res); + + smp_mb(); + + return oldval; +} + static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) { unsigned long tmp, tmp2; @@ -165,7 +307,17 @@ static inline int atomic_add_return(int i, atomic_t *v) return val; } + +static inline int atomic_add_return_unchecked(int i, atomic_unchecked_t *v) +{ + return atomic_add_return(i, v); +} + #define atomic_add(i, v) (void) atomic_add_return(i, v) +static inline void atomic_add_unchecked(int i, atomic_unchecked_t *v) +{ + (void) atomic_add_return(i, v); +} static inline int atomic_sub_return(int i, atomic_t *v) { @@ -180,6 +332,10 @@ static inline int atomic_sub_return(int i, atomic_t *v) return val; } #define atomic_sub(i, v) (void) atomic_sub_return(i, v) +static inline void atomic_sub_unchecked(int i, atomic_unchecked_t *v) +{ + (void) atomic_sub_return(i, v); +} static inline int atomic_cmpxchg(atomic_t *v, int old, int new) { @@ -195,6 +351,11 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new) return ret; } +static inline int atomic_cmpxchg_unchecked(atomic_unchecked_t *v, int old, int new) +{ + return atomic_cmpxchg(v, old, new); +} + static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) { unsigned long flags; @@ -207,6 +368,10 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) #endif /* __LINUX_ARM_ARCH__ */ #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +static inline int atomic_xchg_unchecked(atomic_unchecked_t *v, int new) +{ + return xchg(&v->counter, new); +} static inline int __atomic_add_unless(atomic_t *v, int a, int u) { @@ -219,11 +384,27 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) } #define atomic_inc(v) atomic_add(1, v) +static inline void atomic_inc_unchecked(atomic_unchecked_t *v) +{ + atomic_add_unchecked(1, v); +} #define atomic_dec(v) atomic_sub(1, v) +static inline void atomic_dec_unchecked(atomic_unchecked_t *v) +{ + atomic_sub_unchecked(1, v); +} #define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0) +static inline int atomic_inc_and_test_unchecked(atomic_unchecked_t *v) +{ + return atomic_add_return_unchecked(1, v) == 0; +} #define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0) #define atomic_inc_return(v) (atomic_add_return(1, v)) +static inline int atomic_inc_return_unchecked(atomic_unchecked_t *v) +{ + return atomic_add_return_unchecked(1, v); +} #define atomic_dec_return(v) (atomic_sub_return(1, v)) #define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0) @@ -239,6 +420,14 @@ typedef struct { u64 __aligned(8) counter; } atomic64_t; +#ifdef CONFIG_PAX_REFCOUNT +typedef struct { + u64 __aligned(8) counter; +} atomic64_unchecked_t; +#else +typedef atomic64_t atomic64_unchecked_t; +#endif + #define ATOMIC64_INIT(i) { (i) } static inline u64 atomic64_read(atomic64_t *v) @@ -254,6 +443,19 @@ static inline u64 atomic64_read(atomic64_t *v) return result; } +static inline u64 atomic64_read_unchecked(atomic64_unchecked_t *v) +{ + u64 result; + + __asm__ __volatile__("@ atomic64_read_unchecked\n" +" ldrexd %0, %H0, [%1]" + : "=&r" (result) + : "r" (&v->counter), "Qo" (v->counter) + ); + + return result; +} + static inline void atomic64_set(atomic64_t *v, u64 i) { u64 tmp; @@ -268,6 +470,20 @@ static inline void atomic64_set(atomic64_t *v, u64 i) : "cc"); } +static inline void atomic64_set_unchecked(atomic64_unchecked_t *v, u64 i) +{ + u64 tmp; + + __asm__ __volatile__("@ atomic64_set_unchecked\n" +"1: ldrexd %0, %H0, [%2]\n" +" strexd %0, %3, %H3, [%2]\n" +" teq 
%0, #0\n" +" bne 1b" + : "=&r" (tmp), "=Qo" (v->counter) + : "r" (&v->counter), "r" (i) + : "cc"); +} + static inline void atomic64_add(u64 i, atomic64_t *v) { u64 result; @@ -276,6 +492,36 @@ static inline void atomic64_add(u64 i, atomic64_t *v) __asm__ __volatile__("@ atomic64_add\n" "1: ldrexd %0, %H0, [%3]\n" " adds %0, %0, %4\n" +" adcs %H0, %H0, %H4\n" + +#ifdef CONFIG_PAX_REFCOUNT +" bvc 3f\n" +"2: bkpt 0xf103\n" +"3:\n" +#endif + +" strexd %1, %0, %H0, [%3]\n" +" teq %1, #0\n" +" bne 1b" + +#ifdef CONFIG_PAX_REFCOUNT +"\n4:\n" + _ASM_EXTABLE(2b, 4b) +#endif + + : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) + : "r" (&v->counter), "r" (i) + : "cc"); +} + +static inline void atomic64_add_unchecked(u64 i, atomic64_unchecked_t *v) +{ + u64 result; + unsigned long tmp; + + __asm__ __volatile__("@ atomic64_add_unchecked\n" +"1: ldrexd %0, %H0, [%3]\n" +" adds %0, %0, %4\n" " adc %H0, %H0, %H4\n" " strexd %1, %0, %H0, [%3]\n" " teq %1, #0\n" @@ -287,12 +533,49 @@ static inline void atomic64_add(u64 i, atomic64_t *v) static inline u64 atomic64_add_return(u64 i, atomic64_t *v) { - u64 result; - unsigned long tmp; + u64 result, tmp; smp_mb(); __asm__ __volatile__("@ atomic64_add_return\n" +"1: ldrexd %1, %H1, [%3]\n" +" adds %0, %1, %4\n" +" adcs %H0, %H1, %H4\n" + +#ifdef CONFIG_PAX_REFCOUNT +" bvc 3f\n" +" mov %0, %1\n" +" mov %H0, %H1\n" +"2: bkpt 0xf103\n" +"3:\n" +#endif + +" strexd %1, %0, %H0, [%3]\n" +" teq %1, #0\n" +" bne 1b" + +#ifdef CONFIG_PAX_REFCOUNT +"\n4:\n" + _ASM_EXTABLE(2b, 4b) +#endif + + : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) + : "r" (&v->counter), "r" (i) + : "cc"); + + smp_mb(); + + return result; +} + +static inline u64 atomic64_add_return_unchecked(u64 i, atomic64_unchecked_t *v) +{ + u64 result; + unsigned long tmp; + + smp_mb(); + + __asm__ __volatile__("@ atomic64_add_return_unchecked\n" "1: ldrexd %0, %H0, [%3]\n" " adds %0, %0, %4\n" " adc %H0, %H0, %H4\n" @@ -316,6 +599,36 @@ static inline void atomic64_sub(u64 i, atomic64_t *v) __asm__ __volatile__("@ atomic64_sub\n" "1: ldrexd %0, %H0, [%3]\n" " subs %0, %0, %4\n" +" sbcs %H0, %H0, %H4\n" + +#ifdef CONFIG_PAX_REFCOUNT +" bvc 3f\n" +"2: bkpt 0xf103\n" +"3:\n" +#endif + +" strexd %1, %0, %H0, [%3]\n" +" teq %1, #0\n" +" bne 1b" + +#ifdef CONFIG_PAX_REFCOUNT +"\n4:\n" + _ASM_EXTABLE(2b, 4b) +#endif + + : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) + : "r" (&v->counter), "r" (i) + : "cc"); +} + +static inline void atomic64_sub_unchecked(u64 i, atomic64_unchecked_t *v) +{ + u64 result; + unsigned long tmp; + + __asm__ __volatile__("@ atomic64_sub_unchecked\n" +"1: ldrexd %0, %H0, [%3]\n" +" subs %0, %0, %4\n" " sbc %H0, %H0, %H4\n" " strexd %1, %0, %H0, [%3]\n" " teq %1, #0\n" @@ -327,18 +640,32 @@ static inline void atomic64_sub(u64 i, atomic64_t *v) static inline u64 atomic64_sub_return(u64 i, atomic64_t *v) { - u64 result; - unsigned long tmp; + u64 result, tmp; smp_mb(); __asm__ __volatile__("@ atomic64_sub_return\n" -"1: ldrexd %0, %H0, [%3]\n" -" subs %0, %0, %4\n" -" sbc %H0, %H0, %H4\n" +"1: ldrexd %1, %H1, [%3]\n" +" subs %0, %1, %4\n" +" sbcs %H0, %H1, %H4\n" + +#ifdef CONFIG_PAX_REFCOUNT +" bvc 3f\n" +" mov %0, %1\n" +" mov %H0, %H1\n" +"2: bkpt 0xf103\n" +"3:\n" +#endif + " strexd %1, %0, %H0, [%3]\n" " teq %1, #0\n" " bne 1b" + +#ifdef CONFIG_PAX_REFCOUNT +"\n4:\n" + _ASM_EXTABLE(2b, 4b) +#endif + : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) : "r" (&v->counter), "r" (i) : "cc"); @@ -372,6 +699,30 @@ static inline u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old, u64 new) return 
oldval; } +static inline u64 atomic64_cmpxchg_unchecked(atomic64_unchecked_t *ptr, u64 old, u64 new) +{ + u64 oldval; + unsigned long res; + + smp_mb(); + + do { + __asm__ __volatile__("@ atomic64_cmpxchg_unchecked\n" + "ldrexd %1, %H1, [%3]\n" + "mov %0, #0\n" + "teq %1, %4\n" + "teqeq %H1, %H4\n" + "strexdeq %0, %5, %H5, [%3]" + : "=&r" (res), "=&r" (oldval), "+Qo" (ptr->counter) + : "r" (&ptr->counter), "r" (old), "r" (new) + : "cc"); + } while (res); + + smp_mb(); + + return oldval; +} + static inline u64 atomic64_xchg(atomic64_t *ptr, u64 new) { u64 result; @@ -395,21 +746,34 @@ static inline u64 atomic64_xchg(atomic64_t *ptr, u64 new) static inline u64 atomic64_dec_if_positive(atomic64_t *v) { - u64 result; - unsigned long tmp; + u64 result, tmp; smp_mb(); __asm__ __volatile__("@ atomic64_dec_if_positive\n" -"1: ldrexd %0, %H0, [%3]\n" -" subs %0, %0, #1\n" -" sbc %H0, %H0, #0\n" +"1: ldrexd %1, %H1, [%3]\n" +" subs %0, %1, #1\n" +" sbcs %H0, %H1, #0\n" + +#ifdef CONFIG_PAX_REFCOUNT +" bvc 3f\n" +" mov %0, %1\n" +" mov %H0, %H1\n" +"2: bkpt 0xf103\n" +"3:\n" +#endif + " teq %H0, #0\n" -" bmi 2f\n" +" bmi 4f\n" " strexd %1, %0, %H0, [%3]\n" " teq %1, #0\n" " bne 1b\n" -"2:" +"4:\n" + +#ifdef CONFIG_PAX_REFCOUNT + _ASM_EXTABLE(2b, 4b) +#endif + : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) : "r" (&v->counter) : "cc"); @@ -432,13 +796,25 @@ static inline int atomic64_add_unless(atomic64_t *v, u64 a, u64 u) " teq %0, %5\n" " teqeq %H0, %H5\n" " moveq %1, #0\n" -" beq 2f\n" +" beq 4f\n" " adds %0, %0, %6\n" -" adc %H0, %H0, %H6\n" +" adcs %H0, %H0, %H6\n" + +#ifdef CONFIG_PAX_REFCOUNT +" bvc 3f\n" +"2: bkpt 0xf103\n" +"3:\n" +#endif + " strexd %2, %0, %H0, [%4]\n" " teq %2, #0\n" " bne 1b\n" -"2:" +"4:\n" + +#ifdef CONFIG_PAX_REFCOUNT + _ASM_EXTABLE(2b, 4b) +#endif + : "=&r" (val), "+r" (ret), "=&r" (tmp), "+Qo" (v->counter) : "r" (&v->counter), "r" (u), "r" (a) : "cc"); @@ -451,10 +827,13 @@ static inline int atomic64_add_unless(atomic64_t *v, u64 a, u64 u) #define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) #define atomic64_inc(v) atomic64_add(1LL, (v)) +#define atomic64_inc_unchecked(v) atomic64_add_unchecked(1LL, (v)) #define atomic64_inc_return(v) atomic64_add_return(1LL, (v)) +#define atomic64_inc_return_unchecked(v) atomic64_add_return_unchecked(1LL, (v)) #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) #define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0) #define atomic64_dec(v) atomic64_sub(1LL, (v)) +#define atomic64_dec_unchecked(v) atomic64_sub_unchecked(1LL, (v)) #define atomic64_dec_return(v) atomic64_sub_return(1LL, (v)) #define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL) diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h index 75fe66b..2255c86 100644 --- a/arch/arm/include/asm/cache.h +++ b/arch/arm/include/asm/cache.h @@ -4,8 +4,10 @@ #ifndef __ASMARM_CACHE_H #define __ASMARM_CACHE_H +#include + #define L1_CACHE_SHIFT CONFIG_ARM_L1_CACHE_SHIFT -#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) /* * Memory returned by kmalloc() may be used for DMA, so we must make diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index b1e0e07..071fe7c 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -108,7 +108,7 @@ struct cpu_cache_fns { void (*dma_unmap_area)(const void *, size_t, int); void 
(*dma_flush_range)(const void *, const void *); -}; +} __no_const; /* * Select the calling method diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index af18cea..b5dc173 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -83,9 +83,9 @@ * instructions (inline assembly) */ #ifdef CONFIG_CPU_USE_DOMAINS -#define T(instr) #instr "t" +#define TUSER(instr) #instr "t" #else -#define T(instr) #instr +#define TUSER(instr) #instr #endif #else /* __ASSEMBLY__ */ @@ -95,9 +95,9 @@ * instructions */ #ifdef CONFIG_CPU_USE_DOMAINS -#define T(instr) instr ## t +#define TUSER(instr) instr ## t #else -#define T(instr) instr +#define TUSER(instr) instr #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index 0e9ce8d..6ef1e03 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -116,7 +116,14 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs); the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. */ -#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) +#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) + +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE 0x00008000UL + +#define PAX_DELTA_MMAP_LEN ((current->personality == PER_LINUX_32BIT) ? 16 : 10) +#define PAX_DELTA_STACK_LEN ((current->personality == PER_LINUX_32BIT) ? 16 : 10) +#endif /* When the program starts, a1 contains a pointer to a function to be registered with atexit, as per the SVR4 ABI. A value of 0 means we @@ -126,10 +133,6 @@ int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs); extern void elf_set_personality(const struct elf32_hdr *); #define SET_PERSONALITY(ex) elf_set_personality(&(ex)) -struct mm_struct; -extern unsigned long arch_randomize_brk(struct mm_struct *mm); -#define arch_randomize_brk arch_randomize_brk - extern int vectors_user_mapping(void); #define arch_setup_additional_pages(bprm, uses_interp) vectors_user_mapping() #define ARCH_HAS_SETUP_ADDITIONAL_PAGES diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index aefd459..e37af12 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -70,9 +70,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, #define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg) \ __asm__ __volatile__( \ - "1: " T(ldr) " %1, [%3]\n" \ + "1: " TUSER(ldr) " %1, [%3]\n" \ " " insn "\n" \ - "2: " T(str) " %0, [%3]\n" \ + "2: " TUSER(str) " %0, [%3]\n" \ " mov %0, #0\n" \ __futex_atomic_ex_table("%5") \ : "=&r" (ret), "=&r" (oldval), "=&r" (tmp) \ @@ -90,10 +90,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, return -EFAULT; __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" - "1: " T(ldr) " %1, [%4]\n" + "1: " TUSER(ldr) " %1, [%4]\n" " teq %1, %2\n" " it eq @ explicit IT needed for the 2b label\n" - "2: " T(streq) " %3, [%4]\n" + "2: " TUSER(streq) " %3, [%4]\n" __futex_atomic_ex_table("%5") : "+r" (ret), "=&r" (val) : "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT) diff --git a/arch/arm/include/asm/kmap_types.h b/arch/arm/include/asm/kmap_types.h index e51b1e8..32a3113 100644 --- a/arch/arm/include/asm/kmap_types.h +++ b/arch/arm/include/asm/kmap_types.h @@ -21,6 +21,7 @@ enum km_type { KM_L1_CACHE, KM_L2_CACHE, KM_KDB, + KM_CLEARPAGE, KM_TYPE_NR }; diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index a8997d7..f0a29154 100644 --- 
a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -268,7 +268,8 @@ static inline __deprecated void *bus_to_virt(unsigned long x) #define ARCH_PFN_OFFSET PHYS_PFN_OFFSET #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define virt_addr_valid(kaddr) ((unsigned long)(kaddr) >= PAGE_OFFSET && (unsigned long)(kaddr) < (unsigned long)high_memory) +#define virt_addr_valid(kaddr) (((unsigned long)(kaddr) >= PAGE_OFFSET && (unsigned long)(kaddr) < (unsigned long)high_memory) \ + && pfn_valid(__pa(kaddr) >> PAGE_SHIFT) ) /* * Optional coherency support. Currently used only by selected diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h index 53426c6..c7baff3 100644 --- a/arch/arm/include/asm/outercache.h +++ b/arch/arm/include/asm/outercache.h @@ -35,7 +35,7 @@ struct outer_cache_fns { #endif void (*set_debug)(unsigned long); void (*resume)(void); -}; +} __no_const; #ifdef CONFIG_OUTER_CACHE diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index ca94653..9c398dc 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -23,6 +23,7 @@ #else +#include #include /* @@ -123,7 +124,7 @@ struct cpu_user_fns { void (*cpu_clear_user_highpage)(struct page *page, unsigned long vaddr); void (*cpu_copy_user_highpage)(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma); -}; +} __no_const; #ifdef MULTI_USER extern struct cpu_user_fns cpu_user; diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h index 3e08fd3..3f14f89 100644 --- a/arch/arm/include/asm/pgalloc.h +++ b/arch/arm/include/asm/pgalloc.h @@ -31,6 +31,7 @@ #define pmd_alloc_one(mm,addr) ({ BUG(); ((pmd_t *)2); }) #define pmd_free(mm, pmd) do { } while (0) #define pgd_populate(mm,pmd,pte) BUG() +#define pgd_populate_kernel(mm,pmd,pte) BUG() extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index fcbac3c..6e9b464 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -26,6 +26,9 @@ #include +#define ktla_ktva(addr) (addr) +#define ktva_ktla(addr) (addr) + /* * Just any arbitrary offset to the start of the vmalloc VM area: the * current 8MB value just means that there will be a 8MB "hole" after the diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index 96187ff..7a9b049 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -72,7 +72,7 @@ * ARMv7 groups of PSR bits */ #define APSR_MASK 0xf80f0000 /* N, Z, C, V, Q and GE flags */ -#define PSR_ISET_MASK 0x01000010 /* ISA state (J, T) mask */ +#define PSR_ISET_MASK 0x01000020 /* ISA state (J, T) mask */ #define PSR_IT_MASK 0x0600fc00 /* If-Then execution state mask */ #define PSR_ENDIAN_MASK 0x00000200 /* Endianness state mask */ diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index 984014b..a6d914f 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -90,6 +90,8 @@ void hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, #define xchg(ptr,x) \ ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) +#define xchg_unchecked(ptr,x) \ + ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) extern asmlinkage void c_backtrace(unsigned long fp, int pmode); @@ -101,7 +103,7 @@ extern int __pure cpu_architecture(void); extern void cpu_init(void); 
void arm_machine_restart(char mode, const char *cmd); -extern void (*arm_pm_restart)(char str, const char *cmd); +extern void (*arm_pm_restart)(char str, const char *cmd) __noreturn; #define UDBG_UNDEFINED (1 << 0) #define UDBG_SYSCALL (1 << 1) @@ -526,6 +528,13 @@ static inline unsigned long long __cmpxchg64_mb(volatile void *ptr, #endif /* __LINUX_ARM_ARCH__ >= 6 */ +#define _ASM_EXTABLE(from, to) \ +" .pushsection __ex_table,\"a\"\n"\ +" .align 3\n" \ +" .long " #from ", " #to"\n" \ +" .popsection" + + #endif /* __ASSEMBLY__ */ #define arch_align_stack(x) (x) diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 7b5cc8d..5d70d88 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -139,6 +139,12 @@ extern void vfp_flush_hwstate(struct thread_info *); #define TIF_NEED_RESCHED 1 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ #define TIF_SYSCALL_TRACE 8 + +/* within 8 bits of TIF_SYSCALL_TRACE + to meet flexible second operand requirements +*/ +#define TIF_GRSEC_SETXID 9 + #define TIF_POLLING_NRFLAG 16 #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ @@ -155,6 +161,10 @@ extern void vfp_flush_hwstate(struct thread_info *); #define _TIF_FREEZE (1 << TIF_FREEZE) #define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_GRSEC_SETXID (1 << TIF_GRSEC_SETXID) + +/* Checks for any syscall work in entry-common.S */ +#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_GRSEC_SETXID) /* * Change these and you break ASM code in entry-common.S diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 18a2858..21db277 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -202,6 +202,7 @@ static inline void set_fs(mm_segment_t fs) #endif /* CONFIG_MMU */ +#define access_ok_noprefault(type,addr,size) access_ok((type),(addr),(size)) #define access_ok(type,addr,size) (__range_ok(addr,size) == 0) /* @@ -242,7 +243,7 @@ do { \ #define __get_user_asm_byte(x,addr,err) \ __asm__ __volatile__( \ - "1: " T(ldrb) " %1,[%2],#0\n" \ + "1: " TUSER(ldrb) " %1,[%2],#0\n" \ "2:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -278,7 +279,7 @@ do { \ #define __get_user_asm_word(x,addr,err) \ __asm__ __volatile__( \ - "1: " T(ldr) " %1,[%2],#0\n" \ + "1: " TUSER(ldr) " %1,[%2],#0\n" \ "2:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -323,7 +324,7 @@ do { \ #define __put_user_asm_byte(x,__pu_addr,err) \ __asm__ __volatile__( \ - "1: " T(strb) " %1,[%2],#0\n" \ + "1: " TUSER(strb) " %1,[%2],#0\n" \ "2:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -356,7 +357,7 @@ do { \ #define __put_user_asm_word(x,__pu_addr,err) \ __asm__ __volatile__( \ - "1: " T(str) " %1,[%2],#0\n" \ + "1: " TUSER(str) " %1,[%2],#0\n" \ "2:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -381,10 +382,10 @@ do { \ #define __put_user_asm_dword(x,__pu_addr,err) \ __asm__ __volatile__( \ - ARM( "1: " T(str) " " __reg_oper1 ", [%1], #4\n" ) \ - ARM( "2: " T(str) " " __reg_oper0 ", [%1]\n" ) \ - THUMB( "1: " T(str) " " __reg_oper1 ", [%1]\n" ) \ - THUMB( "2: " T(str) " " __reg_oper0 ", [%1, #4]\n" ) \ + ARM( "1: " TUSER(str) " " __reg_oper1 ", [%1], #4\n" ) \ + ARM( "2: " TUSER(str) " " __reg_oper0 ", [%1]\n" ) \ + THUMB( "1: " TUSER(str) " " __reg_oper1 ", [%1]\n" ) \ + THUMB( "2: " TUSER(str) " " __reg_oper0 ", [%1, #4]\n" ) \ "3:\n" \ " .pushsection .fixup,\"ax\"\n" \ " 
.align 2\n" \ @@ -402,8 +403,21 @@ do { \ #ifdef CONFIG_MMU -extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n); -extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n); +extern unsigned long __must_check ___copy_from_user(void *to, const void __user *from, unsigned long n); +extern unsigned long __must_check ___copy_to_user(void __user *to, const void *from, unsigned long n); + +static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n) +{ + check_object_size(to, n, false); + return ___copy_from_user(to, from, n); +} + +static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) +{ + check_object_size(from, n, true); + return ___copy_to_user(to, from, n); +} + extern unsigned long __must_check __copy_to_user_std(void __user *to, const void *from, unsigned long n); extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); extern unsigned long __must_check __clear_user_std(void __user *addr, unsigned long n); @@ -418,6 +432,9 @@ extern unsigned long __must_check __strnlen_user(const char __user *s, long n); static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { + if ((long)n < 0) + return n; + if (access_ok(VERIFY_READ, from, n)) n = __copy_from_user(to, from, n); else /* security hole - plug it */ @@ -427,6 +444,9 @@ static inline unsigned long __must_check copy_from_user(void *to, const void __u static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { + if ((long)n < 0) + return n; + if (access_ok(VERIFY_WRITE, to, n)) n = __copy_to_user(to, from, n); return n; diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 5b0bce6..becd81c 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -95,8 +95,8 @@ EXPORT_SYMBOL(__strncpy_from_user); #ifdef CONFIG_MMU EXPORT_SYMBOL(copy_page); -EXPORT_SYMBOL(__copy_from_user); -EXPORT_SYMBOL(__copy_to_user); +EXPORT_SYMBOL(___copy_from_user); +EXPORT_SYMBOL(___copy_to_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__get_user_1); diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 463ff4a..8913128 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -322,7 +322,7 @@ /* 310 */ CALL(sys_request_key) CALL(sys_keyctl) CALL(ABI(sys_semtimedop, sys_oabi_semtimedop)) -/* vserver */ CALL(sys_ni_syscall) + CALL(sys_vserver) CALL(sys_ioprio_set) /* 315 */ CALL(sys_ioprio_get) CALL(sys_inotify_init) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index b2a27b6..520889c 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -87,7 +87,7 @@ ENTRY(ret_from_fork) get_thread_info tsk ldr r1, [tsk, #TI_FLAGS] @ check for syscall tracing mov why, #1 - tst r1, #_TIF_SYSCALL_TRACE @ are we tracing syscalls? + tst r1, #_TIF_SYSCALL_WORK @ are we tracing syscalls? beq ret_slow_syscall mov r1, sp mov r0, #1 @ trace exit [IP = 1] @@ -443,7 +443,7 @@ ENTRY(vector_swi) 1: #endif - tst r10, #_TIF_SYSCALL_TRACE @ are we tracing syscalls? + tst r10, #_TIF_SYSCALL_WORK @ are we tracing syscalls? 
bne __sys_trace cmp scno, #NR_syscalls @ check upper syscall limit diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 3606e85..44ba19d 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -46,7 +46,9 @@ .equ swapper_pg_dir, KERNEL_RAM_VADDR - PG_DIR_SIZE .macro pgtbl, rd, phys - add \rd, \phys, #TEXT_OFFSET - PG_DIR_SIZE + mov \rd, #TEXT_OFFSET + sub \rd, #PG_DIR_SIZE + add \rd, \rd, \phys .endm #ifdef CONFIG_XIP_KERNEL diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 2bc1a8e..f433c88 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -986,7 +986,7 @@ static int __cpuinit dbg_reset_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata dbg_reset_nb = { +static struct notifier_block dbg_reset_nb = { .notifier_call = dbg_reset_notify, }; diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 1e9be5d..4e0f470 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -39,6 +39,8 @@ #ifdef CONFIG_MMU void *module_alloc(unsigned long size) { + if (!size || PAGE_ALIGN(size) > MODULES_END - MODULES_VADDR) + return NULL; return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL, PAGE_KERNEL_EXEC, -1, __builtin_return_address(0)); diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index d9e3c61..29c89e8 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include @@ -92,7 +91,7 @@ static int __init hlt_setup(char *__unused) __setup("nohlt", nohlt_setup); __setup("hlt", hlt_setup); -void arm_machine_restart(char mode, const char *cmd) +__noreturn void arm_machine_restart(char mode, const char *cmd) { /* Disable interrupts first */ local_irq_disable(); @@ -135,7 +134,7 @@ void arm_machine_restart(char mode, const char *cmd) void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); -void (*arm_pm_restart)(char str, const char *cmd) = arm_machine_restart; +void (*arm_pm_restart)(char str, const char *cmd) __noreturn = arm_machine_restart; EXPORT_SYMBOL_GPL(arm_pm_restart); static void do_nothing(void *unused) @@ -250,6 +249,7 @@ void machine_power_off(void) machine_shutdown(); if (pm_power_off) pm_power_off(); + BUG(); } void machine_restart(char *cmd) @@ -268,8 +268,8 @@ void __show_regs(struct pt_regs *regs) init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); - print_symbol("PC is at %s\n", instruction_pointer(regs)); - print_symbol("LR is at %s\n", regs->ARM_lr); + printk("PC is at %pA\n", (void *)instruction_pointer(regs)); + printk("LR is at %pA\n", (void *)regs->ARM_lr); printk("pc : [<%08lx>] lr : [<%08lx>] psr: %08lx\n" "sp : %08lx ip : %08lx fp : %08lx\n", regs->ARM_pc, regs->ARM_lr, regs->ARM_cpsr, @@ -322,7 +322,8 @@ void __show_regs(struct pt_regs *regs) void show_regs(struct pt_regs * regs) { printk("\n"); - printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm); + printk("Pid: %d[#%u], comm: %20s\n", + task_pid_nr(current), current->xid, current->comm); __show_regs(regs); dump_stack(); } @@ -489,12 +490,6 @@ unsigned long get_wchan(struct task_struct *p) return 0; } -unsigned long arch_randomize_brk(struct mm_struct *mm) -{ - unsigned long range_end = mm->brk + 0x02000000; - return randomize_range(mm->brk, range_end, 0) ? 
: mm->brk; -} - #ifdef CONFIG_MMU /* * The vectors page is always readable from user space for the diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 90fa8b3..a3a2212 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -904,10 +904,19 @@ long arch_ptrace(struct task_struct *child, long request, return ret; } +#ifdef CONFIG_GRKERNSEC_SETXID +extern void gr_delayed_cred_worker(void); +#endif + asmlinkage int syscall_trace(int why, struct pt_regs *regs, int scno) { unsigned long ip; +#ifdef CONFIG_GRKERNSEC_SETXID + if (unlikely(test_and_clear_thread_flag(TIF_GRSEC_SETXID))) + gr_delayed_cred_worker(); +#endif + if (!test_thread_flag(TIF_SYSCALL_TRACE)) return scno; if (!(current->ptrace & PT_PTRACED)) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index d45fd22..45bae6d 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -63,7 +63,7 @@ static void dump_mem(const char *, const char *, unsigned long, unsigned long); void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long frame) { #ifdef CONFIG_KALLSYMS - printk("[<%08lx>] (%pS) from [<%08lx>] (%pS)\n", where, (void *)where, from, (void *)from); + printk("[<%08lx>] (%pA) from [<%08lx>] (%pA)\n", where, (void *)where, from, (void *)from); #else printk("Function entered at [<%08lx>] from [<%08lx>]\n", where, from); #endif @@ -250,8 +250,8 @@ static int __die(const char *str, int err, struct thread_info *thread, struct pt print_modules(); __show_regs(regs); - printk(KERN_EMERG "Process %.*s (pid: %d, stack limit = 0x%p)\n", - TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1); + printk(KERN_EMERG "Process %.*s (pid: %d:#%u, stack limit = 0x%p)\n", + TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), tsk->xid, thread + 1); if (!user_mode(regs) || in_interrupt()) { dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp, @@ -265,6 +265,8 @@ static int __die(const char *str, int err, struct thread_info *thread, struct pt static DEFINE_RAW_SPINLOCK(die_lock); +extern void gr_handle_kernel_exploit(void); + /* * This function is protected against re-entrancy. */ @@ -294,6 +296,9 @@ void die(const char *str, struct pt_regs *regs, int err) panic("Fatal exception in interrupt"); if (panic_on_oops) panic("Fatal exception"); + + gr_handle_kernel_exploit(); + if (ret != NOTIFY_STOP) do_exit(SIGSEGV); } diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 20b3041..da44b1f 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -103,6 +103,8 @@ SECTIONS ARM_CPU_KEEP(PROC_INFO) } + _etext = .; /* End of text section */ + RO_DATA(PAGE_SIZE) #ifdef CONFIG_ARM_UNWIND @@ -122,8 +124,6 @@ SECTIONS } #endif - _etext = .; /* End of text and rodata section */ - #ifndef CONFIG_XIP_KERNEL . 
= ALIGN(PAGE_SIZE); __init_begin = .; diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index 66a477a..bee61d3 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S @@ -16,7 +16,7 @@ /* * Prototype: * - * size_t __copy_from_user(void *to, const void *from, size_t n) + * size_t ___copy_from_user(void *to, const void *from, size_t n) * * Purpose: * @@ -84,11 +84,11 @@ .text -ENTRY(__copy_from_user) +ENTRY(___copy_from_user) #include "copy_template.S" -ENDPROC(__copy_from_user) +ENDPROC(___copy_from_user) .pushsection .fixup,"ax" .align 0 diff --git a/arch/arm/lib/copy_page.S b/arch/arm/lib/copy_page.S index 6ee2f67..d1cce76 100644 --- a/arch/arm/lib/copy_page.S +++ b/arch/arm/lib/copy_page.S @@ -10,6 +10,7 @@ * ASM optimised string functions */ #include +#include #include #include #include diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S index d066df6..df28194 100644 --- a/arch/arm/lib/copy_to_user.S +++ b/arch/arm/lib/copy_to_user.S @@ -16,7 +16,7 @@ /* * Prototype: * - * size_t __copy_to_user(void *to, const void *from, size_t n) + * size_t ___copy_to_user(void *to, const void *from, size_t n) * * Purpose: * @@ -88,11 +88,11 @@ .text ENTRY(__copy_to_user_std) -WEAK(__copy_to_user) +WEAK(___copy_to_user) #include "copy_template.S" -ENDPROC(__copy_to_user) +ENDPROC(___copy_to_user) ENDPROC(__copy_to_user_std) .pushsection .fixup,"ax" diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index 4306fbf..9b06bb4 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -34,7 +34,7 @@ ENTRY(__get_user_1) check_uaccess r0, 1, r1, r2, __get_user_bad -1: T(ldrb) r2, [r0] +1: TUSER(ldrb) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__get_user_1) @@ -61,7 +61,7 @@ ENDPROC(__get_user_2) ENTRY(__get_user_4) check_uaccess r0, 4, r1, r2, __get_user_bad -4: T(ldr) r2, [r0] +4: TUSER(ldr) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__get_user_4) diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S index 9a897fa..3d73dcb 100644 --- a/arch/arm/lib/putuser.S +++ b/arch/arm/lib/putuser.S @@ -34,7 +34,7 @@ ENTRY(__put_user_1) check_uaccess r0, 1, r1, ip, __put_user_bad -1: T(strb) r2, [r0] +1: TUSER(strb) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__put_user_1) @@ -44,19 +44,19 @@ ENTRY(__put_user_2) mov ip, r2, lsr #8 #ifdef CONFIG_THUMB2_KERNEL #ifndef __ARMEB__ -2: T(strb) r2, [r0] -3: T(strb) ip, [r0, #1] +2: TUSER(strb) r2, [r0] +3: TUSER(strb) ip, [r0, #1] #else -2: T(strb) ip, [r0] -3: T(strb) r2, [r0, #1] +2: TUSER(strb) ip, [r0] +3: TUSER(strb) r2, [r0, #1] #endif #else /* !CONFIG_THUMB2_KERNEL */ #ifndef __ARMEB__ -2: T(strb) r2, [r0], #1 -3: T(strb) ip, [r0] +2: TUSER(strb) r2, [r0], #1 +3: TUSER(strb) ip, [r0] #else -2: T(strb) ip, [r0], #1 -3: T(strb) r2, [r0] +2: TUSER(strb) ip, [r0], #1 +3: TUSER(strb) r2, [r0] #endif #endif /* CONFIG_THUMB2_KERNEL */ mov r0, #0 @@ -65,7 +65,7 @@ ENDPROC(__put_user_2) ENTRY(__put_user_4) check_uaccess r0, 4, r1, ip, __put_user_bad -4: T(str) r2, [r0] +4: TUSER(str) r2, [r0] mov r0, #0 mov pc, lr ENDPROC(__put_user_4) @@ -73,11 +73,11 @@ ENDPROC(__put_user_4) ENTRY(__put_user_8) check_uaccess r0, 8, r1, ip, __put_user_bad #ifdef CONFIG_THUMB2_KERNEL -5: T(str) r2, [r0] -6: T(str) r3, [r0, #4] +5: TUSER(str) r2, [r0] +6: TUSER(str) r3, [r0, #4] #else -5: T(str) r2, [r0], #4 -6: T(str) r3, [r0] +5: TUSER(str) r2, [r0], #4 +6: TUSER(str) r3, [r0] #endif mov r0, #0 mov pc, lr diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S index d0ece2a..e712687 100644 --- 
a/arch/arm/lib/uaccess.S +++ b/arch/arm/lib/uaccess.S @@ -20,7 +20,7 @@ #define PAGE_SHIFT 12 -/* Prototype: int __copy_to_user(void *to, const char *from, size_t n) +/* Prototype: int ___copy_to_user(void *to, const char *from, size_t n) * Purpose : copy a block to user memory from kernel memory * Params : to - user memory * : from - kernel memory @@ -32,15 +32,15 @@ rsb ip, ip, #4 cmp ip, #2 ldrb r3, [r1], #1 -USER( T(strb) r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault ldrgeb r3, [r1], #1 -USER( T(strgeb) r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #1 -USER( T(strgtb) r3, [r0], #1) @ May fault +USER( TUSER( strgtb) r3, [r0], #1) @ May fault sub r2, r2, ip b .Lc2u_dest_aligned -ENTRY(__copy_to_user) +ENTRY(___copy_to_user) stmfd sp!, {r2, r4 - r7, lr} cmp r2, #4 blt .Lc2u_not_enough @@ -59,7 +59,7 @@ ENTRY(__copy_to_user) addmi ip, r2, #4 bmi .Lc2u_0nowords ldr r3, [r1], #4 -USER( T(str) r3, [r0], #4) @ May fault +USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT @@ -88,18 +88,18 @@ USER( T(str) r3, [r0], #4) @ May fault stmneia r0!, {r3 - r4} @ Shouldnt fault tst ip, #4 ldrne r3, [r1], #4 - T(strne) r3, [r0], #4 @ Shouldnt fault + TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_0fupi .Lc2u_0nowords: teq ip, #0 beq .Lc2u_finished .Lc2u_nowords: cmp ip, #2 ldrb r3, [r1], #1 -USER( T(strb) r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault ldrgeb r3, [r1], #1 -USER( T(strgeb) r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #1 -USER( T(strgtb) r3, [r0], #1) @ May fault +USER( TUSER( strgtb) r3, [r0], #1) @ May fault b .Lc2u_finished .Lc2u_not_enough: @@ -120,7 +120,7 @@ USER( T(strgtb) r3, [r0], #1) @ May fault mov r3, r7, pull #8 ldr r7, [r1], #4 orr r3, r3, r7, push #24 -USER( T(str) r3, [r0], #4) @ May fault +USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT @@ -155,18 +155,18 @@ USER( T(str) r3, [r0], #4) @ May fault movne r3, r7, pull #8 ldrne r7, [r1], #4 orrne r3, r3, r7, push #24 - T(strne) r3, [r0], #4 @ Shouldnt fault + TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_1fupi .Lc2u_1nowords: mov r3, r7, get_byte_1 teq ip, #0 beq .Lc2u_finished cmp ip, #2 -USER( T(strb) r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault movge r3, r7, get_byte_2 -USER( T(strgeb) r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault movgt r3, r7, get_byte_3 -USER( T(strgtb) r3, [r0], #1) @ May fault +USER( TUSER( strgtb) r3, [r0], #1) @ May fault b .Lc2u_finished .Lc2u_2fupi: subs r2, r2, #4 @@ -175,7 +175,7 @@ USER( T(strgtb) r3, [r0], #1) @ May fault mov r3, r7, pull #16 ldr r7, [r1], #4 orr r3, r3, r7, push #16 -USER( T(str) r3, [r0], #4) @ May fault +USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT @@ -210,18 +210,18 @@ USER( T(str) r3, [r0], #4) @ May fault movne r3, r7, pull #16 ldrne r7, [r1], #4 orrne r3, r3, r7, push #16 - T(strne) r3, [r0], #4 @ Shouldnt fault + TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_2fupi .Lc2u_2nowords: mov r3, r7, get_byte_2 teq ip, #0 beq .Lc2u_finished cmp ip, #2 -USER( T(strb) r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault movge r3, 
r7, get_byte_3 -USER( T(strgeb) r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #0 -USER( T(strgtb) r3, [r0], #1) @ May fault +USER( TUSER( strgtb) r3, [r0], #1) @ May fault b .Lc2u_finished .Lc2u_3fupi: subs r2, r2, #4 @@ -230,7 +230,7 @@ USER( T(strgtb) r3, [r0], #1) @ May fault mov r3, r7, pull #24 ldr r7, [r1], #4 orr r3, r3, r7, push #8 -USER( T(str) r3, [r0], #4) @ May fault +USER( TUSER( str) r3, [r0], #4) @ May fault mov ip, r0, lsl #32 - PAGE_SHIFT rsb ip, ip, #0 movs ip, ip, lsr #32 - PAGE_SHIFT @@ -265,27 +265,27 @@ USER( T(str) r3, [r0], #4) @ May fault movne r3, r7, pull #24 ldrne r7, [r1], #4 orrne r3, r3, r7, push #8 - T(strne) r3, [r0], #4 @ Shouldnt fault + TUSER( strne) r3, [r0], #4 @ Shouldnt fault ands ip, ip, #3 beq .Lc2u_3fupi .Lc2u_3nowords: mov r3, r7, get_byte_3 teq ip, #0 beq .Lc2u_finished cmp ip, #2 -USER( T(strb) r3, [r0], #1) @ May fault +USER( TUSER( strb) r3, [r0], #1) @ May fault ldrgeb r3, [r1], #1 -USER( T(strgeb) r3, [r0], #1) @ May fault +USER( TUSER( strgeb) r3, [r0], #1) @ May fault ldrgtb r3, [r1], #0 -USER( T(strgtb) r3, [r0], #1) @ May fault +USER( TUSER( strgtb) r3, [r0], #1) @ May fault b .Lc2u_finished -ENDPROC(__copy_to_user) +ENDPROC(___copy_to_user) .pushsection .fixup,"ax" .align 0 9001: ldmfd sp!, {r0, r4 - r7, pc} .popsection -/* Prototype: unsigned long __copy_from_user(void *to,const void *from,unsigned long n); +/* Prototype: unsigned long ___copy_from_user(void *to,const void *from,unsigned long n); * Purpose : copy a block from user memory to kernel memory * Params : to - kernel memory * : from - user memory @@ -295,16 +295,16 @@ ENDPROC(__copy_to_user) .Lcfu_dest_not_aligned: rsb ip, ip, #4 cmp ip, #2 -USER( T(ldrb) r3, [r1], #1) @ May fault +USER( TUSER( ldrb) r3, [r1], #1) @ May fault strb r3, [r0], #1 -USER( T(ldrgeb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault strgeb r3, [r0], #1 -USER( T(ldrgtb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault strgtb r3, [r0], #1 sub r2, r2, ip b .Lcfu_dest_aligned -ENTRY(__copy_from_user) +ENTRY(___copy_from_user) stmfd sp!, {r0, r2, r4 - r7, lr} cmp r2, #4 blt .Lcfu_not_enough @@ -322,7 +322,7 @@ ENTRY(__copy_from_user) .Lcfu_0fupi: subs r2, r2, #4 addmi ip, r2, #4 bmi .Lcfu_0nowords -USER( T(ldr) r3, [r1], #4) +USER( TUSER( ldr) r3, [r1], #4) str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction rsb ip, ip, #0 @@ -351,18 +351,18 @@ USER( T(ldr) r3, [r1], #4) ldmneia r1!, {r3 - r4} @ Shouldnt fault stmneia r0!, {r3 - r4} tst ip, #4 - T(ldrne) r3, [r1], #4 @ Shouldnt fault + TUSER( ldrne) r3, [r1], #4 @ Shouldnt fault strne r3, [r0], #4 ands ip, ip, #3 beq .Lcfu_0fupi .Lcfu_0nowords: teq ip, #0 beq .Lcfu_finished .Lcfu_nowords: cmp ip, #2 -USER( T(ldrb) r3, [r1], #1) @ May fault +USER( TUSER( ldrb) r3, [r1], #1) @ May fault strb r3, [r0], #1 -USER( T(ldrgeb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault strgeb r3, [r0], #1 -USER( T(ldrgtb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault strgtb r3, [r0], #1 b .Lcfu_finished @@ -375,7 +375,7 @@ USER( T(ldrgtb) r3, [r1], #1) @ May fault .Lcfu_src_not_aligned: bic r1, r1, #3 -USER( T(ldr) r7, [r1], #4) @ May fault +USER( TUSER( ldr) r7, [r1], #4) @ May fault cmp ip, #2 bgt .Lcfu_3fupi beq .Lcfu_2fupi @@ -383,7 +383,7 @@ USER( T(ldr) r7, [r1], #4) @ May fault addmi ip, r2, #4 bmi .Lcfu_1nowords mov r3, r7, pull #8 -USER( T(ldr) r7, [r1], #4) @ May fault +USER( 
TUSER( ldr) r7, [r1], #4) @ May fault orr r3, r3, r7, push #24 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT @@ -418,7 +418,7 @@ USER( T(ldr) r7, [r1], #4) @ May fault stmneia r0!, {r3 - r4} tst ip, #4 movne r3, r7, pull #8 -USER( T(ldrne) r7, [r1], #4) @ May fault +USER( TUSER( ldrne) r7, [r1], #4) @ May fault orrne r3, r3, r7, push #24 strne r3, [r0], #4 ands ip, ip, #3 @@ -438,7 +438,7 @@ USER( T(ldrne) r7, [r1], #4) @ May fault addmi ip, r2, #4 bmi .Lcfu_2nowords mov r3, r7, pull #16 -USER( T(ldr) r7, [r1], #4) @ May fault +USER( TUSER( ldr) r7, [r1], #4) @ May fault orr r3, r3, r7, push #16 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT @@ -474,7 +474,7 @@ USER( T(ldr) r7, [r1], #4) @ May fault stmneia r0!, {r3 - r4} tst ip, #4 movne r3, r7, pull #16 -USER( T(ldrne) r7, [r1], #4) @ May fault +USER( TUSER( ldrne) r7, [r1], #4) @ May fault orrne r3, r3, r7, push #16 strne r3, [r0], #4 ands ip, ip, #3 @@ -486,7 +486,7 @@ USER( T(ldrne) r7, [r1], #4) @ May fault strb r3, [r0], #1 movge r3, r7, get_byte_3 strgeb r3, [r0], #1 -USER( T(ldrgtb) r3, [r1], #0) @ May fault +USER( TUSER( ldrgtb) r3, [r1], #0) @ May fault strgtb r3, [r0], #1 b .Lcfu_finished @@ -494,7 +494,7 @@ USER( T(ldrgtb) r3, [r1], #0) @ May fault addmi ip, r2, #4 bmi .Lcfu_3nowords mov r3, r7, pull #24 -USER( T(ldr) r7, [r1], #4) @ May fault +USER( TUSER( ldr) r7, [r1], #4) @ May fault orr r3, r3, r7, push #8 str r3, [r0], #4 mov ip, r1, lsl #32 - PAGE_SHIFT @@ -529,7 +529,7 @@ USER( T(ldr) r7, [r1], #4) @ May fault stmneia r0!, {r3 - r4} tst ip, #4 movne r3, r7, pull #24 -USER( T(ldrne) r7, [r1], #4) @ May fault +USER( TUSER( ldrne) r7, [r1], #4) @ May fault orrne r3, r3, r7, push #8 strne r3, [r0], #4 ands ip, ip, #3 @@ -539,12 +539,12 @@ USER( T(ldrne) r7, [r1], #4) @ May fault beq .Lcfu_finished cmp ip, #2 strb r3, [r0], #1 -USER( T(ldrgeb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault strgeb r3, [r0], #1 -USER( T(ldrgtb) r3, [r1], #1) @ May fault +USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault strgtb r3, [r0], #1 b .Lcfu_finished -ENDPROC(__copy_from_user) +ENDPROC(___copy_from_user) .pushsection .fixup,"ax" .align 0 diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c index 025f742..8432b08 100644 --- a/arch/arm/lib/uaccess_with_memcpy.c +++ b/arch/arm/lib/uaccess_with_memcpy.c @@ -104,7 +104,7 @@ out: } unsigned long -__copy_to_user(void __user *to, const void *from, unsigned long n) +___copy_to_user(void __user *to, const void *from, unsigned long n) { /* * This test is stubbed out of the main function above to keep diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c index e9d5f4a..f099699 100644 --- a/arch/arm/mach-omap2/board-n8x0.c +++ b/arch/arm/mach-omap2/board-n8x0.c @@ -593,7 +593,7 @@ static int n8x0_menelaus_late_init(struct device *dev) } #endif -static struct menelaus_platform_data n8x0_menelaus_platform_data __initdata = { +static struct menelaus_platform_data n8x0_menelaus_platform_data __initconst = { .late_init = n8x0_menelaus_late_init, }; diff --git a/arch/arm/mach-omap2/smartreflex.h b/arch/arm/mach-omap2/smartreflex.h index 5f35b9e..6d09f99 100644 --- a/arch/arm/mach-omap2/smartreflex.h +++ b/arch/arm/mach-omap2/smartreflex.h @@ -183,7 +183,7 @@ struct omap_sr_class_data { int (*notify)(struct voltagedomain *voltdm, u32 status); u8 notify_flags; u8 class_type; -}; +} __do_const; /** * struct omap_sr_nvalue_table - Smartreflex n-target value info diff --git a/arch/arm/mach-ux500/mbox-db5500.c 
b/arch/arm/mach-ux500/mbox-db5500.c index 2b2d51c..0127490 100644 --- a/arch/arm/mach-ux500/mbox-db5500.c +++ b/arch/arm/mach-ux500/mbox-db5500.c @@ -168,7 +168,7 @@ static ssize_t mbox_read_fifo(struct device *dev, return sprintf(buf, "0x%X\n", mbox_value); } -static DEVICE_ATTR(fifo, S_IWUGO | S_IRUGO, mbox_read_fifo, mbox_write_fifo); +static DEVICE_ATTR(fifo, S_IWUSR | S_IRUGO, mbox_read_fifo, mbox_write_fifo); static int mbox_show(struct seq_file *s, void *data) { diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 4b0bc37..d556b08 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -386,6 +386,33 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) } #endif /* CONFIG_MMU */ +#ifdef CONFIG_PAX_PAGEEXEC +void pax_report_insns(struct pt_regs *regs, void *pc, void *sp) +{ + long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 20; i++) { + unsigned char c; + if (get_user(c, (__force unsigned char __user *)pc+i)) + printk(KERN_CONT "?? "); + else + printk(KERN_CONT "%02x ", c); + } + printk("\n"); + + printk(KERN_ERR "PAX: bytes at SP-4: "); + for (i = -1; i < 20; i++) { + unsigned long c; + if (get_user(c, (__force unsigned long __user *)sp+i)) + printk(KERN_CONT "???????? "); + else + printk(KERN_CONT "%08lx ", c); + } + printk("\n"); +} +#endif + /* * First Level Translation Fault Handler * @@ -630,6 +657,20 @@ do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs) const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr); struct siginfo info; +#ifdef CONFIG_PAX_REFCOUNT + if (fsr_fs(ifsr) == 2) { + unsigned int bkpt; + + if (!probe_kernel_address((unsigned int *)addr, bkpt) && bkpt == 0xe12f1073) { + current->thread.error_code = ifsr; + current->thread.trap_no = 0; + pax_report_refcount_overflow(regs); + fixup_exception(regs); + return; + } + } +#endif + if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs)) return; diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 44b628e..be706ee 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -33,6 +33,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long start_addr; int do_align = 0; int aliasing = cache_is_vipt_aliasing(); + unsigned long offset = gr_rand_threadstack_offset(mm, filp, flags); /* * We only need to do colour alignment if either the I or D @@ -54,6 +55,10 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, if (len > TASK_SIZE) return -ENOMEM; +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + if (addr) { if (do_align) addr = COLOUR_ALIGN(addr, pgoff); @@ -61,16 +66,20 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + if (TASK_SIZE - len >= addr && check_heap_stack_gap(vma, &addr, len, offset)) return addr; } if (len > mm->cached_hole_size) { - start_addr = addr = mm->free_area_cache; + start_addr = addr = mm->free_area_cache; } else { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; + start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; } + +#ifdef CONFIG_PAX_RANDMMAP + if (!(current->mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + /* 8 bits of randomness in 20 address space bits */ if ((current->flags & PF_RANDOMIZE) && !(current->personality & ADDR_NO_RANDOMIZE)) @@ -89,14 +98,14 @@ full_search: * Start a new search - just in case we missed * some holes. 
*/ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; + if (start_addr != mm->mmap_base) { + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; goto full_search; } return -ENOMEM; } - if (!vma || addr + len <= vma->vm_start) { + if (check_heap_stack_gap(vma, &addr, len, offset)) { /* * Remember the place where we stopped the search: */ @@ -111,7 +120,6 @@ full_search: } } - /* * You really shouldn't be using read() or write() on /dev/mem. This * might go away in the future. diff --git a/arch/arm/plat-samsung/include/plat/dma-ops.h b/arch/arm/plat-samsung/include/plat/dma-ops.h index 4c1a363..df311d0 100644 --- a/arch/arm/plat-samsung/include/plat/dma-ops.h +++ b/arch/arm/plat-samsung/include/plat/dma-ops.h @@ -41,7 +41,7 @@ struct samsung_dma_ops { int (*started)(unsigned ch); int (*flush)(unsigned ch); int (*stop)(unsigned ch); -}; +} __no_const; extern void *samsung_dmadev_get_ops(void); extern void *s3c_dma_get_ops(void); diff --git a/arch/arm/plat-samsung/include/plat/ehci.h b/arch/arm/plat-samsung/include/plat/ehci.h index 5f28cae..3d23723 100644 --- a/arch/arm/plat-samsung/include/plat/ehci.h +++ b/arch/arm/plat-samsung/include/plat/ehci.h @@ -14,7 +14,7 @@ struct s5p_ehci_platdata { int (*phy_init)(struct platform_device *pdev, int type); int (*phy_exit)(struct platform_device *pdev, int type); -}; +} __no_const; extern void s5p_ehci_set_platdata(struct s5p_ehci_platdata *pd); diff --git a/arch/avr32/include/asm/cache.h b/arch/avr32/include/asm/cache.h index c3a58a1..78fbf54 100644 --- a/arch/avr32/include/asm/cache.h +++ b/arch/avr32/include/asm/cache.h @@ -1,8 +1,10 @@ #ifndef __ASM_AVR32_CACHE_H #define __ASM_AVR32_CACHE_H +#include + #define L1_CACHE_SHIFT 5 -#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) /* * Memory returned by kmalloc() may be used for DMA, so we must make diff --git a/arch/avr32/include/asm/elf.h b/arch/avr32/include/asm/elf.h index 3b3159b..425ea94d 100644 --- a/arch/avr32/include/asm/elf.h +++ b/arch/avr32/include/asm/elf.h @@ -84,8 +84,14 @@ typedef struct user_fpu_struct elf_fpregset_t; the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. */ -#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) +#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE 0x00001000UL + +#define PAX_DELTA_MMAP_LEN 15 +#define PAX_DELTA_STACK_LEN 15 +#endif /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. This could be done in user space, diff --git a/arch/avr32/include/asm/kmap_types.h b/arch/avr32/include/asm/kmap_types.h index b7f5c68..556135c 100644 --- a/arch/avr32/include/asm/kmap_types.h +++ b/arch/avr32/include/asm/kmap_types.h @@ -22,7 +22,8 @@ D(10) KM_IRQ0, D(11) KM_IRQ1, D(12) KM_SOFTIRQ0, D(13) KM_SOFTIRQ1, -D(14) KM_TYPE_NR +D(14) KM_CLEARPAGE, +D(15) KM_TYPE_NR }; #undef D diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c index 632b649..043ddd2 100644 --- a/arch/avr32/mm/fault.c +++ b/arch/avr32/mm/fault.c @@ -41,6 +41,23 @@ static inline int notify_page_fault(struct pt_regs *regs, int trap) int exception_trace = 1; +#ifdef CONFIG_PAX_PAGEEXEC +void pax_report_insns(struct pt_regs *regs, void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 20; i++) { + unsigned char c; + if (get_user(c, (unsigned char *)pc+i)) + printk(KERN_CONT "???????? 
"); + else + printk(KERN_CONT "%02x ", c); + } + printk("\n"); +} +#endif + /* * This routine handles page faults. It determines the address and the * problem, and then passes it off to one of the appropriate routines. @@ -158,6 +175,16 @@ bad_area: up_read(&mm->mmap_sem); if (user_mode(regs)) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (mm->pax_flags & MF_PAX_PAGEEXEC) { + if (ecr == ECR_PROTECTION_X || ecr == ECR_TLB_MISS_X) { + pax_report_fault(regs, (void *)regs->pc, (void *)regs->sp); + do_group_exit(SIGKILL); + } + } +#endif + if (exception_trace && printk_ratelimit()) printk("%s%s[%d]: segfault at %08lx pc %08lx " "sp %08lx ecr %lu\n", diff --git a/arch/blackfin/include/asm/cache.h b/arch/blackfin/include/asm/cache.h index 568885a..f8008df 100644 --- a/arch/blackfin/include/asm/cache.h +++ b/arch/blackfin/include/asm/cache.h @@ -7,6 +7,7 @@ #ifndef __ARCH_BLACKFIN_CACHE_H #define __ARCH_BLACKFIN_CACHE_H +#include #include /* for asmlinkage */ /* @@ -14,7 +15,7 @@ * Blackfin loads 32 bytes for cache */ #define L1_CACHE_SHIFT 5 -#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) #define SMP_CACHE_BYTES L1_CACHE_BYTES #define ARCH_DMA_MINALIGN L1_CACHE_BYTES diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index 408b055..5ad9a98 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -678,6 +678,8 @@ source "drivers/staging/Kconfig" source "arch/cris/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/cris/include/arch-v10/arch/cache.h b/arch/cris/include/arch-v10/arch/cache.h index aea2718..3639a60 100644 --- a/arch/cris/include/arch-v10/arch/cache.h +++ b/arch/cris/include/arch-v10/arch/cache.h @@ -1,8 +1,9 @@ #ifndef _ASM_ARCH_CACHE_H #define _ASM_ARCH_CACHE_H +#include /* Etrax 100LX have 32-byte cache-lines. */ -#define L1_CACHE_BYTES 32 #define L1_CACHE_SHIFT 5 +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) #endif /* _ASM_ARCH_CACHE_H */ diff --git a/arch/cris/include/arch-v32/arch/cache.h b/arch/cris/include/arch-v32/arch/cache.h index 1de779f..336fad3 100644 --- a/arch/cris/include/arch-v32/arch/cache.h +++ b/arch/cris/include/arch-v32/arch/cache.h @@ -1,11 +1,12 @@ #ifndef _ASM_CRIS_ARCH_CACHE_H #define _ASM_CRIS_ARCH_CACHE_H +#include #include /* A cache-line is 32 bytes. 
*/ -#define L1_CACHE_BYTES 32 #define L1_CACHE_SHIFT 5 +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) #define __read_mostly __attribute__((__section__(".data.read_mostly"))) diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h index 0d8a7d6..d0c9ff5 100644 --- a/arch/frv/include/asm/atomic.h +++ b/arch/frv/include/asm/atomic.h @@ -241,6 +241,16 @@ extern uint32_t __xchg_32(uint32_t i, volatile void *v); #define atomic64_cmpxchg(v, old, new) (__cmpxchg_64(old, new, &(v)->counter)) #define atomic64_xchg(v, new) (__xchg_64(new, &(v)->counter)) +#define atomic64_read_unchecked(v) atomic64_read(v) +#define atomic64_set_unchecked(v, i) atomic64_set((v), (i)) +#define atomic64_add_unchecked(a, v) atomic64_add((a), (v)) +#define atomic64_add_return_unchecked(a, v) atomic64_add_return((a), (v)) +#define atomic64_sub_unchecked(a, v) atomic64_sub((a), (v)) +#define atomic64_inc_unchecked(v) atomic64_inc(v) +#define atomic64_inc_return_unchecked(v) atomic64_inc_return(v) +#define atomic64_dec_unchecked(v) atomic64_dec(v) +#define atomic64_cmpxchg_unchecked(v, o, n) atomic64_cmpxchg((v), (o), (n)) + static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) { int c, old; diff --git a/arch/frv/include/asm/cache.h b/arch/frv/include/asm/cache.h index 2797163..c2a401df9 100644 --- a/arch/frv/include/asm/cache.h +++ b/arch/frv/include/asm/cache.h @@ -12,10 +12,11 @@ #ifndef __ASM_CACHE_H #define __ASM_CACHE_H +#include /* bytes per L1 cache line */ #define L1_CACHE_SHIFT (CONFIG_FRV_L1_CACHE_SHIFT) -#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) #define __cacheline_aligned __attribute__((aligned(L1_CACHE_BYTES))) #define ____cacheline_aligned __attribute__((aligned(L1_CACHE_BYTES))) diff --git a/arch/frv/include/asm/kmap_types.h b/arch/frv/include/asm/kmap_types.h index f8e16b2..c73ff79 100644 --- a/arch/frv/include/asm/kmap_types.h +++ b/arch/frv/include/asm/kmap_types.h @@ -23,6 +23,7 @@ enum km_type { KM_IRQ1, KM_SOFTIRQ0, KM_SOFTIRQ1, + KM_CLEARPAGE, KM_TYPE_NR }; diff --git a/arch/frv/kernel/kernel_thread.S b/arch/frv/kernel/kernel_thread.S index 4531c83..fab3b42 100644 --- a/arch/frv/kernel/kernel_thread.S +++ b/arch/frv/kernel/kernel_thread.S @@ -37,7 +37,7 @@ kernel_thread: # start by forking the current process, but with shared VM setlos.p #__NR_clone,gr7 ; syscall number - ori gr10,#CLONE_VM,gr8 ; first syscall arg [clone_flags] + ori gr10,#CLONE_KT,gr8 ; first syscall arg [clone_flags] sethi.p #0xe4e4,gr9 ; second syscall arg [newsp] setlo #0xe4e4,gr9 setlos.p #0,gr10 ; third syscall arg [parent_tidptr] diff --git a/arch/frv/mm/elf-fdpic.c b/arch/frv/mm/elf-fdpic.c index 385fd30..27cf8ba 100644 --- a/arch/frv/mm/elf-fdpic.c +++ b/arch/frv/mm/elf-fdpic.c @@ -61,6 +61,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi { struct vm_area_struct *vma; unsigned long limit; + unsigned long offset = gr_rand_threadstack_offset(current->mm, filp, flags); if (len > TASK_SIZE) return -ENOMEM; @@ -73,8 +74,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma(current->mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + if (TASK_SIZE - len >= addr && check_heap_stack_gap(vma, &addr, len, offset)) goto success; } @@ -89,7 +89,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi for (; vma; vma = vma->vm_next) { if (addr > limit) break; - 
if (addr + len <= vma->vm_start) + if (check_heap_stack_gap(vma, &addr, len, offset)) goto success; addr = vma->vm_end; } @@ -104,7 +104,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi for (; vma; vma = vma->vm_next) { if (addr > limit) break; - if (addr + len <= vma->vm_start) + if (check_heap_stack_gap(vma, &addr, len, offset)) goto success; addr = vma->vm_end; } diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index d1f377f..62af587 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -213,6 +213,8 @@ source "fs/Kconfig" source "arch/h8300/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/h8300/include/asm/cache.h b/arch/h8300/include/asm/cache.h index c635028..6d9445a 100644 --- a/arch/h8300/include/asm/cache.h +++ b/arch/h8300/include/asm/cache.h @@ -1,8 +1,10 @@ #ifndef __ARCH_H8300_CACHE_H #define __ARCH_H8300_CACHE_H +#include + /* bytes per L1 cache line */ -#define L1_CACHE_BYTES 4 +#define L1_CACHE_BYTES _AC(4,UL) /* m68k-elf-gcc 2.95.2 doesn't like these */ diff --git a/arch/hexagon/include/asm/cache.h b/arch/hexagon/include/asm/cache.h index 0f01de2..d37d309 100644 --- a/arch/hexagon/include/asm/cache.h +++ b/arch/hexagon/include/asm/cache.h @@ -21,9 +21,11 @@ #ifndef __ASM_CACHE_H #define __ASM_CACHE_H +#include + /* Bytes per L1 cache line */ -#define L1_CACHE_SHIFT (5) -#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define L1_CACHE_SHIFT 5 +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) #define __cacheline_aligned __aligned(L1_CACHE_BYTES) #define ____cacheline_aligned __aligned(L1_CACHE_BYTES) diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index 18c4f0b..2c2d8624 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -264,7 +264,7 @@ void free_thread_info(struct thread_info *ti) void thread_info_cache_init(void) { thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE, - THREAD_SIZE, 0, NULL); + THREAD_SIZE, SLAB_USERCOPY, NULL); BUG_ON(thread_info_cache == NULL); } diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 27489b6..e007fac 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -570,6 +570,7 @@ source "drivers/sn/Kconfig" config KEXEC bool "kexec system call (EXPERIMENTAL)" depends on EXPERIMENTAL && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU) + depends on !GRKERNSEC_KMEM help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. 
It is like a reboot @@ -657,6 +658,8 @@ source "fs/Kconfig" source "arch/ia64/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index be7bfa1..32098c5 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -101,5 +101,6 @@ endef archprepare: make_nr_irqs_h FORCE PHONY += make_nr_irqs_h FORCE +make_nr_irqs_h: KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS)) make_nr_irqs_h: FORCE $(Q)$(MAKE) $(build)=arch/ia64/kernel include/generated/nr-irqs.h diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 2fc214b..7597423 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -209,6 +209,16 @@ atomic64_add_negative (__s64 i, atomic64_t *v) #define atomic64_inc(v) atomic64_add(1, (v)) #define atomic64_dec(v) atomic64_sub(1, (v)) +#define atomic64_read_unchecked(v) atomic64_read(v) +#define atomic64_set_unchecked(v, i) atomic64_set((v), (i)) +#define atomic64_add_unchecked(a, v) atomic64_add((a), (v)) +#define atomic64_add_return_unchecked(a, v) atomic64_add_return((a), (v)) +#define atomic64_sub_unchecked(a, v) atomic64_sub((a), (v)) +#define atomic64_inc_unchecked(v) atomic64_inc(v) +#define atomic64_inc_return_unchecked(v) atomic64_inc_return(v) +#define atomic64_dec_unchecked(v) atomic64_dec(v) +#define atomic64_cmpxchg_unchecked(v, o, n) atomic64_cmpxchg((v), (o), (n)) + /* Atomic operations are already serializing */ #define smp_mb__before_atomic_dec() barrier() #define smp_mb__after_atomic_dec() barrier() diff --git a/arch/ia64/include/asm/cache.h b/arch/ia64/include/asm/cache.h index 988254a..e1ee885 100644 --- a/arch/ia64/include/asm/cache.h +++ b/arch/ia64/include/asm/cache.h @@ -1,6 +1,7 @@ #ifndef _ASM_IA64_CACHE_H #define _ASM_IA64_CACHE_H +#include /* * Copyright (C) 1998-2000 Hewlett-Packard Co @@ -9,7 +10,7 @@ /* Bytes per L1 (data) cache line. */ #define L1_CACHE_SHIFT CONFIG_IA64_L1_CACHE_SHIFT -#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) #ifdef CONFIG_SMP # define SMP_CACHE_SHIFT L1_CACHE_SHIFT diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h index b5298eb..67c6e62 100644 --- a/arch/ia64/include/asm/elf.h +++ b/arch/ia64/include/asm/elf.h @@ -42,6 +42,13 @@ */ #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x800000000UL) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE (current->personality == PER_LINUX32 ? 0x08048000UL : 0x4000000000000000UL) + +#define PAX_DELTA_MMAP_LEN (current->personality == PER_LINUX32 ? 16 : 3*PAGE_SHIFT - 13) +#define PAX_DELTA_STACK_LEN (current->personality == PER_LINUX32 ? 
16 : 3*PAGE_SHIFT - 13) +#endif + #define PT_IA_64_UNWIND 0x70000001 /* IA-64 relocations: */ diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h index 96a8d92..617a1cf 100644 --- a/arch/ia64/include/asm/pgalloc.h +++ b/arch/ia64/include/asm/pgalloc.h @@ -39,6 +39,12 @@ pgd_populate(struct mm_struct *mm, pgd_t * pgd_entry, pud_t * pud) pgd_val(*pgd_entry) = __pa(pud); } +static inline void +pgd_populate_kernel(struct mm_struct *mm, pgd_t * pgd_entry, pud_t * pud) +{ + pgd_populate(mm, pgd_entry, pud); +} + static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { return quicklist_alloc(0, GFP_KERNEL, NULL); @@ -57,6 +63,12 @@ pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd) pud_val(*pud_entry) = __pa(pmd); } +static inline void +pud_populate_kernel(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd) +{ + pud_populate(mm, pud_entry, pmd); +} + static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { return quicklist_alloc(0, GFP_KERNEL, NULL); diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index 1a97af3..7529d31 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -12,7 +12,7 @@ * David Mosberger-Tang */ - +#include #include #include #include @@ -143,6 +143,17 @@ #define PAGE_READONLY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) #define PAGE_COPY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) #define PAGE_COPY_EXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX) + +#ifdef CONFIG_PAX_PAGEEXEC +# define PAGE_SHARED_NOEXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RW) +# define PAGE_READONLY_NOEXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) +# define PAGE_COPY_NOEXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) +#else +# define PAGE_SHARED_NOEXEC PAGE_SHARED +# define PAGE_READONLY_NOEXEC PAGE_READONLY +# define PAGE_COPY_NOEXEC PAGE_COPY +#endif + #define PAGE_GATE __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX) #define PAGE_KERNEL __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX) #define PAGE_KERNELRX __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX) diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index fba7696..a7650fd 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -320,7 +320,7 @@ struct thread_struct { regs->loadrs = 0; \ regs->r8 = get_dumpable(current->mm); /* set "don't zap registers" flag */ \ regs->r12 = new_sp - 16; /* allocate 16 byte scratch area */ \ - if (unlikely(get_dumpable(current->mm) != SUID_DUMP_USER)) { \ + if (unlikely(get_dumpable(current->mm) != SUID_DUMP_USER)) { \ /* \ * Zap scratch regs to avoid leaking bits between processes with different \ * uid/privileges. 
\ diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h index b77768d..e0795eb 100644 --- a/arch/ia64/include/asm/spinlock.h +++ b/arch/ia64/include/asm/spinlock.h @@ -72,7 +72,7 @@ static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) unsigned short *p = (unsigned short *)&lock->lock + 1, tmp; asm volatile ("ld2.bias %0=[%1]" : "=r"(tmp) : "r"(p)); - ACCESS_ONCE(*p) = (tmp + 2) & ~1; + ACCESS_ONCE_RW(*p) = (tmp + 2) & ~1; } static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock) diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h index 449c8c0..3d4b1e9 100644 --- a/arch/ia64/include/asm/uaccess.h +++ b/arch/ia64/include/asm/uaccess.h @@ -70,6 +70,7 @@ && ((segment).seg == KERNEL_DS.seg \ || likely(REGION_OFFSET((unsigned long) (addr)) < RGN_MAP_LIMIT))); \ }) +#define access_ok_noprefault(type, addr, size) access_ok((type), (addr), (size)) #define access_ok(type, addr, size) __access_ok((addr), (size), get_fs()) /* @@ -240,12 +241,24 @@ extern unsigned long __must_check __copy_user (void __user *to, const void __use static inline unsigned long __copy_to_user (void __user *to, const void *from, unsigned long count) { + if (count > INT_MAX) + return count; + + if (!__builtin_constant_p(count)) + check_object_size(from, count, true); + return __copy_user(to, (__force void __user *) from, count); } static inline unsigned long __copy_from_user (void *to, const void __user *from, unsigned long count) { + if (count > INT_MAX) + return count; + + if (!__builtin_constant_p(count)) + check_object_size(to, count, false); + return __copy_user((__force void __user *) to, from, count); } @@ -255,10 +268,13 @@ __copy_from_user (void *to, const void __user *from, unsigned long count) ({ \ void __user *__cu_to = (to); \ const void *__cu_from = (from); \ - long __cu_len = (n); \ + unsigned long __cu_len = (n); \ \ - if (__access_ok(__cu_to, __cu_len, get_fs())) \ + if (__cu_len <= INT_MAX && __access_ok(__cu_to, __cu_len, get_fs())) { \ + if (!__builtin_constant_p(n)) \ + check_object_size(__cu_from, __cu_len, true); \ __cu_len = __copy_user(__cu_to, (__force void __user *) __cu_from, __cu_len); \ + } \ __cu_len; \ }) @@ -266,11 +282,14 @@ __copy_from_user (void *to, const void __user *from, unsigned long count) ({ \ void *__cu_to = (to); \ const void __user *__cu_from = (from); \ - long __cu_len = (n); \ + unsigned long __cu_len = (n); \ \ __chk_user_ptr(__cu_from); \ - if (__access_ok(__cu_from, __cu_len, get_fs())) \ + if (__cu_len <= INT_MAX && __access_ok(__cu_from, __cu_len, get_fs())) { \ + if (!__builtin_constant_p(n)) \ + check_object_size(__cu_to, __cu_len, false); \ __cu_len = __copy_user((__force void __user *) __cu_to, __cu_from, __cu_len); \ + } \ __cu_len; \ }) diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 1ccbe12..8d2c517 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1714,7 +1714,7 @@ sys_call_table: data8 sys_mq_notify data8 sys_mq_getsetattr data8 sys_kexec_load - data8 sys_ni_syscall // reserved for vserver + data8 sys_vserver data8 sys_waitid // 1270 data8 sys_add_key data8 sys_request_key diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c index c539c68..c95d3db 100644 --- a/arch/ia64/kernel/err_inject.c +++ b/arch/ia64/kernel/err_inject.c @@ -256,7 +256,7 @@ static int __cpuinit err_inject_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata err_inject_cpu_notifier = 
+static struct notifier_block err_inject_cpu_notifier = { .notifier_call = err_inject_cpu_callback, }; diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 782c3a35..3540c5e 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -23,7 +23,6 @@ #include #include #include -#include /* for rand_initialize_irq() */ #include #include #include diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 9b97303..69464a9 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1919,7 +1919,7 @@ static int __cpuinit mca_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block mca_cpu_notifier __cpuinitdata = { +static struct notifier_block mca_cpu_notifier = { .notifier_call = mca_cpu_callback }; diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index 24603be..948052d 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -307,8 +307,7 @@ plt_target (struct plt_entry *plt) void module_free (struct module *mod, void *module_region) { - if (mod && mod->arch.init_unw_table && - module_region == mod->module_init) { + if (mod && mod->arch.init_unw_table && module_region == mod->module_init_rx) { unw_remove_unwind_table(mod->arch.init_unw_table); mod->arch.init_unw_table = NULL; } @@ -494,15 +493,39 @@ module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, } static inline int +in_init_rx (const struct module *mod, uint64_t addr) +{ + return addr - (uint64_t) mod->module_init_rx < mod->init_size_rx; +} + +static inline int +in_init_rw (const struct module *mod, uint64_t addr) +{ + return addr - (uint64_t) mod->module_init_rw < mod->init_size_rw; +} + +static inline int in_init (const struct module *mod, uint64_t addr) { - return addr - (uint64_t) mod->module_init < mod->init_size; + return in_init_rx(mod, addr) || in_init_rw(mod, addr); +} + +static inline int +in_core_rx (const struct module *mod, uint64_t addr) +{ + return addr - (uint64_t) mod->module_core_rx < mod->core_size_rx; +} + +static inline int +in_core_rw (const struct module *mod, uint64_t addr) +{ + return addr - (uint64_t) mod->module_core_rw < mod->core_size_rw; } static inline int in_core (const struct module *mod, uint64_t addr) { - return addr - (uint64_t) mod->module_core < mod->core_size; + return in_core_rx(mod, addr) || in_core_rw(mod, addr); } static inline int @@ -685,7 +708,14 @@ do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend, break; case RV_BDREL: - val -= (uint64_t) (in_init(mod, val) ? mod->module_init : mod->module_core); + if (in_init_rx(mod, val)) + val -= (uint64_t) mod->module_init_rx; + else if (in_init_rw(mod, val)) + val -= (uint64_t) mod->module_init_rw; + else if (in_core_rx(mod, val)) + val -= (uint64_t) mod->module_core_rx; + else if (in_core_rw(mod, val)) + val -= (uint64_t) mod->module_core_rw; break; case RV_LTV: @@ -820,15 +850,15 @@ apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symind * addresses have been selected... */ uint64_t gp; - if (mod->core_size > MAX_LTOFF) + if (mod->core_size_rx + mod->core_size_rw > MAX_LTOFF) /* * This takes advantage of fact that SHF_ARCH_SMALL gets allocated * at the end of the module. 
*/ - gp = mod->core_size - MAX_LTOFF / 2; + gp = mod->core_size_rx + mod->core_size_rw - MAX_LTOFF / 2; else - gp = mod->core_size / 2; - gp = (uint64_t) mod->module_core + ((gp + 7) & -8); + gp = (mod->core_size_rx + mod->core_size_rw) / 2; + gp = (uint64_t) mod->module_core_rx + ((gp + 7) & -8); mod->arch.gp = gp; DEBUGP("%s: placing gp at 0x%lx\n", __func__, gp); } diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index 77597e5..189dd62f 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -977,7 +977,7 @@ create_palinfo_proc_entries(unsigned int cpu) struct proc_dir_entry **pdir; struct proc_dir_entry *cpu_dir; int j; - char cpustr[sizeof(CPUSTR)]; + char cpustr[3+4+1]; /* @@ -1045,7 +1045,7 @@ static int __cpuinit palinfo_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block __refdata palinfo_cpu_notifier = +static struct notifier_block palinfo_cpu_notifier = { .notifier_call = palinfo_cpu_callback, .priority = 0, diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 89accc6..236d389 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -632,6 +632,7 @@ static struct file_system_type pfm_fs_type = { .mount = pfmfs_mount, .kill_sb = kill_anon_super, }; +MODULE_ALIAS_FS("pfmfs"); DEFINE_PER_CPU(unsigned long, pfm_syst_info); DEFINE_PER_CPU(struct task_struct *, pmu_owner); @@ -2370,7 +2371,6 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t */ insert_vm_struct(mm, vma); - mm->total_vm += size >> PAGE_SHIFT; vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, vma_pages(vma)); up_write(&task->mm->mmap_sem); diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 6d33c5c..15bb2d9 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -109,8 +109,8 @@ show_regs (struct pt_regs *regs) unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; print_modules(); - printk("\nPid: %d, CPU %d, comm: %20s\n", task_pid_nr(current), - smp_processor_id(), current->comm); + printk("\nPid: %d[#%u], CPU %d, comm: %20s\n", task_pid_nr(current), + current->xid, smp_processor_id(), current->comm); printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s (%s)\n", regs->cr_ipsr, regs->cr_ifs, ip, print_tainted(), init_utsname()->release); diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 8848f43..739648d 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index 79802e5..1a89ec5 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -616,7 +616,7 @@ salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu return NOTIFY_OK; } -static struct notifier_block salinfo_cpu_notifier __cpuinitdata = +static struct notifier_block salinfo_cpu_notifier = { .notifier_call = salinfo_cpu_callback, .priority = 0, diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index 609d500..254a3d7 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -28,6 +28,7 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len unsigned long start_addr, align_mask = PAGE_SIZE - 1; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; + unsigned long offset = gr_rand_threadstack_offset(mm, filp, flags); if (len > RGN_MAP_LIMIT) return -ENOMEM; @@ -43,6 
+44,13 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len if (REGION_NUMBER(addr) == RGN_HPAGE) addr = 0; #endif + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + addr = mm->free_area_cache; + else +#endif + if (!addr) addr = mm->free_area_cache; @@ -61,14 +69,14 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { /* At this point: (!vma || addr < vma->vm_end). */ if (TASK_SIZE - len < addr || RGN_MAP_LIMIT - len < REGION_OFFSET(addr)) { - if (start_addr != TASK_UNMAPPED_BASE) { + if (start_addr != mm->mmap_base) { /* Start a new search --- just in case we missed some holes. */ - addr = TASK_UNMAPPED_BASE; + addr = mm->mmap_base; goto full_search; } return -ENOMEM; } - if (!vma || addr + len <= vma->vm_start) { + if (check_heap_stack_gap(vma, &addr, len, offset)) { /* Remember the address where we stopped this search: */ mm->free_area_cache = addr + len; return addr; diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 9be1f11..f2eef30 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -444,7 +444,7 @@ static int __cpuinit cache_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata cache_cpu_notifier = +static struct notifier_block cache_cpu_notifier = { .notifier_call = cache_cpu_callback }; diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index fd80e70..e72de06 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -59,8 +59,9 @@ die (const char *str, struct pt_regs *regs, long err) put_cpu(); if (++die.lock_owner_depth < 3) { - printk("%s[%d]: %s %ld [%d]\n", - current->comm, task_pid_nr(current), str, err, ++die_counter); + printk("%s[%d[#%u]]: %s %ld [%d]\n", + current->comm, task_pid_nr(current), current->xid, + str, err, ++die_counter); if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) != NOTIFY_STOP) show_regs(regs); @@ -323,8 +324,9 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) if ((last.count & 15) < 5 && (ia64_fetchadd(1, &last.count, acq) & 15) < 5) { last.time = current_jiffies + 5 * HZ; printk(KERN_WARNING - "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n", - current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr); + "%s(%d[#%u]): floating-point assist fault at ip %016lx, isr %016lx\n", + current->comm, task_pid_nr(current), current->xid, + regs->cr_iip + ia64_psr(regs)->ri, isr); } } } diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 53c0ba0..2accdde 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -199,7 +199,7 @@ SECTIONS { /* Per-cpu data: */ . = ALIGN(PERCPU_PAGE_SIZE); PERCPU_VADDR(SMP_CACHE_BYTES, PERCPU_ADDR, :percpu) - __phys_per_cpu_start = __per_cpu_load; + __phys_per_cpu_start = per_cpu_load; /* * ensure percpu data fits * into percpu page size diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 1e362cd..3ad6444 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -73,6 +73,23 @@ mapped_kernel_page_is_present (unsigned long address) return pte_present(pte); } +#ifdef CONFIG_PAX_PAGEEXEC +void pax_report_insns(struct pt_regs *regs, void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 8; i++) { + unsigned int c; + if (get_user(c, (unsigned int *)pc+i)) + printk(KERN_CONT "???????? 
"); + else + printk(KERN_CONT "%08x ", c); + } + printk("\n"); +} +#endif + void __kprobes ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs) { @@ -146,9 +163,23 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re mask = ( (((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT) | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT)); - if ((vma->vm_flags & mask) != mask) + if ((vma->vm_flags & mask) != mask) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (!(vma->vm_flags & VM_EXEC) && (mask & VM_EXEC)) { + if (!(mm->pax_flags & MF_PAX_PAGEEXEC) || address != regs->cr_iip) + goto bad_area; + + up_read(&mm->mmap_sem); + pax_report_fault(regs, (void *)regs->cr_iip, (void *)regs->r12); + do_group_exit(SIGKILL); + } +#endif + goto bad_area; + } + /* * If for any reason at all we couldn't handle the fault, make * sure we exit gracefully rather than endlessly redo the diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index 5ca674b..0d1395a 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -149,6 +149,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, u unsigned long pgoff, unsigned long flags) { struct vm_area_struct *vmm; + unsigned long offset = gr_rand_threadstack_offset(current->mm, file, flags); if (len > RGN_MAP_LIMIT) return -ENOMEM; @@ -171,7 +172,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, u /* At this point: (!vmm || addr < vmm->vm_end). */ if (REGION_OFFSET(addr) + len > RGN_MAP_LIMIT) return -ENOMEM; - if (!vmm || (addr + len) <= vmm->vm_start) + if (check_heap_stack_gap(vmm, &addr, len, offset)) return addr; addr = ALIGN(vmm->vm_end, HPAGE_SIZE); } diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 00cb0e2..2ad8024 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -120,6 +120,19 @@ ia64_init_addr_space (void) vma->vm_start = current->thread.rbs_bot & PAGE_MASK; vma->vm_end = vma->vm_start + PAGE_SIZE; vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT; + +#ifdef CONFIG_PAX_PAGEEXEC + if (current->mm->pax_flags & MF_PAX_PAGEEXEC) { + vma->vm_flags &= ~VM_EXEC; + +#ifdef CONFIG_PAX_MPROTECT + if (current->mm->pax_flags & MF_PAX_MPROTECT) + vma->vm_flags &= ~VM_MAYEXEC; +#endif + + } +#endif + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); down_write(¤t->mm->mmap_sem); if (insert_vm_struct(current->mm, vma)) { diff --git a/arch/m32r/include/asm/cache.h b/arch/m32r/include/asm/cache.h index 40b3ee98..8c2c112 100644 --- a/arch/m32r/include/asm/cache.h +++ b/arch/m32r/include/asm/cache.h @@ -1,8 +1,10 @@ #ifndef _ASM_M32R_CACHE_H #define _ASM_M32R_CACHE_H +#include + /* L1 cache line size */ #define L1_CACHE_SHIFT 4 -#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) #endif /* _ASM_M32R_CACHE_H */ diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index ee6a919..039b47f 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -196,8 +196,9 @@ static void show_registers(struct pt_regs *regs) } else { printk("SPI: %08lx\n", sp); } - printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)", - current->comm, task_pid_nr(current), 0xffff & i, 4096+(unsigned long)current); + printk("Process %s (pid: %d[#%u], process nr: %d, stackpage=%08lx)", + current->comm, task_pid_nr(current), current->xid, + 0xffff & i, 4096+(unsigned long)current); /* * When in-kernel, we also print out the stack and code at the diff --git 
a/arch/m32r/lib/usercopy.c b/arch/m32r/lib/usercopy.c index 82abd15..d95ae5d 100644 --- a/arch/m32r/lib/usercopy.c +++ b/arch/m32r/lib/usercopy.c @@ -14,6 +14,9 @@ unsigned long __generic_copy_to_user(void __user *to, const void *from, unsigned long n) { + if ((long)n < 0) + return n; + prefetch(from); if (access_ok(VERIFY_WRITE, to, n)) __copy_user(to,from,n); @@ -23,6 +26,9 @@ __generic_copy_to_user(void __user *to, const void *from, unsigned long n) unsigned long __generic_copy_from_user(void *to, const void __user *from, unsigned long n) { + if ((long)n < 0) + return n; + prefetchw(to); if (access_ok(VERIFY_READ, from, n)) __copy_user_zeroing(to,from,n); diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 361d540..5816ad5 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -135,6 +135,8 @@ source "fs/Kconfig" source "arch/m68k/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/m68k/include/asm/cache.h b/arch/m68k/include/asm/cache.h index 0395c51..5f26031 100644 --- a/arch/m68k/include/asm/cache.h +++ b/arch/m68k/include/asm/cache.h @@ -4,9 +4,11 @@ #ifndef __ARCH_M68K_CACHE_H #define __ARCH_M68K_CACHE_H +#include + /* bytes per L1 cache line */ #define L1_CACHE_SHIFT 4 -#define L1_CACHE_BYTES (1<< L1_CACHE_SHIFT) +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) #define ARCH_DMA_MINALIGN L1_CACHE_BYTES diff --git a/arch/microblaze/include/asm/cache.h b/arch/microblaze/include/asm/cache.h index 4efe96a..60e8699 100644 --- a/arch/microblaze/include/asm/cache.h +++ b/arch/microblaze/include/asm/cache.h @@ -13,11 +13,12 @@ #ifndef _ASM_MICROBLAZE_CACHE_H #define _ASM_MICROBLAZE_CACHE_H +#include #include #define L1_CACHE_SHIFT 5 /* word-granular cache in microblaze */ -#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) #define SMP_CACHE_BYTES L1_CACHE_BYTES diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index d46f1da..f9f6690 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2254,6 +2254,7 @@ source "kernel/Kconfig.preempt" config KEXEC bool "Kexec system call (EXPERIMENTAL)" depends on EXPERIMENTAL + depends on !GRKERNSEC_KMEM help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. 
It is like a reboot @@ -2478,6 +2479,8 @@ source "fs/Kconfig" source "arch/mips/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 1d93f81..67794d0 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -21,6 +21,10 @@ #include #include +#ifdef CONFIG_GENERIC_ATOMIC64 +#include +#endif + #define ATOMIC_INIT(i) { (i) } /* @@ -765,6 +769,16 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) */ #define atomic64_add_negative(i, v) (atomic64_add_return(i, (v)) < 0) +#define atomic64_read_unchecked(v) atomic64_read(v) +#define atomic64_set_unchecked(v, i) atomic64_set((v), (i)) +#define atomic64_add_unchecked(a, v) atomic64_add((a), (v)) +#define atomic64_add_return_unchecked(a, v) atomic64_add_return((a), (v)) +#define atomic64_sub_unchecked(a, v) atomic64_sub((a), (v)) +#define atomic64_inc_unchecked(v) atomic64_inc(v) +#define atomic64_inc_return_unchecked(v) atomic64_inc_return(v) +#define atomic64_dec_unchecked(v) atomic64_dec(v) +#define atomic64_cmpxchg_unchecked(v, o, n) atomic64_cmpxchg((v), (o), (n)) + #endif /* CONFIG_64BIT */ /* diff --git a/arch/mips/include/asm/cache.h b/arch/mips/include/asm/cache.h index b4db69f..8f3b093 100644 --- a/arch/mips/include/asm/cache.h +++ b/arch/mips/include/asm/cache.h @@ -9,10 +9,11 @@ #ifndef _ASM_CACHE_H #define _ASM_CACHE_H +#include #include #define L1_CACHE_SHIFT CONFIG_MIPS_L1_CACHE_SHIFT -#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) #define SMP_CACHE_SHIFT L1_CACHE_SHIFT #define SMP_CACHE_BYTES L1_CACHE_BYTES diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h index 455c0ac..ad65fbe 100644 --- a/arch/mips/include/asm/elf.h +++ b/arch/mips/include/asm/elf.h @@ -372,13 +372,16 @@ extern const char *__elf_platform; #define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) #endif +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE (TASK_IS_32BIT_ADDR ? 0x00400000UL : 0x00400000UL) + +#define PAX_DELTA_MMAP_LEN (TASK_IS_32BIT_ADDR ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#define PAX_DELTA_STACK_LEN (TASK_IS_32BIT_ADDR ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#endif + #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 struct linux_binprm; extern int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp); -struct mm_struct; -extern unsigned long arch_randomize_brk(struct mm_struct *mm); -#define arch_randomize_brk arch_randomize_brk - #endif /* _ASM_ELF_H */ diff --git a/arch/mips/include/asm/hw_irq.h b/arch/mips/include/asm/hw_irq.h index 9e8ef59..1139d6b 100644 --- a/arch/mips/include/asm/hw_irq.h +++ b/arch/mips/include/asm/hw_irq.h @@ -10,7 +10,7 @@ #include -extern atomic_t irq_err_count; +extern atomic_unchecked_t irq_err_count; /* * interrupt-retrigger: NOP for now. 
This may not be appropriate for all diff --git a/arch/mips/include/asm/local.h b/arch/mips/include/asm/local.h index 94fde8d..d5825cf 100644 --- a/arch/mips/include/asm/local.h +++ b/arch/mips/include/asm/local.h @@ -12,15 +12,25 @@ typedef struct atomic_long_t a; } local_t; +typedef struct { + atomic_long_unchecked_t a; +} local_unchecked_t; + #define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } #define local_read(l) atomic_long_read(&(l)->a) +#define local_read_unchecked(l) atomic_long_read_unchecked(&(l)->a) #define local_set(l, i) atomic_long_set(&(l)->a, (i)) +#define local_set_unchecked(l, i) atomic_long_set_unchecked(&(l)->a, (i)) #define local_add(i, l) atomic_long_add((i), (&(l)->a)) +#define local_add_unchecked(i, l) atomic_long_add_unchecked((i), (&(l)->a)) #define local_sub(i, l) atomic_long_sub((i), (&(l)->a)) +#define local_sub_unchecked(i, l) atomic_long_sub_unchecked((i), (&(l)->a)) #define local_inc(l) atomic_long_inc(&(l)->a) +#define local_inc_unchecked(l) atomic_long_inc_unchecked(&(l)->a) #define local_dec(l) atomic_long_dec(&(l)->a) +#define local_dec_unchecked(l) atomic_long_dec_unchecked(&(l)->a) /* * Same as above, but return the result value @@ -69,6 +79,7 @@ static __inline__ long local_add_return(long i, local_t * l) return result; } +#define local_add_return_unchecked(i, l) atomic_long_add_return_unchecked((i), (&(l)->a)) static __inline__ long local_sub_return(long i, local_t * l) { @@ -114,9 +125,12 @@ static __inline__ long local_sub_return(long i, local_t * l) return result; } +#define local_sub_return_unchecked(i, l) atomic_long_sub_return_unchecked((i), (&(l)->a)) #define local_cmpxchg(l, o, n) \ ((long)cmpxchg_local(&((l)->a.counter), (o), (n))) +#define local_cmpxchg_unchecked(l, o, n) \ + ((long)cmpxchg_local(&((l)->a.counter), (o), (n))) #define local_xchg(l, n) (atomic_long_xchg((&(l)->a), (n))) /** diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index e59cd1a..8e329d6 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -93,7 +93,7 @@ extern void copy_user_highpage(struct page *to, struct page *from, #ifdef CONFIG_CPU_MIPS32 typedef struct { unsigned long pte_low, pte_high; } pte_t; #define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32)) - #define __pte(x) ({ pte_t __pte = {(x), ((unsigned long long)(x)) >> 32}; __pte; }) + #define __pte(x) ({ pte_t __pte = {(x), (x) >> 32}; __pte; }) #else typedef struct { unsigned long long pte; } pte_t; #define pte_val(x) ((x).pte) diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index 881d18b4..cea38bc 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -37,6 +37,11 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { set_pud(pud, __pud((unsigned long)pmd)); } + +static inline void pud_populate_kernel(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + pud_populate(mm, pud, pmd); +} #endif /* diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index b2202a6..6780030 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -18,6 +18,9 @@ #include #include +#define ktla_ktva(addr) (addr) +#define ktva_ktla(addr) (addr) + struct mm_struct; struct vm_area_struct; diff --git a/arch/mips/include/asm/system.h b/arch/mips/include/asm/system.h index 6018c80..7c37203 100644 --- a/arch/mips/include/asm/system.h +++ b/arch/mips/include/asm/system.h @@ -230,6 +230,6 @@ extern void per_cpu_trap_init(void); */ 
#define __ARCH_WANT_UNLOCKED_CTXSW -extern unsigned long arch_align_stack(unsigned long sp); +#define arch_align_stack(x) ((x) & ~0xfUL) #endif /* _ASM_SYSTEM_H */ diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 35d1b47..b16efed 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -114,6 +114,8 @@ register struct thread_info *__current_thread_info __asm__("$28"); #define TIF_SECCOMP 4 /* secure computing */ #define TIF_NOTIFY_RESUME 5 /* callback before returning to user */ #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */ +/* li takes a 32bit immediate */ +#define TIF_GRSEC_SETXID 10 /* update credentials on syscall entry/exit */ #define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */ #define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ @@ -148,17 +150,18 @@ register struct thread_info *__current_thread_info __asm__("$28"); #define _TIF_32BIT_ADDR (1< #include #include diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c index ff44823..97f8906 100644 --- a/arch/mips/kernel/binfmt_elfo32.c +++ b/arch/mips/kernel/binfmt_elfo32.c @@ -52,6 +52,13 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; #undef ELF_ET_DYN_BASE #define ELF_ET_DYN_BASE (TASK32_SIZE / 3 * 2) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE (TASK_IS_32BIT_ADDR ? 0x00400000UL : 0x00400000UL) + +#define PAX_DELTA_MMAP_LEN (TASK_IS_32BIT_ADDR ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#define PAX_DELTA_STACK_LEN (TASK_IS_32BIT_ADDR ? 27-PAGE_SHIFT : 36-PAGE_SHIFT) +#endif + #include /* diff --git a/arch/mips/kernel/i8259.c b/arch/mips/kernel/i8259.c index 32b397b..3a5143a 100644 --- a/arch/mips/kernel/i8259.c +++ b/arch/mips/kernel/i8259.c @@ -205,7 +205,7 @@ spurious_8259A_irq: printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); spurious_irq_mask |= irqmask; } - atomic_inc(&irq_err_count); + atomic_inc_unchecked(&irq_err_count); /* * Theoretically we do not have to handle this IRQ, * but in Linux this does not cause problems and is diff --git a/arch/mips/kernel/irq-gt641xx.c b/arch/mips/kernel/irq-gt641xx.c index 883fc6c..28c0acd 100644 --- a/arch/mips/kernel/irq-gt641xx.c +++ b/arch/mips/kernel/irq-gt641xx.c @@ -110,7 +110,7 @@ void gt641xx_irq_dispatch(void) } } - atomic_inc(&irq_err_count); + atomic_inc_unchecked(&irq_err_count); } void __init gt641xx_irq_init(void) diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index 7f50318..20685b9 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -111,7 +111,10 @@ void __init init_IRQ(void) #endif } + #ifdef DEBUG_STACKOVERFLOW +extern void gr_handle_kernel_exploit(void); + static inline void check_stack_overflow(void) { unsigned long sp; @@ -127,6 +130,7 @@ static inline void check_stack_overflow(void) printk("do_IRQ: stack overflow: %ld\n", sp - sizeof(struct thread_info)); dump_stack(); + gr_handle_kernel_exploit(); } } #else diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index bf128d7..bc244d6 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -479,15 +479,3 @@ unsigned long get_wchan(struct task_struct *task) out: return pc; } - -/* - * Don't forget that the stack pointer must be aligned on a 8 bytes - * boundary for 32-bits ABI and 16 bytes for 64-bits ABI. 
- */ -unsigned long arch_align_stack(unsigned long sp) -{ - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() & ~PAGE_MASK; - - return sp & ALMASK; -} diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index e56c692..520fc02 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -264,6 +265,9 @@ long arch_ptrace(struct task_struct *child, long request, void __user *datavp = (void __user *) data; unsigned long __user *datalp = (void __user *) data; + if (!vx_check(vx_task_xid(child), VS_WATCH_P | VS_IDENT)) + goto out; + switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ @@ -530,6 +534,10 @@ static inline int audit_arch(void) return arch; } +#ifdef CONFIG_GRKERNSEC_SETXID +extern void gr_delayed_cred_worker(void); +#endif + /* * Notification of system call entry/exit * - triggered by current->work.syscall_trace @@ -539,6 +547,11 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs) /* do the secure computing check first */ secure_computing(regs->regs[2]); +#ifdef CONFIG_GRKERNSEC_SETXID + if (unlikely(test_and_clear_thread_flag(TIF_GRSEC_SETXID))) + gr_delayed_cred_worker(); +#endif + if (!(current->ptrace & PT_PTRACED)) goto out; diff --git a/arch/mips/kernel/reset.c b/arch/mips/kernel/reset.c index 07fc524..b9d7f28 100644 --- a/arch/mips/kernel/reset.c +++ b/arch/mips/kernel/reset.c @@ -13,6 +13,7 @@ #include #include +#include /* * Urgs ... Too many MIPS machines to handle this in a generic way. @@ -29,16 +30,19 @@ void machine_restart(char *command) { if (_machine_restart) _machine_restart(command); + BUG(); } void machine_halt(void) { if (_machine_halt) _machine_halt(); + BUG(); } void machine_power_off(void) { if (pm_power_off) pm_power_off(); + BUG(); } diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 1d39bea..e2d730d 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -523,7 +523,7 @@ einval: li v0, -ENOSYS sys sys_mq_timedreceive 5 sys sys_mq_notify 2 /* 4275 */ sys sys_mq_getsetattr 3 - sys sys_ni_syscall 0 /* sys_vserver */ + sys sys_vserver 3 sys sys_waitid 5 sys sys_ni_syscall 0 /* available, was setaltroot */ sys sys_add_key 5 /* 4280 */ diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 53e65ff..e30191bf 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -362,7 +362,7 @@ sys_call_table: PTR sys_mq_timedreceive PTR sys_mq_notify PTR sys_mq_getsetattr /* 5235 */ - PTR sys_ni_syscall /* sys_vserver */ + PTR sys_vserver PTR sys_waitid PTR sys_ni_syscall /* available, was setaltroot */ PTR sys_add_key diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 5476ce4..299bd79 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -361,7 +361,7 @@ EXPORT(sysn32_call_table) PTR compat_sys_mq_timedreceive PTR compat_sys_mq_notify PTR compat_sys_mq_getsetattr - PTR sys_ni_syscall /* 6240, sys_vserver */ + PTR sys32_vserver /* 6240 */ PTR compat_sys_waitid PTR sys_ni_syscall /* available, was setaltroot */ PTR sys_add_key diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index a56175b..afdd539 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -480,7 +480,7 @@ sys_call_table: PTR 
compat_sys_mq_timedreceive PTR compat_sys_mq_notify /* 4275 */ PTR compat_sys_mq_getsetattr - PTR sys_ni_syscall /* sys_vserver */ + PTR sys32_vserver PTR sys_32_waitid PTR sys_ni_syscall /* available, was setaltroot */ PTR sys_add_key /* 4280 */ diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 5c8a49d..20d5adc 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -343,9 +343,10 @@ void show_registers(struct pt_regs *regs) __show_regs(regs); print_modules(); - printk("Process %s (pid: %d, threadinfo=%p, task=%p, tls=%0*lx)\n", - current->comm, current->pid, current_thread_info(), current, - field, current_thread_info()->tp_value); + printk("Process %s (pid: %d:#%u, threadinfo=%p, task=%p, tls=%0*lx)\n", + current->comm, task_pid_nr(current), current->xid, + current_thread_info(), current, + field, current_thread_info()->tp_value); if (cpu_has_userlocal) { unsigned long tls; diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index b8314cfe..5bfa31a 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -28,6 +28,23 @@ #include /* For VMALLOC_END */ #include +#ifdef CONFIG_PAX_PAGEEXEC +void pax_report_insns(struct pt_regs *regs, void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int *)pc+i)) + printk(KERN_CONT "???????? "); + else + printk(KERN_CONT "%08x ", c); + } + printk("\n"); +} +#endif + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index 302d779..b8b4e97 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -71,6 +71,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, struct vm_area_struct *vma; unsigned long addr = addr0; int do_color_align; + unsigned long offset = gr_rand_threadstack_offset(mm, filp, flags); if (unlikely(len > TASK_SIZE)) return -ENOMEM; @@ -95,6 +96,11 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, do_color_align = 1; /* requesting a specific address */ + +#ifdef CONFIG_PAX_RANDMMAP + if (!(current->mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + if (addr) { if (do_color_align) addr = COLOUR_ALIGN(addr, pgoff); @@ -102,8 +108,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + if (TASK_SIZE - len >= addr && check_heap_stack_gap(vma, &addr, len, offset)) return addr; } @@ -118,7 +123,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, /* At this point: (!vma || addr < vma->vm_end). 
*/ if (TASK_SIZE - len < addr) return -ENOMEM; - if (!vma || addr + len <= vma->vm_start) + if (check_heap_stack_gap(vma, &addr, len, offset)) return addr; addr = vma->vm_end; if (do_color_align) @@ -144,10 +149,11 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, /* make sure it can fit in the remaining address space */ if (likely(addr > len)) { - vma = find_vma(mm, addr - len); - if (!vma || addr <= vma->vm_start) { + addr -= len; + vma = find_vma(mm, addr); + if (check_heap_stack_gap(vma, &addr, len, offset)) { /* cache the address as a hint for next time */ - return mm->free_area_cache = addr - len; + return (mm->free_area_cache = addr); } } @@ -155,17 +161,17 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, goto bottomup; addr = mm->mmap_base - len; - if (do_color_align) - addr = COLOUR_ALIGN_DOWN(addr, pgoff); do { + if (do_color_align) + addr = COLOUR_ALIGN_DOWN(addr, pgoff); /* * Lookup failure means no vma is above this address, * else if new region fits below vma->vm_start, * return with success: */ vma = find_vma(mm, addr); - if (likely(!vma || addr + len <= vma->vm_start)) { + if (check_heap_stack_gap(vma, &addr, len, offset)) { /* cache the address as a hint for next time */ return mm->free_area_cache = addr; } @@ -175,10 +181,8 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, mm->cached_hole_size = vma->vm_start - addr; /* try just below the current vma->vm_start */ - addr = vma->vm_start - len; - if (do_color_align) - addr = COLOUR_ALIGN_DOWN(addr, pgoff); - } while (likely(len < vma->vm_start)); + addr = skip_heap_stack_gap(vma, len, offset); + } while (!IS_ERR_VALUE(addr)); bottomup: /* @@ -223,6 +227,10 @@ void arch_pick_mmap_layout(struct mm_struct *mm) { unsigned long random_factor = 0UL; +#ifdef CONFIG_PAX_RANDMMAP + if (!(current->mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + if (current->flags & PF_RANDOMIZE) { random_factor = get_random_int(); random_factor = random_factor << PAGE_SHIFT; @@ -234,38 +242,23 @@ void arch_pick_mmap_layout(struct mm_struct *mm) if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + mm->get_unmapped_area = arch_get_unmapped_area; mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(random_factor); + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base -= mm->delta_mmap + mm->delta_stack; +#endif + mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->unmap_area = arch_unmap_area_topdown; } } - -static inline unsigned long brk_rnd(void) -{ - unsigned long rnd = get_random_int(); - - rnd = rnd << PAGE_SHIFT; - /* 8MB for 32bit, 256MB for 64bit */ - if (TASK_IS_32BIT_ADDR) - rnd = rnd & 0x7ffffful; - else - rnd = rnd & 0xffffffful; - - return rnd; -} - -unsigned long arch_randomize_brk(struct mm_struct *mm) -{ - unsigned long base = mm->brk; - unsigned long ret; - - ret = PAGE_ALIGN(base + brk_rnd()); - - if (ret < mm->brk) - return mm->brk; - - return ret; -} diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c index ed1c542..88552ac 100644 --- a/arch/mips/pci/pci-octeon.c +++ b/arch/mips/pci/pci-octeon.c @@ -335,8 +335,8 @@ static int octeon_write_config(struct pci_bus *bus, unsigned int devfn, static struct pci_ops octeon_pci_ops = { - octeon_read_config, - octeon_write_config, + .read = octeon_read_config, + .write = octeon_write_config, }; static struct resource 
octeon_pci_mem_resource = { diff --git a/arch/mips/pci/pcie-octeon.c b/arch/mips/pci/pcie-octeon.c index 0583c463..c07a38e 100644 --- a/arch/mips/pci/pcie-octeon.c +++ b/arch/mips/pci/pcie-octeon.c @@ -1238,8 +1238,8 @@ static int octeon_pcie1_write_config(struct pci_bus *bus, unsigned int devfn, } static struct pci_ops octeon_pcie0_ops = { - octeon_pcie0_read_config, - octeon_pcie0_write_config, + .read = octeon_pcie0_read_config, + .write = octeon_pcie0_write_config, }; static struct resource octeon_pcie0_mem_resource = { @@ -1259,8 +1259,8 @@ static struct pci_controller octeon_pcie0_controller = { }; static struct pci_ops octeon_pcie1_ops = { - octeon_pcie1_read_config, - octeon_pcie1_write_config, + .read = octeon_pcie1_read_config, + .write = octeon_pcie1_write_config, }; static struct resource octeon_pcie1_mem_resource = { diff --git a/arch/mips/sni/rm200.c b/arch/mips/sni/rm200.c index 3ab5b5d..67145ff 100644 --- a/arch/mips/sni/rm200.c +++ b/arch/mips/sni/rm200.c @@ -270,7 +270,7 @@ spurious_8259A_irq: "spurious RM200 8259A interrupt: IRQ%d.\n", irq); spurious_irq_mask |= irqmask; } - atomic_inc(&irq_err_count); + atomic_inc_unchecked(&irq_err_count); /* * Theoretically we do not have to handle this IRQ, * but in Linux this does not cause problems and is diff --git a/arch/mips/vr41xx/common/icu.c b/arch/mips/vr41xx/common/icu.c index a39ef32..98c4860 100644 --- a/arch/mips/vr41xx/common/icu.c +++ b/arch/mips/vr41xx/common/icu.c @@ -653,7 +653,7 @@ static int icu_get_irq(unsigned int irq) printk(KERN_ERR "spurious ICU interrupt: %04x,%04x\n", pend1, pend2); - atomic_inc(&irq_err_count); + atomic_inc_unchecked(&irq_err_count); return -1; } diff --git a/arch/mips/vr41xx/common/irq.c b/arch/mips/vr41xx/common/irq.c index fad2bef..6499c27 100644 --- a/arch/mips/vr41xx/common/irq.c +++ b/arch/mips/vr41xx/common/irq.c @@ -65,7 +65,7 @@ static void irq_dispatch(unsigned int irq) irq_cascade_t *cascade; if (irq >= NR_IRQS) { - atomic_inc(&irq_err_count); + atomic_inc_unchecked(&irq_err_count); return; } @@ -85,7 +85,7 @@ static void irq_dispatch(unsigned int irq) ret = cascade->get_irq(irq); irq = ret; if (ret < 0) - atomic_inc(&irq_err_count); + atomic_inc_unchecked(&irq_err_count); else irq_dispatch(irq); if (!irqd_irq_disabled(idata) && chip->irq_unmask) diff --git a/arch/mn10300/proc-mn103e010/include/proc/cache.h b/arch/mn10300/proc-mn103e010/include/proc/cache.h index 967d144..db12197 100644 --- a/arch/mn10300/proc-mn103e010/include/proc/cache.h +++ b/arch/mn10300/proc-mn103e010/include/proc/cache.h @@ -11,12 +11,14 @@ #ifndef _ASM_PROC_CACHE_H #define _ASM_PROC_CACHE_H +#include + /* L1 cache */ #define L1_CACHE_NWAYS 4 /* number of ways in caches */ #define L1_CACHE_NENTRIES 256 /* number of entries in each way */ -#define L1_CACHE_BYTES 16 /* bytes per entry */ #define L1_CACHE_SHIFT 4 /* shift for bytes per entry */ +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) /* bytes per entry */ #define L1_CACHE_WAYDISP 0x1000 /* displacement of one way from the next */ #define L1_CACHE_TAG_VALID 0x00000001 /* cache tag valid bit */ diff --git a/arch/mn10300/proc-mn2ws0050/include/proc/cache.h b/arch/mn10300/proc-mn2ws0050/include/proc/cache.h index bcb5df2..84fabd2 100644 --- a/arch/mn10300/proc-mn2ws0050/include/proc/cache.h +++ b/arch/mn10300/proc-mn2ws0050/include/proc/cache.h @@ -16,13 +16,15 @@ #ifndef _ASM_PROC_CACHE_H #define _ASM_PROC_CACHE_H +#include + /* * L1 cache */ #define L1_CACHE_NWAYS 4 /* number of ways in caches */ #define L1_CACHE_NENTRIES 128 /* number of entries 
in each way */ -#define L1_CACHE_BYTES 32 /* bytes per entry */ #define L1_CACHE_SHIFT 5 /* shift for bytes per entry */ +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) /* bytes per entry */ #define L1_CACHE_WAYDISP 0x1000 /* distance from one way to the next */ #define L1_CACHE_TAG_VALID 0x00000001 /* cache tag valid bit */ diff --git a/arch/openrisc/include/asm/cache.h b/arch/openrisc/include/asm/cache.h index 4ce7a01..449202a 100644 --- a/arch/openrisc/include/asm/cache.h +++ b/arch/openrisc/include/asm/cache.h @@ -19,11 +19,13 @@ #ifndef __ASM_OPENRISC_CACHE_H #define __ASM_OPENRISC_CACHE_H +#include + /* FIXME: How can we replace these with values from the CPU... * they shouldn't be hard-coded! */ -#define L1_CACHE_BYTES 16 #define L1_CACHE_SHIFT 4 +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) #endif /* __ASM_OPENRISC_CACHE_H */ diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index fdfd8be..18ffdb3 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -278,6 +278,8 @@ source "fs/Kconfig" source "arch/parisc/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index c4b779b..775b66b 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -335,6 +335,16 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) +#define atomic64_read_unchecked(v) atomic64_read(v) +#define atomic64_set_unchecked(v, i) atomic64_set((v), (i)) +#define atomic64_add_unchecked(a, v) atomic64_add((a), (v)) +#define atomic64_add_return_unchecked(a, v) atomic64_add_return((a), (v)) +#define atomic64_sub_unchecked(a, v) atomic64_sub((a), (v)) +#define atomic64_inc_unchecked(v) atomic64_inc(v) +#define atomic64_inc_return_unchecked(v) atomic64_inc_return(v) +#define atomic64_dec_unchecked(v) atomic64_dec(v) +#define atomic64_cmpxchg_unchecked(v, o, n) atomic64_cmpxchg((v), (o), (n)) + #endif /* !CONFIG_64BIT */ diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h index 47f11c7..3420df2 100644 --- a/arch/parisc/include/asm/cache.h +++ b/arch/parisc/include/asm/cache.h @@ -5,6 +5,7 @@ #ifndef __ARCH_PARISC_CACHE_H #define __ARCH_PARISC_CACHE_H +#include /* * PA 2.0 processors have 64-byte cachelines; PA 1.1 processors have @@ -15,13 +16,13 @@ * just ruin performance. */ #ifdef CONFIG_PA20 -#define L1_CACHE_BYTES 64 #define L1_CACHE_SHIFT 6 #else -#define L1_CACHE_BYTES 32 #define L1_CACHE_SHIFT 5 #endif +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) + #ifndef __ASSEMBLY__ #define SMP_CACHE_BYTES L1_CACHE_BYTES diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h index 19f6cb1..6c78cf2 100644 --- a/arch/parisc/include/asm/elf.h +++ b/arch/parisc/include/asm/elf.h @@ -342,6 +342,13 @@ struct pt_regs; /* forward declaration... */ #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x01000000) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE 0x10000UL + +#define PAX_DELTA_MMAP_LEN 16 +#define PAX_DELTA_STACK_LEN 16 +#endif + /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. This could be done in user space, but it's not easy, and we've already done it here. 
*/ diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index fc987a1..6e068ef 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -61,6 +61,11 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) (__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT)); } +static inline void pgd_populate_kernel(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) +{ + pgd_populate(mm, pgd, pmd); +} + static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { pmd_t *pmd = (pmd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, @@ -93,6 +98,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) #define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) #define pmd_free(mm, x) do { } while (0) #define pgd_populate(mm, pmd, pte) BUG() +#define pgd_populate_kernel(mm, pmd, pte) BUG() #endif diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 9d35a3e..af9b6d3 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -16,6 +16,8 @@ #include #include +extern spinlock_t pa_dbit_lock; + /* * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel * memory. For the return value to be meaningful, ADDR must be >= @@ -44,8 +46,11 @@ extern void purge_tlb_entries(struct mm_struct *, unsigned long); #define set_pte_at(mm, addr, ptep, pteval) \ do { \ + unsigned long flags; \ + spin_lock_irqsave(&pa_dbit_lock, flags); \ set_pte(ptep, pteval); \ purge_tlb_entries(mm, addr); \ + spin_unlock_irqrestore(&pa_dbit_lock, flags); \ } while (0) #endif /* !__ASSEMBLY__ */ @@ -216,6 +221,17 @@ extern void purge_tlb_entries(struct mm_struct *, unsigned long); #define PAGE_EXECREAD __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_EXEC |_PAGE_ACCESSED) #define PAGE_COPY PAGE_EXECREAD #define PAGE_RWX __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_EXEC |_PAGE_ACCESSED) + +#ifdef CONFIG_PAX_PAGEEXEC +# define PAGE_SHARED_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_ACCESSED) +# define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_ACCESSED) +# define PAGE_READONLY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_ACCESSED) +#else +# define PAGE_SHARED_NOEXEC PAGE_SHARED +# define PAGE_COPY_NOEXEC PAGE_COPY +# define PAGE_READONLY_NOEXEC PAGE_READONLY +#endif + #define PAGE_KERNEL __pgprot(_PAGE_KERNEL) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL_EXEC) #define PAGE_KERNEL_RWX __pgprot(_PAGE_KERNEL_RWX) @@ -433,48 +449,46 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *); static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { -#ifdef CONFIG_SMP + pte_t pte; + unsigned long flags; + if (!pte_young(*ptep)) return 0; - return test_and_clear_bit(xlate_pabit(_PAGE_ACCESSED_BIT), &pte_val(*ptep)); -#else - pte_t pte = *ptep; - if (!pte_young(pte)) + + spin_lock_irqsave(&pa_dbit_lock, flags); + pte = *ptep; + if (!pte_young(pte)) { + spin_unlock_irqrestore(&pa_dbit_lock, flags); return 0; - set_pte_at(vma->vm_mm, addr, ptep, pte_mkold(pte)); + } + set_pte(ptep, pte_mkold(pte)); + purge_tlb_entries(vma->vm_mm, addr); + spin_unlock_irqrestore(&pa_dbit_lock, flags); return 1; -#endif } -extern spinlock_t pa_dbit_lock; - struct mm_struct; static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t old_pte; + unsigned long flags; - 
spin_lock(&pa_dbit_lock); + spin_lock_irqsave(&pa_dbit_lock, flags); old_pte = *ptep; pte_clear(mm,addr,ptep); - spin_unlock(&pa_dbit_lock); + purge_tlb_entries(mm, addr); + spin_unlock_irqrestore(&pa_dbit_lock, flags); return old_pte; } static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { -#ifdef CONFIG_SMP - unsigned long new, old; - - do { - old = pte_val(*ptep); - new = pte_val(pte_wrprotect(__pte (old))); - } while (cmpxchg((unsigned long *) ptep, old, new) != old); + unsigned long flags; + spin_lock_irqsave(&pa_dbit_lock, flags); + set_pte(ptep, pte_wrprotect(*ptep)); purge_tlb_entries(mm, addr); -#else - pte_t old_pte = *ptep; - set_pte_at(mm, addr, ptep, pte_wrprotect(old_pte)); -#endif + spin_unlock_irqrestore(&pa_dbit_lock, flags); } #define pte_same(A,B) (pte_val(A) == pte_val(B)) diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index ff4cf9d..c0564bb 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -253,10 +253,10 @@ static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { - int sz = __compiletime_object_size(to); + size_t sz = __compiletime_object_size(to); int ret = -EFAULT; - if (likely(sz == -1 || !__builtin_constant_p(n) || sz >= n)) + if (likely(sz == (size_t)-1 || !__builtin_constant_p(n) || sz >= n)) ret = __copy_from_user(to, from, n); else copy_from_user_overflow(); diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 5241698..91dcb12 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -428,14 +428,11 @@ void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) /* Note: purge_tlb_entries can be called at startup with no context. */ - /* Disable preemption while we play with %sr1. 
*/ - preempt_disable(); + purge_tlb_start(flags); mtsp(mm->context, 1); - purge_tlb_start(flags); pdtlb(addr); pitlb(addr); purge_tlb_end(flags); - preempt_enable(); } EXPORT_SYMBOL(purge_tlb_entries); diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 5709c5e..14285ca 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -394,7 +394,7 @@ EXPORT_SYMBOL(print_pci_hwpath); static void setup_bus_id(struct parisc_device *padev) { struct hardware_path path; - char name[20]; + char name[28]; char *output = name; int i; diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 5e34ccf..672bc9c 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -98,16 +98,38 @@ /* three functions to determine where in the module core * or init pieces the location is */ +static inline int in_init_rx(struct module *me, void *loc) +{ + return (loc >= me->module_init_rx && + loc < (me->module_init_rx + me->init_size_rx)); +} + +static inline int in_init_rw(struct module *me, void *loc) +{ + return (loc >= me->module_init_rw && + loc < (me->module_init_rw + me->init_size_rw)); +} + static inline int in_init(struct module *me, void *loc) { - return (loc >= me->module_init && - loc <= (me->module_init + me->init_size)); + return in_init_rx(me, loc) || in_init_rw(me, loc); +} + +static inline int in_core_rx(struct module *me, void *loc) +{ + return (loc >= me->module_core_rx && + loc < (me->module_core_rx + me->core_size_rx)); +} + +static inline int in_core_rw(struct module *me, void *loc) +{ + return (loc >= me->module_core_rw && + loc < (me->module_core_rw + me->core_size_rw)); } static inline int in_core(struct module *me, void *loc) { - return (loc >= me->module_core && - loc <= (me->module_core + me->core_size)); + return in_core_rx(me, loc) || in_core_rw(me, loc); } static inline int in_local(struct module *me, void *loc) @@ -373,13 +395,13 @@ int module_frob_arch_sections(CONST Elf_Ehdr *hdr, } /* align things a bit */ - me->core_size = ALIGN(me->core_size, 16); - me->arch.got_offset = me->core_size; - me->core_size += gots * sizeof(struct got_entry); + me->core_size_rw = ALIGN(me->core_size_rw, 16); + me->arch.got_offset = me->core_size_rw; + me->core_size_rw += gots * sizeof(struct got_entry); - me->core_size = ALIGN(me->core_size, 16); - me->arch.fdesc_offset = me->core_size; - me->core_size += fdescs * sizeof(Elf_Fdesc); + me->core_size_rw = ALIGN(me->core_size_rw, 16); + me->arch.fdesc_offset = me->core_size_rw; + me->core_size_rw += fdescs * sizeof(Elf_Fdesc); me->arch.got_max = gots; me->arch.fdesc_max = fdescs; @@ -397,7 +419,7 @@ static Elf64_Word get_got(struct module *me, unsigned long value, long addend) BUG_ON(value == 0); - got = me->module_core + me->arch.got_offset; + got = me->module_core_rw + me->arch.got_offset; for (i = 0; got[i].addr; i++) if (got[i].addr == value) goto out; @@ -415,7 +437,7 @@ static Elf64_Word get_got(struct module *me, unsigned long value, long addend) #ifdef CONFIG_64BIT static Elf_Addr get_fdesc(struct module *me, unsigned long value) { - Elf_Fdesc *fdesc = me->module_core + me->arch.fdesc_offset; + Elf_Fdesc *fdesc = me->module_core_rw + me->arch.fdesc_offset; if (!value) { printk(KERN_ERR "%s: zero OPD requested!\n", me->name); @@ -433,7 +455,7 @@ static Elf_Addr get_fdesc(struct module *me, unsigned long value) /* Create new one */ fdesc->addr = value; - fdesc->gp = (Elf_Addr)me->module_core + me->arch.got_offset; + fdesc->gp = (Elf_Addr)me->module_core_rw + 
me->arch.got_offset; return (Elf_Addr)fdesc; } #endif /* CONFIG_64BIT */ @@ -845,7 +867,7 @@ register_unwind_table(struct module *me, table = (unsigned char *)sechdrs[me->arch.unwind_section].sh_addr; end = table + sechdrs[me->arch.unwind_section].sh_size; - gp = (Elf_Addr)me->module_core + me->arch.got_offset; + gp = (Elf_Addr)me->module_core_rw + me->arch.got_offset; DEBUGP("register_unwind_table(), sect = %d at 0x%p - 0x%p (gp=0x%lx)\n", me->arch.unwind_section, table, end, gp); diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index a3328c2..3b812eb 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -69,7 +69,8 @@ void __init setup_cmdline(char **cmdline_p) /* called from hpux boot loader */ boot_command_line[0] = '\0'; } else { - strcpy(boot_command_line, (char *)__va(boot_args[1])); + strlcpy(boot_command_line, (char *)__va(boot_args[1]), + COMMAND_LINE_SIZE); #ifdef CONFIG_BLK_DEV_INITRD if (boot_args[2] != 0) /* did palo pass us a ramdisk? */ diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 7ea75d1..5075226 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -33,9 +33,11 @@ #include #include -static unsigned long get_unshared_area(unsigned long addr, unsigned long len) +static unsigned long get_unshared_area(struct file *filp, unsigned long addr, unsigned long len, + unsigned long flags) { struct vm_area_struct *vma; + unsigned long offset = gr_rand_threadstack_offset(current->mm, filp, flags); addr = PAGE_ALIGN(addr); @@ -43,7 +45,7 @@ static unsigned long get_unshared_area(unsigned long addr, unsigned long len) /* At this point: (!vma || addr < vma->vm_end). */ if (TASK_SIZE - len < addr) return -ENOMEM; - if (!vma || addr + len <= vma->vm_start) + if (check_heap_stack_gap(vma, &addr, len, offset)) return addr; addr = vma->vm_end; } @@ -67,11 +69,12 @@ static int get_offset(struct address_space *mapping) return offset & 0x3FF000; } -static unsigned long get_shared_area(struct address_space *mapping, - unsigned long addr, unsigned long len, unsigned long pgoff) +static unsigned long get_shared_area(struct file *filp, struct address_space *mapping, + unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct vm_area_struct *vma; int offset = mapping ? get_offset(mapping) : 0; + unsigned long rand_offset = gr_rand_threadstack_offset(current->mm, filp, flags); offset = (offset + (pgoff << PAGE_SHIFT)) & 0x3FF000; @@ -81,7 +84,7 @@ static unsigned long get_shared_area(struct address_space *mapping, /* At this point: (!vma || addr < vma->vm_end). 
*/ if (TASK_SIZE - len < addr) return -ENOMEM; - if (!vma || addr + len <= vma->vm_start) + if (check_heap_stack_gap(vma, &addr, len, rand_offset)) return addr; addr = DCACHE_ALIGN(vma->vm_end - offset) + offset; if (addr < vma->vm_end) /* handle wraparound */ @@ -100,14 +103,14 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, if (flags & MAP_FIXED) return addr; if (!addr) - addr = TASK_UNMAPPED_BASE; + addr = current->mm->mmap_base; if (filp) { - addr = get_shared_area(filp->f_mapping, addr, len, pgoff); + addr = get_shared_area(filp, filp->f_mapping, addr, len, pgoff, flags); } else if(flags & MAP_SHARED) { - addr = get_shared_area(NULL, addr, len, pgoff); + addr = get_shared_area(filp, NULL, addr, len, pgoff, flags); } else { - addr = get_unshared_area(addr, len); + addr = get_unshared_area(filp, addr, len, flags); } return addr; } diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 4014d90..ffb77d8 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -361,7 +361,7 @@ ENTRY_COMP(mbind) /* 260 */ ENTRY_COMP(get_mempolicy) ENTRY_COMP(set_mempolicy) - ENTRY_SAME(ni_syscall) /* 263: reserved for vserver */ + ENTRY_DIFF(vserver) ENTRY_SAME(add_key) ENTRY_SAME(request_key) /* 265 */ ENTRY_SAME(keyctl) diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index cd8b02f..d5be094 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -236,8 +236,9 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err) if (err == 0) return; /* STFU */ - printk(KERN_CRIT "%s (pid %d): %s (code %ld) at " RFMT "\n", - current->comm, task_pid_nr(current), str, err, regs->iaoq[0]); + printk(KERN_CRIT "%s (pid %d:#%u): %s (code %ld) at " RFMT "\n", + current->comm, task_pid_nr(current), current->xid, + str, err, regs->iaoq[0]); #ifdef PRINT_USER_FAULTS /* XXX for debugging only */ show_regs(regs); @@ -270,8 +271,8 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err) pdc_console_restart(); if (err) - printk(KERN_CRIT "%s (pid %d): %s (code %ld)\n", - current->comm, task_pid_nr(current), str, err); + printk(KERN_CRIT "%s (pid %d:#%u): %s (code %ld)\n", + current->comm, task_pid_nr(current), current->xid, str, err); /* Wot's wrong wif bein' racy? 
*/ if (current->thread.flags & PARISC_KERNEL_DEATH) { @@ -733,9 +734,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) down_read(&current->mm->mmap_sem); vma = find_vma(current->mm,regs->iaoq[0]); - if (vma && (regs->iaoq[0] >= vma->vm_start) - && (vma->vm_flags & VM_EXEC)) { - + if (vma && (regs->iaoq[0] >= vma->vm_start)) { fault_address = regs->iaoq[0]; fault_space = regs->iasq[0]; diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index a9b765a..56d661d 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -52,7 +53,7 @@ DEFINE_PER_CPU(struct exception_data, exception_data); static unsigned long parisc_acctyp(unsigned long code, unsigned int inst) { - if (code == 6 || code == 16) + if (code == 6 || code == 7 || code == 16) return VM_EXEC; switch (inst & 0xf0000000) { @@ -138,6 +139,116 @@ parisc_acctyp(unsigned long code, unsigned int inst) } #endif +#ifdef CONFIG_PAX_PAGEEXEC +/* + * PaX: decide what to do with offenders (instruction_pointer(regs) = fault address) + * + * returns 1 when task should be killed + * 2 when rt_sigreturn trampoline was detected + * 3 when unpatched PLT trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_EMUPLT + int err; + + do { /* PaX: unpatched PLT emulation */ + unsigned int bl, depwi; + + err = get_user(bl, (unsigned int *)instruction_pointer(regs)); + err |= get_user(depwi, (unsigned int *)(instruction_pointer(regs)+4)); + + if (err) + break; + + if (bl == 0xEA9F1FDDU && depwi == 0xD6801C1EU) { + unsigned int ldw, bv, ldw2, addr = instruction_pointer(regs)-12; + + err = get_user(ldw, (unsigned int *)addr); + err |= get_user(bv, (unsigned int *)(addr+4)); + err |= get_user(ldw2, (unsigned int *)(addr+8)); + + if (err) + break; + + if (ldw == 0x0E801096U && + bv == 0xEAC0C000U && + ldw2 == 0x0E881095U) + { + unsigned int resolver, map; + + err = get_user(resolver, (unsigned int *)(instruction_pointer(regs)+8)); + err |= get_user(map, (unsigned int *)(instruction_pointer(regs)+12)); + if (err) + break; + + regs->gr[20] = instruction_pointer(regs)+8; + regs->gr[21] = map; + regs->gr[22] = resolver; + regs->iaoq[0] = resolver | 3UL; + regs->iaoq[1] = regs->iaoq[0] + 4; + return 3; + } + } + } while (0); +#endif + +#ifdef CONFIG_PAX_EMUTRAMP + +#ifndef CONFIG_PAX_EMUSIGRT + if (!(current->mm->pax_flags & MF_PAX_EMUTRAMP)) + return 1; +#endif + + do { /* PaX: rt_sigreturn emulation */ + unsigned int ldi1, ldi2, bel, nop; + + err = get_user(ldi1, (unsigned int *)instruction_pointer(regs)); + err |= get_user(ldi2, (unsigned int *)(instruction_pointer(regs)+4)); + err |= get_user(bel, (unsigned int *)(instruction_pointer(regs)+8)); + err |= get_user(nop, (unsigned int *)(instruction_pointer(regs)+12)); + + if (err) + break; + + if ((ldi1 == 0x34190000U || ldi1 == 0x34190002U) && + ldi2 == 0x3414015AU && + bel == 0xE4008200U && + nop == 0x08000240U) + { + regs->gr[25] = (ldi1 & 2) >> 1; + regs->gr[20] = __NR_rt_sigreturn; + regs->gr[31] = regs->iaoq[1] + 16; + regs->sr[0] = regs->iasq[1]; + regs->iaoq[0] = 0x100UL; + regs->iaoq[1] = regs->iaoq[0] + 4; + regs->iasq[0] = regs->sr[2]; + regs->iasq[1] = regs->sr[2]; + return 2; + } + } while (0); +#endif + + return 1; +} + +void pax_report_insns(struct pt_regs *regs, void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int *)pc+i)) + 
printk(KERN_CONT "???????? "); + else + printk(KERN_CONT "%08x ", c); + } + printk("\n"); +} +#endif + int fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *fix; @@ -192,8 +303,33 @@ good_area: acc_type = parisc_acctyp(code,regs->iir); - if ((vma->vm_flags & acc_type) != acc_type) + if ((vma->vm_flags & acc_type) != acc_type) { + +#ifdef CONFIG_PAX_PAGEEXEC + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && (acc_type & VM_EXEC) && + (address & ~3UL) == instruction_pointer(regs)) + { + up_read(&mm->mmap_sem); + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 3: + return; +#endif + +#ifdef CONFIG_PAX_EMUTRAMP + case 2: + return; +#endif + + } + pax_report_fault(regs, (void *)instruction_pointer(regs), (void *)regs->gr[30]); + do_group_exit(SIGKILL); + } +#endif + goto bad_area; + } /* * If for any reason at all we couldn't handle the fault, make @@ -239,8 +375,9 @@ bad_area: #ifdef PRINT_USER_FAULTS printk(KERN_DEBUG "\n"); - printk(KERN_DEBUG "do_page_fault() pid=%d command='%s' type=%lu address=0x%08lx\n", - task_pid_nr(tsk), tsk->comm, code, address); + printk(KERN_DEBUG "do_page_fault() pid=%d:#%u " + "command='%s' type=%lu address=0x%08lx\n", + task_pid_nr(tsk), tsk->xid, tsk->comm, code, address); if (vma) { printk(KERN_DEBUG "vm_start = 0x%08lx, vm_end = 0x%08lx\n", vma->vm_start, vma->vm_end); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index bec952d..89d3842 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -347,6 +347,7 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE config KEXEC bool "kexec system call (EXPERIMENTAL)" depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP && !PPC_47x)) && EXPERIMENTAL + depends on !GRKERNSEC_KMEM help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. 
It is like a reboot @@ -962,6 +963,8 @@ source "lib/Kconfig" source "arch/powerpc/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" config KEYS_COMPAT diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 02e41b5..ec6e26c 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -469,6 +469,16 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) +#define atomic64_read_unchecked(v) atomic64_read(v) +#define atomic64_set_unchecked(v, i) atomic64_set((v), (i)) +#define atomic64_add_unchecked(a, v) atomic64_add((a), (v)) +#define atomic64_add_return_unchecked(a, v) atomic64_add_return((a), (v)) +#define atomic64_sub_unchecked(a, v) atomic64_sub((a), (v)) +#define atomic64_inc_unchecked(v) atomic64_inc(v) +#define atomic64_inc_return_unchecked(v) atomic64_inc_return(v) +#define atomic64_dec_unchecked(v) atomic64_dec(v) +#define atomic64_cmpxchg_unchecked(v, o, n) atomic64_cmpxchg((v), (o), (n)) + #endif /* __powerpc64__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index 4b50941..5605819 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -3,6 +3,7 @@ #ifdef __KERNEL__ +#include /* bytes per L1 cache line */ #if defined(CONFIG_8xx) || defined(CONFIG_403GCX) @@ -22,7 +23,7 @@ #define L1_CACHE_SHIFT 7 #endif -#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) #define SMP_CACHE_BYTES L1_CACHE_BYTES diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 3bf9cca..e7457d0 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -178,8 +178,19 @@ typedef elf_fpreg_t elf_vsrreghalf_t32[ELF_NVSRHALFREG]; the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. */ -extern unsigned long randomize_et_dyn(unsigned long base); -#define ELF_ET_DYN_BASE (randomize_et_dyn(0x20000000)) +#define ELF_ET_DYN_BASE (0x20000000) + +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE (0x10000000UL) + +#ifdef __powerpc64__ +#define PAX_DELTA_MMAP_LEN (is_32bit_task() ? 16 : 28) +#define PAX_DELTA_STACK_LEN (is_32bit_task() ? 
16 : 28) +#else +#define PAX_DELTA_MMAP_LEN 15 +#define PAX_DELTA_STACK_LEN 15 +#endif +#endif /* * Our registers are always unsigned longs, whether we're a 32 bit @@ -274,9 +285,6 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, (0x7ff >> (PAGE_SHIFT - 12)) : \ (0x3ffff >> (PAGE_SHIFT - 12))) -extern unsigned long arch_randomize_brk(struct mm_struct *mm); -#define arch_randomize_brk arch_randomize_brk - #endif /* __KERNEL__ */ /* diff --git a/arch/powerpc/include/asm/kmap_types.h b/arch/powerpc/include/asm/kmap_types.h index bca8fdc..61e9580 100644 --- a/arch/powerpc/include/asm/kmap_types.h +++ b/arch/powerpc/include/asm/kmap_types.h @@ -27,6 +27,7 @@ enum km_type { KM_PPC_SYNC_PAGE, KM_PPC_SYNC_ICACHE, KM_KDB, + KM_CLEARPAGE, KM_TYPE_NR }; diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h index b8da913..60b608a 100644 --- a/arch/powerpc/include/asm/local.h +++ b/arch/powerpc/include/asm/local.h @@ -9,15 +9,26 @@ typedef struct atomic_long_t a; } local_t; +typedef struct +{ + atomic_long_unchecked_t a; +} local_unchecked_t; + #define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } #define local_read(l) atomic_long_read(&(l)->a) +#define local_read_unchecked(l) atomic_long_read_unchecked(&(l)->a) #define local_set(l,i) atomic_long_set(&(l)->a, (i)) +#define local_set_unchecked(l,i) atomic_long_set_unchecked(&(l)->a, (i)) #define local_add(i,l) atomic_long_add((i),(&(l)->a)) +#define local_add_unchecked(i,l) atomic_long_add_unchecked((i),(&(l)->a)) #define local_sub(i,l) atomic_long_sub((i),(&(l)->a)) +#define local_sub_unchecked(i,l) atomic_long_sub_unchecked((i),(&(l)->a)) #define local_inc(l) atomic_long_inc(&(l)->a) +#define local_inc_unchecked(l) atomic_long_inc_unchecked(&(l)->a) #define local_dec(l) atomic_long_dec(&(l)->a) +#define local_dec_unchecked(l) atomic_long_dec_unchecked(&(l)->a) static __inline__ long local_add_return(long a, local_t *l) { @@ -35,6 +46,7 @@ static __inline__ long local_add_return(long a, local_t *l) return t; } +#define local_add_return_unchecked(i, l) atomic_long_add_return_unchecked((i), (&(l)->a)) #define local_add_negative(a, l) (local_add_return((a), (l)) < 0) @@ -54,6 +66,7 @@ static __inline__ long local_sub_return(long a, local_t *l) return t; } +#define local_sub_return_unchecked(i, l) atomic_long_sub_return_unchecked((i), (&(l)->a)) static __inline__ long local_inc_return(local_t *l) { @@ -101,6 +114,8 @@ static __inline__ long local_dec_return(local_t *l) #define local_cmpxchg(l, o, n) \ (cmpxchg_local(&((l)->a.counter), (o), (n))) +#define local_cmpxchg_unchecked(l, o, n) \ + (cmpxchg_local(&((l)->a.counter), (o), (n))) #define local_xchg(l, n) (xchg_local(&((l)->a.counter), (n))) /** diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h index d4a7f64..451de1c 100644 --- a/arch/powerpc/include/asm/mman.h +++ b/arch/powerpc/include/asm/mman.h @@ -44,7 +44,7 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot) } #define arch_calc_vm_prot_bits(prot) arch_calc_vm_prot_bits(prot) -static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags) +static inline pgprot_t arch_vm_get_page_prot(vm_flags_t vm_flags) { return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0); } diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 5b0bde2..9f83e1a 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -151,8 +151,9 @@ extern phys_addr_t kernstart_addr; * and needs to be executable. 
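The page.h and page_64.h hunks in this area change VM_DATA_DEFAULT_FLAGS32 and VM_STACK_DEFAULT_FLAGS32 so that VM_EXEC is included only when the task's personality carries READ_IMPLIES_EXEC, rather than unconditionally. A minimal standalone model of that selection follows; the flag and personality values are placeholders, not the kernel's definitions.

/* Minimal model of the personality-dependent default VM flags used in
 * the nearby page.h/page_64.h hunks.  Flag values are placeholders. */
#include <stdio.h>

#define VM_READ     0x01UL
#define VM_WRITE    0x02UL
#define VM_EXEC     0x04UL
#define VM_MAYREAD  0x08UL
#define VM_MAYWRITE 0x10UL
#define VM_MAYEXEC  0x20UL

#define READ_IMPLIES_EXEC 0x0400000UL	/* placeholder personality bit */

/* VM_EXEC is granted by default only when the personality asks for it. */
static unsigned long default_data_flags(unsigned long personality)
{
	return ((personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) |
	       VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
}

int main(void)
{
	printf("legacy binary: %#lx\n", default_data_flags(READ_IMPLIES_EXEC));
	printf("modern binary: %#lx\n", default_data_flags(0));
	return 0;
}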
This means the whole heap ends * up being executable. */ -#define VM_DATA_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS32 \ + (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ + VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) @@ -180,6 +181,9 @@ extern phys_addr_t kernstart_addr; #define is_kernel_addr(x) ((x) >= PAGE_OFFSET) #endif +#define ktla_ktva(addr) (addr) +#define ktva_ktla(addr) (addr) + /* * Use the top bit of the higher-level page table entries to indicate whether * the entries we point to contain hugepages. This works because we know that diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index fb40ede..d3ce956 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -144,15 +144,18 @@ do { \ * stack by default, so in the absence of a PT_GNU_STACK program header * we turn execute permission off. */ -#define VM_STACK_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_STACK_DEFAULT_FLAGS32 \ + (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ + VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #define VM_STACK_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#ifndef CONFIG_PAX_PAGEEXEC #define VM_STACK_DEFAULT_FLAGS \ (is_32bit_task() ? \ VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64) +#endif #include diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h index 292725c..f87ae14 100644 --- a/arch/powerpc/include/asm/pgalloc-64.h +++ b/arch/powerpc/include/asm/pgalloc-64.h @@ -50,6 +50,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) #ifndef CONFIG_PPC_64K_PAGES #define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD) +#define pgd_populate_kernel(MM, PGD, PUD) pgd_populate((MM), (PGD), (PUD)) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { @@ -67,6 +68,11 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) pud_set(pud, (unsigned long)pmd); } +static inline void pud_populate_kernel(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + pud_populate(mm, pud, pmd); +} + #define pmd_populate(mm, pmd, pte_page) \ pmd_populate_kernel(mm, pmd, page_address(pte_page)) #define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, (unsigned long)(pte)) @@ -76,6 +82,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) #else /* CONFIG_PPC_64K_PAGES */ #define pud_populate(mm, pud, pmd) pud_set(pud, (unsigned long)pmd) +#define pud_populate_kernel(mm, pud, pmd) pud_populate((mm), (pud), (pmd)) static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 88b0bd9..e32bc67 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -2,6 +2,7 @@ #define _ASM_POWERPC_PGTABLE_H #ifdef __KERNEL__ +#include #ifndef __ASSEMBLY__ #include /* For TASK_SIZE */ #include diff --git a/arch/powerpc/include/asm/pte-hash32.h b/arch/powerpc/include/asm/pte-hash32.h index 4aad413..85d86bf 100644 --- a/arch/powerpc/include/asm/pte-hash32.h +++ b/arch/powerpc/include/asm/pte-hash32.h @@ -21,6 +21,7 @@ #define _PAGE_FILE 0x004 /* when !present: nonlinear file mapping */ 
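The atomic64 and local_t hunks above add *_unchecked counterparts next to the regular operations. The split lets overflow (refcount-style) checking apply to counters where wraparound would be a bug, while counters that may legitimately wrap opt out; on powerpc the unchecked forms are plain aliases, and the sparc atomics later in this section implement the checked path with a trap on signed overflow. Below is a hedged standalone illustration of the two behaviours, using a GCC/Clang builtin rather than the kernel's architecture-specific traps.

/* Illustration only: a checked add that refuses to wrap versus an
 * unchecked add, mirroring the checked/unchecked counter split.
 * __builtin_add_overflow() replaces the kernel's asm-level traps. */
#include <stdio.h>
#include <stdlib.h>

typedef struct { long counter; } counter_t;		/* overflow-checked */
typedef struct { long counter; } counter_unchecked_t;	/* wrap allowed */

static void counter_add(long i, counter_t *v)
{
	long result;

	if (__builtin_add_overflow(v->counter, i, &result)) {
		/* Kernel analogue: the overflow trap handler fires. */
		fprintf(stderr, "counter overflow detected\n");
		abort();
	}
	v->counter = result;
}

static void counter_add_unchecked(long i, counter_unchecked_t *v)
{
	v->counter += i;	/* wraparound is acceptable for this user */
}

int main(void)
{
	counter_t refs = { 1 };
	counter_unchecked_t stats = { 0 };

	counter_add(1, &refs);
	counter_add_unchecked(-5, &stats);
	printf("refs=%ld stats=%ld\n", refs.counter, stats.counter);
	return 0;
}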
#define _PAGE_USER 0x004 /* usermode access allowed */ #define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */ +#define _PAGE_EXEC _PAGE_GUARDED #define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */ #define _PAGE_NO_CACHE 0x020 /* I: cache inhibit */ #define _PAGE_WRITETHRU 0x040 /* W: cache write-through */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 578e5a0..2ab6a8a 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -212,6 +212,7 @@ #define SPRN_DBCR 0x136 /* e300 Data Breakpoint Control Reg */ #define SPRN_DSISR 0x012 /* Data Storage Interrupt Status Register */ #define DSISR_NOHPTE 0x40000000 /* no translation found */ +#define DSISR_GUARDED 0x10000000 /* fetch from guarded storage */ #define DSISR_PROTFAULT 0x08000000 /* protection fault */ #define DSISR_ISSTORE 0x02000000 /* access was a store */ #define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */ diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index adba970..ef0d917 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -50,7 +50,7 @@ struct smp_ops_t { int (*cpu_disable)(void); void (*cpu_die)(unsigned int nr); int (*cpu_bootable)(unsigned int nr); -}; +} __no_const; extern void smp_send_debugger_break(void); extern void start_secondary_resume(void); diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h index e30a13d..2b7d994 100644 --- a/arch/powerpc/include/asm/system.h +++ b/arch/powerpc/include/asm/system.h @@ -530,7 +530,7 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) #endif -extern unsigned long arch_align_stack(unsigned long sp); +#define arch_align_stack(x) ((x) & ~0xfUL) /* Used in very early kernel initialization. */ extern unsigned long reloc_offset(void); diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 836f231..39d0b94 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -104,7 +104,6 @@ static inline struct thread_info *current_thread_info(void) #define TIF_PERFMON_CTXSW 6 /* perfmon needs ctxsw calls */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SINGLESTEP 8 /* singlestepping active */ -#define TIF_MEMDIE 9 /* is terminating due to OOM killer */ #define TIF_SECCOMP 10 /* secure computing */ #define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */ #define TIF_NOERROR 12 /* Force successful syscall return */ @@ -112,6 +111,9 @@ static inline struct thread_info *current_thread_info(void) #define TIF_FREEZE 14 /* Freezing for suspend */ #define TIF_SYSCALL_TRACEPOINT 15 /* syscall tracepoint instrumentation */ #define TIF_RUNLATCH 16 /* Is the runlatch enabled? 
*/ +#define TIF_MEMDIE 17 /* is terminating due to OOM killer */ +/* mask must be expressable within 16 bits to satisfy 'andi' instruction reqs */ +#define TIF_GRSEC_SETXID 9 /* update credentials on syscall entry/exit */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1< INT_MAX) + return n; + + if (!__builtin_constant_p(n)) + check_object_size(to, n, false); + + if (likely(access_ok(VERIFY_READ, from, n))) + n = __copy_from_user(to, from, n); + else + memset(to, 0, n); + return n; +} + +static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) +{ + if ((long)n < 0 || n > INT_MAX) + return n; + + if (likely(access_ok(VERIFY_WRITE, to, n))) { + if (!__builtin_constant_p(n)) + check_object_size(from, n, true); + n = __copy_to_user(to, from, n); + } + return n; +} + +extern unsigned long copy_in_user(void __user *to, const void __user *from, + unsigned long n); + +#endif /* __powerpc64__ */ + extern unsigned long __clear_user(void __user *addr, unsigned long size); static inline unsigned long clear_user(void __user *addr, unsigned long size) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index d3d1b5e..c43e77b 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -275,7 +275,7 @@ #endif #define __NR_rtas 255 #define __NR_sys_debug_setcontext 256 -/* Number 257 is reserved for vserver */ +#define __NR_vserver 257 #define __NR_migrate_pages 258 #define __NR_mbind 259 #define __NR_get_mempolicy 260 diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index ce4f7f1..fed0f27 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -14,6 +14,11 @@ CFLAGS_prom_init.o += -fPIC CFLAGS_btext.o += -fPIC endif +CFLAGS_REMOVE_cputable.o = $(LATENT_ENTROPY_PLUGIN_CFLAGS) +CFLAGS_REMOVE_prom_init.o = $(LATENT_ENTROPY_PLUGIN_CFLAGS) +CFLAGS_REMOVE_btext.o = $(LATENT_ENTROPY_PLUGIN_CFLAGS) +CFLAGS_REMOVE_prom.o = $(LATENT_ENTROPY_PLUGIN_CFLAGS) + ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog @@ -26,6 +31,8 @@ CFLAGS_REMOVE_ftrace.o = -pg -mno-sched-epilog CFLAGS_REMOVE_time.o = -pg -mno-sched-epilog endif +CFLAGS_REMOVE_prom_init.o += $(LATENT_ENTROPY_PLUGIN_CFLAGS) + obj-y := cputable.o ptrace.o syscalls.o \ irq.o align.o signal_32.o pmc.o vdso.o \ init_task.o process.o systbl.o idle.o \ diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 429983c..7af363b 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -587,6 +587,7 @@ storage_fault_common: std r14,_DAR(r1) std r15,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD + bl .save_nvgprs mr r4,r14 mr r5,r15 ld r14,PACA_EXGEN+EX_R14(r13) @@ -596,8 +597,7 @@ storage_fault_common: cmpdi r3,0 bne- 1f b .ret_from_except_lite -1: bl .save_nvgprs - mr r5,r3 +1: mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD ld r4,_DAR(r1) bl .bad_page_fault diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 8c3baa0..4d8c6f1 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1004,10 +1004,10 @@ handle_page_fault: 11: ld r4,_DAR(r1) ld r5,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD + bl .save_nvgprs bl .do_page_fault cmpdi r3,0 beq+ 13f - bl .save_nvgprs mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD lwz r4,_DAR(r1) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 
745c1e7..d231072 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -324,6 +324,8 @@ static inline void handle_one_irq(unsigned int irq) set_bits(irqtp->flags, &curtp->flags); } +extern void gr_handle_kernel_exploit(void); + static inline void check_stack_overflow(void) { #ifdef CONFIG_DEBUG_STACKOVERFLOW @@ -336,6 +338,7 @@ static inline void check_stack_overflow(void) printk("do_IRQ: stack overflow: %ld\n", sp - sizeof(struct thread_info)); dump_stack(); + gr_handle_kernel_exploit(); } #endif } @@ -547,9 +550,6 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, host->ops = ops; host->of_node = of_node_get(of_node); - if (host->ops->match == NULL) - host->ops->match = default_irq_host_match; - raw_spin_lock_irqsave(&irq_big_lock, flags); /* If it's a legacy controller, check for duplicates and @@ -622,7 +622,12 @@ struct irq_host *irq_find_host(struct device_node *node) */ raw_spin_lock_irqsave(&irq_big_lock, flags); list_for_each_entry(h, &irq_hosts, link) - if (h->ops->match(h, node)) { + if (h->ops->match) { + if (h->ops->match(h, node)) { + found = h; + break; + } + } else if (default_irq_host_match(h, node)) { found = h; break; } diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index 2e3200c..7118986 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -162,7 +162,7 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr, me->arch.core_plt_section = i; } if (!me->arch.core_plt_section || !me->arch.init_plt_section) { - printk("Module doesn't contain .plt or .init.plt sections.\n"); + printk("Module %s doesn't contain .plt or .init.plt sections.\n", me->name); return -ENOEXEC; } @@ -192,11 +192,16 @@ static uint32_t do_plt_call(void *location, DEBUGP("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); /* Init, or core PLT? */ - if (location >= mod->module_core - && location < mod->module_core + mod->core_size) + if ((location >= mod->module_core_rx && location < mod->module_core_rx + mod->core_size_rx) || + (location >= mod->module_core_rw && location < mod->module_core_rw + mod->core_size_rw)) entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr; - else + else if ((location >= mod->module_init_rx && location < mod->module_init_rx + mod->init_size_rx) || + (location >= mod->module_init_rw && location < mod->module_init_rw + mod->init_size_rw)) entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; + else { + printk(KERN_ERR "%s: invalid R_PPC_REL24 entry found\n", mod->name); + return ~0UL; + } /* Find this entry, or if that fails, the next avail. 
entry */ while (entry->jump[0]) { @@ -300,7 +305,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, } #ifdef CONFIG_DYNAMIC_FTRACE module->arch.tramp = - do_plt_call(module->module_core, + do_plt_call(module->module_core_rx, (unsigned long)ftrace_caller, sechdrs, module); #endif diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d687e3f..2609cb3 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -640,8 +640,9 @@ void show_regs(struct pt_regs * regs) #else printk("DAR: "REG", DSISR: %08lx\n", regs->dar, regs->dsisr); #endif - printk("TASK = %p[%d] '%s' THREAD: %p", - current, task_pid_nr(current), current->comm, task_thread_info(current)); + printk("TASK = %p[%d,#%u] '%s' THREAD: %p", + current, task_pid_nr(current), current->xid, + current->comm, task_thread_info(current)); #ifdef CONFIG_SMP printk(" CPU: %d", raw_smp_processor_id()); @@ -660,8 +661,8 @@ void show_regs(struct pt_regs * regs) * Lookup NIP late so we have the best change of getting the * above info out without failing */ - printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); - printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); + printk("NIP ["REG"] %pA\n", regs->nip, (void *)regs->nip); + printk("LR ["REG"] %pA\n", regs->link, (void *)regs->link); #endif show_stack(current, (unsigned long *) regs->gpr[1]); if (!user_mode(regs)) @@ -1157,10 +1158,10 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) newsp = stack[0]; ip = stack[STACK_FRAME_LR_SAVE]; if (!firstframe || ip != lr) { - printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip); + printk("["REG"] ["REG"] %pA", sp, ip, (void *)ip); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if ((ip == rth || ip == mrth) && curr_frame >= 0) { - printk(" (%pS)", + printk(" (%pA)", (void *)current->ret_stack[curr_frame].ret); curr_frame--; } @@ -1180,7 +1181,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) struct pt_regs *regs = (struct pt_regs *) (sp + STACK_FRAME_OVERHEAD); lr = regs->link; - printk("--- Exception: %lx at %pS\n LR = %pS\n", + printk("--- Exception: %lx at %pA\n LR = %pA\n", regs->trap, (void *)regs->nip, (void *)lr); firstframe = 1; } @@ -1250,63 +1251,8 @@ void free_thread_info(struct thread_info *ti) void thread_info_cache_init(void) { thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE, - THREAD_SIZE, 0, NULL); + THREAD_SIZE, SLAB_USERCOPY, NULL); BUG_ON(thread_info_cache == NULL); } #endif /* THREAD_SHIFT < PAGE_SHIFT */ - -unsigned long arch_align_stack(unsigned long sp) -{ - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() & ~PAGE_MASK; - return sp & ~0xf; -} - -static inline unsigned long brk_rnd(void) -{ - unsigned long rnd = 0; - - /* 8MB for 32bit, 1GB for 64bit */ - if (is_32bit_task()) - rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT))); - else - rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT))); - - return rnd << PAGE_SHIFT; -} - -unsigned long arch_randomize_brk(struct mm_struct *mm) -{ - unsigned long base = mm->brk; - unsigned long ret; - -#ifdef CONFIG_PPC_STD_MMU_64 - /* - * If we are using 1TB segments and we are allowed to randomise - * the heap, we can put it above 1TB so it is backed by a 1TB - * segment. Otherwise the heap will be in the bottom 1TB - * which always uses 256MB segments and this may result in a - * performance penalty. 
- */ - if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T)) - base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T); -#endif - - ret = PAGE_ALIGN(base + brk_rnd()); - - if (ret < mm->brk) - return mm->brk; - - return ret; -} - -unsigned long randomize_et_dyn(unsigned long base) -{ - unsigned long ret = PAGE_ALIGN(base + brk_rnd()); - - if (ret < base) - return base; - - return ret; -} diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 5de73db..a05f61c 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1702,6 +1702,10 @@ long arch_ptrace(struct task_struct *child, long request, return ret; } +#ifdef CONFIG_GRKERNSEC_SETXID +extern void gr_delayed_cred_worker(void); +#endif + /* * We must return the syscall number to actually look up in the table. * This can be -1L to skip running any syscall at all. @@ -1712,6 +1716,11 @@ long do_syscall_trace_enter(struct pt_regs *regs) secure_computing(regs->gpr[0]); +#ifdef CONFIG_GRKERNSEC_SETXID + if (unlikely(test_and_clear_thread_flag(TIF_GRSEC_SETXID))) + gr_delayed_cred_worker(); +#endif + if (test_thread_flag(TIF_SYSCALL_TRACE) && tracehook_report_syscall_entry(regs)) /* @@ -1748,6 +1757,11 @@ void do_syscall_trace_leave(struct pt_regs *regs) { int step; +#ifdef CONFIG_GRKERNSEC_SETXID + if (unlikely(test_and_clear_thread_flag(TIF_GRSEC_SETXID))) + gr_delayed_cred_worker(); +#endif + if (unlikely(current->audit_context)) audit_syscall_exit((regs->ccr&0x10000000)?AUDITSC_FAILURE:AUDITSC_SUCCESS, regs->result); diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index fa1e56b..e8ef867 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -865,7 +865,7 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, /* Save user registers on the stack */ frame = &rt_sf->uc.uc_mcontext; addr = frame; - if (vdso32_rt_sigtramp && current->mm->context.vdso_base) { + if (vdso32_rt_sigtramp && current->mm->context.vdso_base != ~0UL) { if (save_user_regs(regs, frame, 0, 1)) goto badframe; regs->link = current->mm->context.vdso_base + vdso32_rt_sigtramp; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 60d1f75..2c29348 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -435,7 +435,7 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, current->thread.fpscr.val = 0; /* Set up to return from userspace. 
*/ - if (vdso64_rt_sigtramp && current->mm->context.vdso_base) { + if (vdso64_rt_sigtramp && current->mm->context.vdso_base != ~0UL) { regs->link = current->mm->context.vdso_base + vdso64_rt_sigtramp; } else { err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index f2496f2..4e3cc47 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -107,11 +107,11 @@ long ppc64_personality(unsigned long personality) long ret; if (personality(current->personality) == PER_LINUX32 - && personality == PER_LINUX) - personality = PER_LINUX32; + && personality(personality) == PER_LINUX) + personality = (personality & ~PER_MASK) | PER_LINUX32; ret = sys_personality(personality); - if (ret == PER_LINUX32) - ret = PER_LINUX; + if (personality(ret) == PER_LINUX32) + ret = (ret & ~PER_MASK) | PER_LINUX; return ret; } #endif diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index ca683a1..ab912dd 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -531,7 +531,7 @@ static int __cpuinit sysfs_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata sysfs_cpu_nb = { +static struct notifier_block sysfs_cpu_nb = { .notifier_call = sysfs_cpu_notify, }; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 9844662..60ce76b 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -98,6 +98,8 @@ static void pmac_backlight_unblank(void) static inline void pmac_backlight_unblank(void) { } #endif +extern void gr_handle_kernel_exploit(void); + int die(const char *str, struct pt_regs *regs, long err) { static struct { @@ -171,6 +173,8 @@ int die(const char *str, struct pt_regs *regs, long err) if (panic_on_oops) panic("Fatal exception"); + gr_handle_kernel_exploit(); + oops_exit(); do_exit(err); @@ -1093,8 +1097,9 @@ void nonrecoverable_exception(struct pt_regs *regs) void trace_syscall(struct pt_regs *regs) { - printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", - current, task_pid_nr(current), regs->nip, regs->link, regs->gpr[0], + printk("Task: %p(%d[#%u]), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", + current, task_pid_nr(current), current->xid, + regs->nip, regs->link, regs->gpr[0], regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted()); } diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 7d14bb69..1305601 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "setup.h" @@ -219,7 +220,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) vdso_base = VDSO32_MBASE; #endif - current->mm->context.vdso_base = 0; + current->mm->context.vdso_base = ~0UL; /* vDSO has a problem and was disabled, just don't "enable" it for the * process @@ -239,7 +240,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) vdso_base = get_unmapped_area(NULL, vdso_base, (vdso_pages << PAGE_SHIFT) + ((VDSO_ALIGNMENT - 1) & PAGE_MASK), - 0, 0); + 0, MAP_PRIVATE | MAP_EXECUTABLE); if (IS_ERR_VALUE(vdso_base)) { rc = vdso_base; goto fail_mmapsem; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 607fbdf..ac940f3 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -730,7 +730,7 @@ out: return r; } -int kvm_arch_init(void *opaque) +int kvm_arch_init(const void 
*opaque) { return 0; } diff --git a/arch/powerpc/lib/usercopy_64.c b/arch/powerpc/lib/usercopy_64.c index 5eea6f3..5d10396 100644 --- a/arch/powerpc/lib/usercopy_64.c +++ b/arch/powerpc/lib/usercopy_64.c @@ -9,22 +9,6 @@ #include #include -unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) -{ - if (likely(access_ok(VERIFY_READ, from, n))) - n = __copy_from_user(to, from, n); - else - memset(to, 0, n); - return n; -} - -unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) -{ - if (likely(access_ok(VERIFY_WRITE, to, n))) - n = __copy_to_user(to, from, n); - return n; -} - unsigned long copy_in_user(void __user *to, const void __user *from, unsigned long n) { @@ -35,7 +19,5 @@ unsigned long copy_in_user(void __user *to, const void __user *from, return n; } -EXPORT_SYMBOL(copy_from_user); -EXPORT_SYMBOL(copy_to_user); EXPORT_SYMBOL(copy_in_user); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 7450843..9f8cfc7 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -32,6 +32,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -43,6 +47,7 @@ #include #include #include +#include #ifdef CONFIG_KPROBES static inline int notify_page_fault(struct pt_regs *regs) @@ -66,6 +71,33 @@ static inline int notify_page_fault(struct pt_regs *regs) } #endif +#ifdef CONFIG_PAX_PAGEEXEC +/* + * PaX: decide what to do with offenders (regs->nip = fault address) + * + * returns 1 when task should be killed + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + return 1; +} + +void pax_report_insns(struct pt_regs *regs, void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if (get_user(c, (unsigned int __user *)pc+i)) + printk(KERN_CONT "???????? "); + else + printk(KERN_CONT "%08x ", c); + } + printk("\n"); +} +#endif + /* * Check whether the instruction at regs->nip is a store using * an update addressing form which will update r1. @@ -136,7 +168,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, * indicate errors in DSISR but can validly be set in SRR1. */ if (trap == 0x400) - error_code &= 0x48200000; + error_code &= 0x58200000; else is_write = error_code & DSISR_ISSTORE; #else @@ -259,7 +291,7 @@ good_area: * "undefined". Of those that can be set, this is the only * one which seems bad. */ - if (error_code & 0x10000000) + if (error_code & DSISR_GUARDED) /* Guarded storage error. */ goto bad_area; #endif /* CONFIG_8xx */ @@ -274,7 +306,7 @@ good_area: * processors use the same I/D cache coherency mechanism * as embedded. 
*/ - if (error_code & DSISR_PROTFAULT) + if (error_code & (DSISR_PROTFAULT | DSISR_GUARDED)) goto bad_area; #endif /* CONFIG_PPC_STD_MMU */ @@ -345,6 +377,23 @@ bad_area: bad_area_nosemaphore: /* User mode accesses cause a SIGSEGV */ if (user_mode(regs)) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (mm->pax_flags & MF_PAX_PAGEEXEC) { +#ifdef CONFIG_PPC_STD_MMU + if (is_exec && (error_code & (DSISR_PROTFAULT | DSISR_GUARDED))) { +#else + if (is_exec && regs->nip == address) { +#endif + switch (pax_handle_fetch_fault(regs)) { + } + + pax_report_fault(regs, (void *)regs->nip, (void *)regs->gpr[PT_R1]); + do_group_exit(SIGKILL); + } + } +#endif + _exception(SIGSEGV, regs, code, address); return 0; } diff --git a/arch/powerpc/mm/mmap_64.c b/arch/powerpc/mm/mmap_64.c index 5a783d8..522eb00 100644 --- a/arch/powerpc/mm/mmap_64.c +++ b/arch/powerpc/mm/mmap_64.c @@ -61,10 +61,14 @@ static inline int mmap_is_legacy(void) * * To avoid this we can shift the randomness by 1 bit. */ -static unsigned long mmap_rnd(void) +static unsigned long mmap_rnd(struct mm_struct *mm) { unsigned long rnd = 0; +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + if (current->flags & PF_RANDOMIZE) { /* 8MB for 32bit, 1GB for 64bit */ if (is_32bit_task()) @@ -75,7 +79,7 @@ static unsigned long mmap_rnd(void) return (rnd << PAGE_SHIFT) * 2; } -static inline unsigned long mmap_base(void) +static inline unsigned long mmap_base(struct mm_struct *mm) { unsigned long gap = rlimit(RLIMIT_STACK); @@ -84,7 +88,7 @@ static inline unsigned long mmap_base(void) else if (gap > MAX_GAP) gap = MAX_GAP; - return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd()); + return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd(mm)); } /* @@ -99,10 +103,22 @@ void arch_pick_mmap_layout(struct mm_struct *mm) */ if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + mm->get_unmapped_area = arch_get_unmapped_area; mm->unmap_area = arch_unmap_area; } else { - mm->mmap_base = mmap_base(); + mm->mmap_base = mmap_base(mm); + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base -= mm->delta_mmap + mm->delta_stack; +#endif + mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->unmap_area = arch_unmap_area_topdown; } diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 5b63bd3..248942d 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -370,7 +370,7 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata mmu_context_cpu_nb = { +static struct notifier_block mmu_context_cpu_nb = { .notifier_call = mmu_context_cpu_notify, }; diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index f4b78a3..a11ee27 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -659,7 +659,7 @@ static void __init parse_drconf_memory(struct device_node *memory) unsigned int n, rc, ranges, is_kexec_kdump = 0; unsigned long lmb_size, base, size, sz; int nid; - struct assoc_arrays aa; + struct assoc_arrays aa = { .arrays = NULL }; n = of_get_drconf_memory(memory, &dm); if (!n) @@ -964,7 +964,7 @@ static void __init *careful_zallocation(int nid, unsigned long size, return ret; } -static struct notifier_block __cpuinitdata ppc64_numa_nb = { +static struct notifier_block ppc64_numa_nb = { .notifier_call = cpu_numa_callback, .priority = 1 /* Must 
run before sched domains notifier. */ }; diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 73709f7..8e825a8 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -98,7 +98,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr, if ((mm->task_size - len) < addr) return 0; vma = find_vma(mm, addr); - return (!vma || (addr + len) <= vma->vm_start); + return check_heap_stack_gap(vma, &addr, len, 0); } static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice) @@ -256,7 +256,7 @@ full_search: addr = _ALIGN_UP(addr + 1, 1ul << SLICE_HIGH_SHIFT); continue; } - if (!vma || addr + len <= vma->vm_start) { + if (check_heap_stack_gap(vma, &addr, len, 0)) { /* * Remember the place where we stopped the search: */ @@ -313,10 +313,14 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, } } - addr = mm->mmap_base; - while (addr > len) { + if (mm->mmap_base < len) + addr = -ENOMEM; + else + addr = mm->mmap_base - len; + + while (!IS_ERR_VALUE(addr)) { /* Go down by chunk size */ - addr = _ALIGN_DOWN(addr - len, 1ul << pshift); + addr = _ALIGN_DOWN(addr, 1ul << pshift); /* Check for hit with different page size */ mask = slice_range_to_mask(addr, len); @@ -336,7 +340,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, * return with success: */ vma = find_vma(mm, addr); - if (!vma || (addr + len) <= vma->vm_start) { + if (check_heap_stack_gap(vma, &addr, len, 0)) { /* remember the address as a hint for next time */ if (use_cache) mm->free_area_cache = addr; @@ -348,7 +352,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, mm->cached_hole_size = vma->vm_start - addr; /* try just below the current vma->vm_start */ - addr = vma->vm_start; + addr = skip_heap_stack_gap(vma, len, 0); } /* @@ -426,6 +430,11 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, if (fixed && addr > (mm->task_size - len)) return -EINVAL; +#ifdef CONFIG_PAX_RANDMMAP + if (!fixed && (mm->pax_flags & MF_PAX_RANDMMAP)) + addr = 0; +#endif + /* If hint, make sure it matches our alignment restrictions */ if (!fixed && addr) { addr = _ALIGN_UP(addr, 1ul << pshift); diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c index 14be2bd..56f51cb 100644 --- a/arch/powerpc/platforms/cell/celleb_scc_pciex.c +++ b/arch/powerpc/platforms/cell/celleb_scc_pciex.c @@ -400,8 +400,8 @@ static int scc_pciex_write_config(struct pci_bus *bus, unsigned int devfn, } static struct pci_ops scc_pciex_pci_ops = { - scc_pciex_read_config, - scc_pciex_write_config, + .read = scc_pciex_read_config, + .write = scc_pciex_write_config, }; static void pciex_clear_intr_all(unsigned int __iomem *base) diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 0cfece4..2f1a0e5 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -281,9 +281,9 @@ spufs_mem_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return VM_FAULT_NOPAGE; } -static int spufs_mem_mmap_access(struct vm_area_struct *vma, +static ssize_t spufs_mem_mmap_access(struct vm_area_struct *vma, unsigned long address, - void *buf, int len, int write) + void *buf, size_t len, int write) { struct spu_context *ctx = vma->vm_file->private_data; unsigned long offset = address - vma->vm_start; diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 941d5cb..1803d9e 
100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -811,6 +811,7 @@ static struct file_system_type spufs_type = { .mount = spufs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("spufs"); static int __init spufs_init(void) { diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 3394254..8c6825c 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -886,7 +886,7 @@ static int smp_core99_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata smp_core99_cpu_nb = { +static struct notifier_block smp_core99_cpu_nb = { .notifier_call = smp_core99_cpu_notify, }; #endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c index d2383cf..a6d33c8 100644 --- a/arch/powerpc/platforms/pseries/eeh_event.c +++ b/arch/powerpc/platforms/pseries/eeh_event.c @@ -61,7 +61,7 @@ static int eeh_event_handler(void * dummy) struct eeh_event *event; struct pci_dn *pdn; - daemonize ("eehd"); + set_task_comm(current, "eehd"); set_current_state(TASK_INTERRUPTIBLE); spin_lock_irqsave(&eeh_eventlist_lock, flags); diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index aed3d91..737f734 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -644,6 +644,8 @@ source "fs/Kconfig" source "arch/s390/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 24bff4f..0248123 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -610,7 +610,7 @@ static int __cpuinit appldata_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata appldata_nb = { +static struct notifier_block appldata_nb = { .notifier_call = appldata_cpu_notify, }; diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 481f4f7..f16ec59 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -454,6 +454,7 @@ static struct file_system_type hypfs_type = { .mount = hypfs_mount, .kill_sb = hypfs_kill_super }; +MODULE_ALIAS_FS("s390_hypfs"); static const struct super_operations hypfs_s_ops = { .statfs = simple_statfs, diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 8517d2a..d2738d4 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -326,6 +326,16 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v) #define atomic64_dec_and_test(_v) (atomic64_sub_return(1, _v) == 0) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) +#define atomic64_read_unchecked(v) atomic64_read(v) +#define atomic64_set_unchecked(v, i) atomic64_set((v), (i)) +#define atomic64_add_unchecked(a, v) atomic64_add((a), (v)) +#define atomic64_add_return_unchecked(a, v) atomic64_add_return((a), (v)) +#define atomic64_sub_unchecked(a, v) atomic64_sub((a), (v)) +#define atomic64_inc_unchecked(v) atomic64_inc(v) +#define atomic64_inc_return_unchecked(v) atomic64_inc_return(v) +#define atomic64_dec_unchecked(v) atomic64_dec(v) +#define atomic64_cmpxchg_unchecked(v, o, n) atomic64_cmpxchg((v), (o), (n)) + #define smp_mb__before_atomic_dec() smp_mb() #define smp_mb__after_atomic_dec() smp_mb() #define smp_mb__before_atomic_inc() smp_mb() diff --git a/arch/s390/include/asm/cache.h 
b/arch/s390/include/asm/cache.h index 2a30d5a..5e5586f 100644 --- a/arch/s390/include/asm/cache.h +++ b/arch/s390/include/asm/cache.h @@ -11,8 +11,10 @@ #ifndef __ARCH_S390_CACHE_H #define __ARCH_S390_CACHE_H -#define L1_CACHE_BYTES 256 +#include + #define L1_CACHE_SHIFT 8 +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) #define NET_SKB_PAD 32 #define __read_mostly __attribute__((__section__(".data..read_mostly"))) diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 547f1a6..3e6d0a0 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -162,8 +162,14 @@ extern unsigned int vdso_enabled; the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. */ -extern unsigned long randomize_et_dyn(unsigned long base); -#define ELF_ET_DYN_BASE (randomize_et_dyn(STACK_TOP / 3 * 2)) +#define ELF_ET_DYN_BASE (STACK_TOP / 3 * 2) + +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE (test_thread_flag(TIF_31BIT) ? 0x10000UL : 0x80000000UL) + +#define PAX_DELTA_MMAP_LEN (test_thread_flag(TIF_31BIT) ? 15 : 26) +#define PAX_DELTA_STACK_LEN (test_thread_flag(TIF_31BIT) ? 15 : 26) +#endif /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. */ @@ -183,7 +189,8 @@ extern char elf_platform[]; #define ELF_PLATFORM (elf_platform) #ifndef __s390x__ -#define SET_PERSONALITY(ex) set_personality(PER_LINUX) +#define SET_PERSONALITY(ex) \ + set_personality(PER_LINUX | (current->personality & (~PER_MASK))) #else /* __s390x__ */ #define SET_PERSONALITY(ex) \ do { \ @@ -211,7 +218,4 @@ struct linux_binprm; #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 int arch_setup_additional_pages(struct linux_binprm *, int); -extern unsigned long arch_randomize_brk(struct mm_struct *mm); -#define arch_randomize_brk arch_randomize_brk - #endif diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index ef573c1..75a1ce6 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -262,7 +262,7 @@ extern void (*_machine_restart)(char *command); extern void (*_machine_halt)(void); extern void (*_machine_power_off)(void); -extern unsigned long arch_align_stack(unsigned long sp); +#define arch_align_stack(x) ((x) & ~0xfUL) static inline int tprot(unsigned long addr) { diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 775a5ee..451d100 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -24,6 +24,7 @@ #include #include #include + #include #include #include diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 2b23885..3db7651 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -55,6 +55,7 @@ 1; \ }) +#define access_ok_noprefault(type, addr, size) access_ok((type), (addr), (size)) #define access_ok(type, addr, size) __access_ok(addr, size) /* @@ -235,6 +236,10 @@ static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); + + if ((long)n < 0) + return n; + if (access_ok(VERIFY_WRITE, to, n)) n = __copy_to_user(to, from, n); return n; @@ -260,6 +265,9 @@ copy_to_user(void __user *to, const void *from, unsigned long n) static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n) { + if ((long)n < 0) + return n; + if (__builtin_constant_p(n) && (n <= 256)) return uaccess.copy_from_user_small(n, from, 
to); else @@ -291,10 +299,14 @@ __compiletime_warning("copy_from_user() buffer size is not provably correct") static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { - unsigned int sz = __compiletime_object_size(to); + size_t sz = __compiletime_object_size(to); might_fault(); - if (unlikely(sz != -1 && sz < n)) { + + if ((long)n < 0) + return n; + + if (unlikely(sz != (size_t)-1 && sz < n)) { copy_from_user_overflow(); return n; } diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index 58de4c9..a59bc5e 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -202,7 +202,7 @@ #define __NR_clock_gettime (__NR_timer_create+6) #define __NR_clock_getres (__NR_timer_create+7) #define __NR_clock_nanosleep (__NR_timer_create+8) -/* Number 263 is reserved for vserver */ +#define __NR_vserver 263 #define __NR_statfs64 265 #define __NR_fstatfs64 266 #define __NR_remap_file_pages 267 diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index dfcb343..eda788a 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -161,11 +161,11 @@ module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, /* Increase core size by size of got & plt and set start offsets for got and plt. */ - me->core_size = ALIGN(me->core_size, 4); - me->arch.got_offset = me->core_size; - me->core_size += me->arch.got_size; - me->arch.plt_offset = me->core_size; - me->core_size += me->arch.plt_size; + me->core_size_rw = ALIGN(me->core_size_rw, 4); + me->arch.got_offset = me->core_size_rw; + me->core_size_rw += me->arch.got_size; + me->arch.plt_offset = me->core_size_rx; + me->core_size_rx += me->arch.plt_size; return 0; } @@ -242,7 +242,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, if (info->got_initialized == 0) { Elf_Addr *gotent; - gotent = me->module_core + me->arch.got_offset + + gotent = me->module_core_rw + me->arch.got_offset + info->got_offset; *gotent = val; info->got_initialized = 1; @@ -266,7 +266,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, else if (r_type == R_390_GOTENT || r_type == R_390_GOTPLTENT) *(unsigned int *) loc = - (val + (Elf_Addr) me->module_core - loc) >> 1; + (val + (Elf_Addr) me->module_core_rw - loc) >> 1; else if (r_type == R_390_GOT64 || r_type == R_390_GOTPLT64) *(unsigned long *) loc = val; @@ -280,7 +280,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */ if (info->plt_initialized == 0) { unsigned int *ip; - ip = me->module_core + me->arch.plt_offset + + ip = me->module_core_rx + me->arch.plt_offset + info->plt_offset; #ifndef CONFIG_64BIT ip[0] = 0x0d105810; /* basr 1,0; l 1,6(1); br 1 */ @@ -305,7 +305,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, val - loc + 0xffffUL < 0x1ffffeUL) || (r_type == R_390_PLT32DBL && val - loc + 0xffffffffULL < 0x1fffffffeULL))) - val = (Elf_Addr) me->module_core + + val = (Elf_Addr) me->module_core_rx + me->arch.plt_offset + info->plt_offset; val += rela->r_addend - loc; @@ -327,7 +327,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_GOTOFF32: /* 32 bit offset to GOT. */ case R_390_GOTOFF64: /* 64 bit offset to GOT. 
*/ val = val + rela->r_addend - - ((Elf_Addr) me->module_core + me->arch.got_offset); + ((Elf_Addr) me->module_core_rw + me->arch.got_offset); if (r_type == R_390_GOTOFF16) *(unsigned short *) loc = val; else if (r_type == R_390_GOTOFF32) @@ -337,7 +337,7 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, break; case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */ case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */ - val = (Elf_Addr) me->module_core + me->arch.got_offset + + val = (Elf_Addr) me->module_core_rw + me->arch.got_offset + rela->r_addend - loc; if (r_type == R_390_GOTPC) *(unsigned int *) loc = val; diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 53088e2..9f44a36 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -320,39 +320,3 @@ unsigned long get_wchan(struct task_struct *p) } return 0; } - -unsigned long arch_align_stack(unsigned long sp) -{ - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() & ~PAGE_MASK; - return sp & ~0xf; -} - -static inline unsigned long brk_rnd(void) -{ - /* 8MB for 32bit, 1GB for 64bit */ - if (is_32bit_task()) - return (get_random_int() & 0x7ffUL) << PAGE_SHIFT; - else - return (get_random_int() & 0x3ffffUL) << PAGE_SHIFT; -} - -unsigned long arch_randomize_brk(struct mm_struct *mm) -{ - unsigned long ret = PAGE_ALIGN(mm->brk + brk_rnd()); - - if (ret < mm->brk) - return mm->brk; - return ret; -} - -unsigned long randomize_et_dyn(unsigned long base) -{ - unsigned long ret = PAGE_ALIGN(base + brk_rnd()); - - if (!(current->flags & PF_RANDOMIZE)) - return base; - if (ret < base) - return base; - return ret; -} diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index b76230b..92b4c1c 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 1df64a8..aea2a39 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1035,7 +1035,7 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self, return notifier_from_errno(err); } -static struct notifier_block __cpuinitdata smp_cpu_nb = { +static struct notifier_block smp_cpu_nb = { .notifier_call = smp_cpu_notify, }; diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index bcab2f0..6aa93ca 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -271,7 +271,7 @@ SYSCALL(sys_clock_settime,sys_clock_settime,sys32_clock_settime_wrapper) SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 */ SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper) SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper) -NI_SYSCALL /* reserved for vserver */ +SYSCALL(sys_vserver,sys_vserver,sys32_vserver) SYSCALL(sys_s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper) SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper) SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper) diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index c70b3d8..d01c6b3 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -92,10 +92,22 @@ void arch_pick_mmap_layout(struct mm_struct *mm) */ if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + mm->get_unmapped_area = 
arch_get_unmapped_area; mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(); + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base -= mm->delta_mmap + mm->delta_stack; +#endif + mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->unmap_area = arch_unmap_area_topdown; } @@ -175,10 +187,22 @@ void arch_pick_mmap_layout(struct mm_struct *mm) */ if (mmap_is_legacy()) { mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + mm->get_unmapped_area = s390_get_unmapped_area; mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(); + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base -= mm->delta_mmap + mm->delta_stack; +#endif + mm->get_unmapped_area = s390_get_unmapped_area_topdown; mm->unmap_area = arch_unmap_area_topdown; } diff --git a/arch/score/include/asm/cache.h b/arch/score/include/asm/cache.h index ae3d59f..f65f075 100644 --- a/arch/score/include/asm/cache.h +++ b/arch/score/include/asm/cache.h @@ -1,7 +1,9 @@ #ifndef _ASM_SCORE_CACHE_H #define _ASM_SCORE_CACHE_H +#include + #define L1_CACHE_SHIFT 4 -#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) #endif /* _ASM_SCORE_CACHE_H */ diff --git a/arch/score/include/asm/system.h b/arch/score/include/asm/system.h index 589d5c7..669e274 100644 --- a/arch/score/include/asm/system.h +++ b/arch/score/include/asm/system.h @@ -17,7 +17,7 @@ do { \ #define finish_arch_switch(prev) do {} while (0) typedef void (*vi_handler_t)(void); -extern unsigned long arch_align_stack(unsigned long sp); +#define arch_align_stack(x) (x) #define mb() barrier() #define rmb() barrier() diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c index 25d0803..d6c8e36 100644 --- a/arch/score/kernel/process.c +++ b/arch/score/kernel/process.c @@ -161,8 +161,3 @@ unsigned long get_wchan(struct task_struct *task) return task_pt_regs(task)->cp0_epc; } - -unsigned long arch_align_stack(unsigned long sp) -{ - return sp; -} diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 5629e20..2c56bda 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -901,6 +901,8 @@ source "fs/Kconfig" source "arch/sh/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/sh/include/asm/cache.h b/arch/sh/include/asm/cache.h index ef9e555..331bd29 100644 --- a/arch/sh/include/asm/cache.h +++ b/arch/sh/include/asm/cache.h @@ -9,10 +9,11 @@ #define __ASM_SH_CACHE_H #ifdef __KERNEL__ +#include #include #include -#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) #define __read_mostly __attribute__((__section__(".data..read_mostly"))) diff --git a/arch/sh/kernel/cpu/sh4a/smp-shx3.c b/arch/sh/kernel/cpu/sh4a/smp-shx3.c index 03f2b55..b0270327 100644 --- a/arch/sh/kernel/cpu/sh4a/smp-shx3.c +++ b/arch/sh/kernel/cpu/sh4a/smp-shx3.c @@ -143,7 +143,7 @@ shx3_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) return NOTIFY_OK; } -static struct notifier_block __cpuinitdata shx3_cpu_notifier = { +static struct notifier_block shx3_cpu_notifier = { .notifier_call = shx3_cpu_callback, }; diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index a3ee919..370c557 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -14,6 +14,7 @@ #include #include #include +// #include #include #include #include diff --git 
a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index 325f98b17..6fdc4f7 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c @@ -54,7 +54,7 @@ void free_thread_info(struct thread_info *ti) void thread_info_cache_init(void) { thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE, - THREAD_SIZE, SLAB_PANIC, NULL); + THREAD_SIZE, SLAB_PANIC | SLAB_USERCOPY, NULL); } #else struct thread_info *alloc_thread_info_node(struct task_struct *tsk, int node) diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c index afeb710..8da5c79 100644 --- a/arch/sh/mm/mmap.c +++ b/arch/sh/mm/mmap.c @@ -49,6 +49,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, struct vm_area_struct *vma; unsigned long start_addr; int do_colour_align; + unsigned long offset = gr_rand_threadstack_offset(mm, filp, flags); if (flags & MAP_FIXED) { /* We do not accept a shared mapping if it would violate @@ -74,8 +75,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + if (TASK_SIZE - len >= addr && check_heap_stack_gap(vma, &addr, len, offset)) return addr; } @@ -106,7 +106,7 @@ full_search: } return -ENOMEM; } - if (likely(!vma || addr + len <= vma->vm_start)) { + if (likely(check_heap_stack_gap(vma, &addr, len, offset))) { /* * Remember the place where we stopped the search: */ @@ -131,6 +131,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, struct mm_struct *mm = current->mm; unsigned long addr = addr0; int do_colour_align; + unsigned long offset = gr_rand_threadstack_offset(mm, filp, flags); if (flags & MAP_FIXED) { /* We do not accept a shared mapping if it would violate @@ -157,8 +158,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + if (TASK_SIZE - len >= addr && check_heap_stack_gap(vma, &addr, len, offset)) return addr; } @@ -178,28 +178,29 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, /* make sure it can fit in the remaining address space */ if (likely(addr > len)) { - vma = find_vma(mm, addr-len); - if (!vma || addr <= vma->vm_start) { + addr -= len; + vma = find_vma(mm, addr); + if (check_heap_stack_gap(vma, &addr, len, offset)) { /* remember the address as a hint for next time */ - return (mm->free_area_cache = addr-len); + return (mm->free_area_cache = addr); } } if (unlikely(mm->mmap_base < len)) goto bottomup; - addr = mm->mmap_base-len; - if (do_colour_align) - addr = COLOUR_ALIGN_DOWN(addr, pgoff); + addr = mm->mmap_base - len; do { + if (do_colour_align) + addr = COLOUR_ALIGN_DOWN(addr, pgoff); /* * Lookup failure means no vma is above this address, * else if new region fits below vma->vm_start, * return with success: */ vma = find_vma(mm, addr); - if (likely(!vma || addr+len <= vma->vm_start)) { + if (likely(check_heap_stack_gap(vma, &addr, len, offset))) { /* remember the address as a hint for next time */ return (mm->free_area_cache = addr); } @@ -209,10 +210,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, mm->cached_hole_size = vma->vm_start - addr; /* try just below the current vma->vm_start */ - addr = vma->vm_start-len; - if (do_colour_align) - addr = COLOUR_ALIGN_DOWN(addr, pgoff); - } while (likely(len < vma->vm_start)); + addr = 
skip_heap_stack_gap(vma, len, offset); + } while (!IS_ERR_VALUE(addr)); bottomup: /* diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index f2f3574d..0c785b8 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -600,6 +600,8 @@ source "fs/Kconfig" source "arch/sparc/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index eddcfb3..b117d90 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -75,7 +75,7 @@ drivers-$(CONFIG_OPROFILE) += arch/sparc/oprofile/ # Export what is needed by arch/sparc/boot/Makefile export VMLINUX_INIT VMLINUX_MAIN VMLINUX_INIT := $(head-y) $(init-y) -VMLINUX_MAIN := $(core-y) kernel/ mm/ fs/ ipc/ security/ crypto/ block/ +VMLINUX_MAIN := $(core-y) kernel/ mm/ fs/ ipc/ security/ crypto/ block/ grsecurity/ VMLINUX_MAIN += $(patsubst %/, %/lib.a, $(libs-y)) $(libs-y) VMLINUX_MAIN += $(drivers-y) $(net-y) diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index 07dd35e..2c6f765 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -13,6 +13,8 @@ #include +#include + #ifdef __KERNEL__ #include diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index 9f421df..71e4800 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -14,18 +14,40 @@ #define ATOMIC64_INIT(i) { (i) } #define atomic_read(v) (*(volatile int *)&(v)->counter) +static inline int atomic_read_unchecked(const atomic_unchecked_t *v) +{ + return *(const volatile int *)&v->counter; +} #define atomic64_read(v) (*(volatile long *)&(v)->counter) +static inline long atomic64_read_unchecked(const atomic64_unchecked_t *v) +{ + return *(const volatile long *)&v->counter; +} #define atomic_set(v, i) (((v)->counter) = i) +static inline void atomic_set_unchecked(atomic_unchecked_t *v, int i) +{ + v->counter = i; +} #define atomic64_set(v, i) (((v)->counter) = i) +static inline void atomic64_set_unchecked(atomic64_unchecked_t *v, long i) +{ + v->counter = i; +} extern void atomic_add(int, atomic_t *); +extern void atomic_add_unchecked(int, atomic_unchecked_t *); extern void atomic64_add(long, atomic64_t *); +extern void atomic64_add_unchecked(long, atomic64_unchecked_t *); extern void atomic_sub(int, atomic_t *); +extern void atomic_sub_unchecked(int, atomic_unchecked_t *); extern void atomic64_sub(long, atomic64_t *); +extern void atomic64_sub_unchecked(long, atomic64_unchecked_t *); extern int atomic_add_ret(int, atomic_t *); +extern int atomic_add_ret_unchecked(int, atomic_unchecked_t *); extern long atomic64_add_ret(long, atomic64_t *); +extern long atomic64_add_ret_unchecked(long, atomic64_unchecked_t *); extern int atomic_sub_ret(int, atomic_t *); extern long atomic64_sub_ret(long, atomic64_t *); @@ -33,13 +55,29 @@ extern long atomic64_sub_ret(long, atomic64_t *); #define atomic64_dec_return(v) atomic64_sub_ret(1, v) #define atomic_inc_return(v) atomic_add_ret(1, v) +static inline int atomic_inc_return_unchecked(atomic_unchecked_t *v) +{ + return atomic_add_ret_unchecked(1, v); +} #define atomic64_inc_return(v) atomic64_add_ret(1, v) +static inline long atomic64_inc_return_unchecked(atomic64_unchecked_t *v) +{ + return atomic64_add_ret_unchecked(1, v); +} #define atomic_sub_return(i, v) atomic_sub_ret(i, v) #define atomic64_sub_return(i, v) atomic64_sub_ret(i, v) #define atomic_add_return(i, v) atomic_add_ret(i, v) +static inline int 
atomic_add_return_unchecked(int i, atomic_unchecked_t *v) +{ + return atomic_add_ret_unchecked(i, v); +} #define atomic64_add_return(i, v) atomic64_add_ret(i, v) +static inline long atomic64_add_return_unchecked(long i, atomic64_unchecked_t *v) +{ + return atomic64_add_ret_unchecked(i, v); +} /* * atomic_inc_and_test - increment and test @@ -50,6 +88,10 @@ extern long atomic64_sub_ret(long, atomic64_t *); * other cases. */ #define atomic_inc_and_test(v) (atomic_inc_return(v) == 0) +static inline int atomic_inc_and_test_unchecked(atomic_unchecked_t *v) +{ + return atomic_inc_return_unchecked(v) == 0; +} #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) #define atomic_sub_and_test(i, v) (atomic_sub_ret(i, v) == 0) @@ -59,25 +101,60 @@ extern long atomic64_sub_ret(long, atomic64_t *); #define atomic64_dec_and_test(v) (atomic64_sub_ret(1, v) == 0) #define atomic_inc(v) atomic_add(1, v) +static inline void atomic_inc_unchecked(atomic_unchecked_t *v) +{ + atomic_add_unchecked(1, v); +} #define atomic64_inc(v) atomic64_add(1, v) +static inline void atomic64_inc_unchecked(atomic64_unchecked_t *v) +{ + atomic64_add_unchecked(1, v); +} #define atomic_dec(v) atomic_sub(1, v) +static inline void atomic_dec_unchecked(atomic_unchecked_t *v) +{ + atomic_sub_unchecked(1, v); +} #define atomic64_dec(v) atomic64_sub(1, v) +static inline void atomic64_dec_unchecked(atomic64_unchecked_t *v) +{ + atomic64_sub_unchecked(1, v); +} #define atomic_add_negative(i, v) (atomic_add_ret(i, v) < 0) #define atomic64_add_negative(i, v) (atomic64_add_ret(i, v) < 0) #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) +static inline int atomic_cmpxchg_unchecked(atomic_unchecked_t *v, int old, int new) +{ + return cmpxchg(&v->counter, old, new); +} #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +static inline int atomic_xchg_unchecked(atomic_unchecked_t *v, int new) +{ + return xchg(&v->counter, new); +} static inline int __atomic_add_unless(atomic_t *v, int a, int u) { - int c, old; + int c, old, new; c = atomic_read(v); for (;;) { - if (unlikely(c == (u))) + if (unlikely(c == u)) break; - old = atomic_cmpxchg((v), c, c + (a)); + + asm volatile("addcc %2, %0, %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "tvs %%icc, 6\n" +#endif + + : "=r" (new) + : "0" (c), "ir" (a) + : "cc"); + + old = atomic_cmpxchg(v, c, new); if (likely(old == c)) break; c = old; @@ -89,20 +166,35 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) #define atomic64_cmpxchg(v, o, n) \ ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) #define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) +static inline long atomic64_xchg_unchecked(atomic64_unchecked_t *v, long new) +{ + return xchg(&v->counter, new); +} static inline long atomic64_add_unless(atomic64_t *v, long a, long u) { - long c, old; + long c, old, new; c = atomic64_read(v); for (;;) { - if (unlikely(c == (u))) + if (unlikely(c == u)) break; - old = atomic64_cmpxchg((v), c, c + (a)); + + asm volatile("addcc %2, %0, %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "tvs %%xcc, 6\n" +#endif + + : "=r" (new) + : "0" (c), "ir" (a) + : "cc"); + + old = atomic64_cmpxchg(v, c, new); if (likely(old == c)) break; c = old; } - return c != (u); + return c != u; } #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) diff --git a/arch/sparc/include/asm/cache.h b/arch/sparc/include/asm/cache.h index 69358b5..9d0d492 100644 --- a/arch/sparc/include/asm/cache.h +++ b/arch/sparc/include/asm/cache.h @@ -7,10 +7,12 @@ #ifndef _SPARC_CACHE_H #define 
_SPARC_CACHE_H +#include + #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) #define L1_CACHE_SHIFT 5 -#define L1_CACHE_BYTES 32 +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) #ifdef CONFIG_SPARC32 #define SMP_CACHE_BYTES_SHIFT 5 diff --git a/arch/sparc/include/asm/elf_32.h b/arch/sparc/include/asm/elf_32.h index 4269ca6..e3da77f 100644 --- a/arch/sparc/include/asm/elf_32.h +++ b/arch/sparc/include/asm/elf_32.h @@ -114,6 +114,13 @@ typedef struct { #define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE) +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE 0x10000UL + +#define PAX_DELTA_MMAP_LEN 16 +#define PAX_DELTA_STACK_LEN 16 +#endif + /* This yields a mask that user programs can use to figure out what instruction set this cpu supports. This can NOT be done in userspace on Sparc. */ diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h index 7df8b7f..4946269 100644 --- a/arch/sparc/include/asm/elf_64.h +++ b/arch/sparc/include/asm/elf_64.h @@ -180,6 +180,13 @@ typedef struct { #define ELF_ET_DYN_BASE 0x0000010000000000UL #define COMPAT_ELF_ET_DYN_BASE 0x0000000070000000UL +#ifdef CONFIG_PAX_ASLR +#define PAX_ELF_ET_DYN_BASE (test_thread_flag(TIF_32BIT) ? 0x10000UL : 0x100000UL) + +#define PAX_DELTA_MMAP_LEN (test_thread_flag(TIF_32BIT) ? 14 : 28) +#define PAX_DELTA_STACK_LEN (test_thread_flag(TIF_32BIT) ? 15 : 29) +#endif + extern unsigned long sparc64_elf_hwcap; #define ELF_HWCAP sparc64_elf_hwcap diff --git a/arch/sparc/include/asm/oplib_64.h b/arch/sparc/include/asm/oplib_64.h index 97a9047..290b0cd 100644 --- a/arch/sparc/include/asm/oplib_64.h +++ b/arch/sparc/include/asm/oplib_64.h @@ -62,7 +62,8 @@ struct linux_mem_p1275 { /* You must call prom_init() before using any of the library services, * preferably as early as possible. Pass it the romvec pointer. */ -extern void prom_init(void *cif_handler, void *cif_stack); +void prom_init(void *cif_handler); +void prom_init_report(void); /* Boot argument acquisition, returns the boot command line string. 
*/ extern char *prom_getbootargs(void); diff --git a/arch/sparc/include/asm/page_32.h b/arch/sparc/include/asm/page_32.h index 156707b..aefa786 100644 --- a/arch/sparc/include/asm/page_32.h +++ b/arch/sparc/include/asm/page_32.h @@ -8,6 +8,8 @@ #ifndef _SPARC_PAGE_H #define _SPARC_PAGE_H +#include + #define PAGE_SHIFT 12 #ifndef __ASSEMBLY__ diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h index ca2b344..c6084f89 100644 --- a/arch/sparc/include/asm/pgalloc_32.h +++ b/arch/sparc/include/asm/pgalloc_32.h @@ -37,6 +37,7 @@ BTFIXUPDEF_CALL(void, free_pgd_fast, pgd_t *) BTFIXUPDEF_CALL(void, pgd_set, pgd_t *, pmd_t *) #define pgd_set(pgdp,pmdp) BTFIXUP_CALL(pgd_set)(pgdp,pmdp) #define pgd_populate(MM, PGD, PMD) pgd_set(PGD, PMD) +#define pgd_populate_kernel(MM, PGD, PMD) pgd_populate((MM), (PGD), (PMD)) BTFIXUPDEF_CALL(pmd_t *, pmd_alloc_one, struct mm_struct *, unsigned long) #define pmd_alloc_one(mm, address) BTFIXUP_CALL(pmd_alloc_one)(mm, address) diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h index 40b2d7a..22a665b 100644 --- a/arch/sparc/include/asm/pgalloc_64.h +++ b/arch/sparc/include/asm/pgalloc_64.h @@ -26,6 +26,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) } #define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD) +#define pud_populate_kernel(MM, PUD, PMD) pud_populate((MM), (PUD), (PMD)) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { diff --git a/arch/sparc/include/asm/pgtable.h b/arch/sparc/include/asm/pgtable.h index 59ba6f6..4518128 100644 --- a/arch/sparc/include/asm/pgtable.h +++ b/arch/sparc/include/asm/pgtable.h @@ -5,4 +5,8 @@ #else #include #endif + +#define ktla_ktva(addr) (addr) +#define ktva_ktla(addr) (addr) + #endif diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index a790cc6..091ed94 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -45,6 +45,13 @@ BTFIXUPDEF_SIMM13(user_ptrs_per_pgd) BTFIXUPDEF_INT(page_none) BTFIXUPDEF_INT(page_copy) BTFIXUPDEF_INT(page_readonly) + +#ifdef CONFIG_PAX_PAGEEXEC +BTFIXUPDEF_INT(page_shared_noexec) +BTFIXUPDEF_INT(page_copy_noexec) +BTFIXUPDEF_INT(page_readonly_noexec) +#endif + BTFIXUPDEF_INT(page_kernel) #define PMD_SHIFT SUN4C_PMD_SHIFT @@ -66,6 +73,16 @@ extern pgprot_t PAGE_SHARED; #define PAGE_COPY __pgprot(BTFIXUP_INT(page_copy)) #define PAGE_READONLY __pgprot(BTFIXUP_INT(page_readonly)) +#ifdef CONFIG_PAX_PAGEEXEC +extern pgprot_t PAGE_SHARED_NOEXEC; +# define PAGE_COPY_NOEXEC __pgprot(BTFIXUP_INT(page_copy_noexec)) +# define PAGE_READONLY_NOEXEC __pgprot(BTFIXUP_INT(page_readonly_noexec)) +#else +# define PAGE_SHARED_NOEXEC PAGE_SHARED +# define PAGE_COPY_NOEXEC PAGE_COPY +# define PAGE_READONLY_NOEXEC PAGE_READONLY +#endif + extern unsigned long page_kernel; #ifdef MODULE diff --git a/arch/sparc/include/asm/pgtsrmmu.h b/arch/sparc/include/asm/pgtsrmmu.h index f6ae2b2..b03ffc7 100644 --- a/arch/sparc/include/asm/pgtsrmmu.h +++ b/arch/sparc/include/asm/pgtsrmmu.h @@ -115,6 +115,13 @@ SRMMU_EXEC | SRMMU_REF) #define SRMMU_PAGE_RDONLY __pgprot(SRMMU_VALID | SRMMU_CACHE | \ SRMMU_EXEC | SRMMU_REF) + +#ifdef CONFIG_PAX_PAGEEXEC +#define SRMMU_PAGE_SHARED_NOEXEC __pgprot(SRMMU_VALID | SRMMU_CACHE | SRMMU_WRITE | SRMMU_REF) +#define SRMMU_PAGE_COPY_NOEXEC __pgprot(SRMMU_VALID | SRMMU_CACHE | SRMMU_REF) +#define SRMMU_PAGE_RDONLY_NOEXEC __pgprot(SRMMU_VALID | SRMMU_CACHE | SRMMU_REF) +#endif + #define SRMMU_PAGE_KERNEL 
__pgprot(SRMMU_VALID | SRMMU_CACHE | SRMMU_PRIV | \ SRMMU_DIRTY | SRMMU_REF) diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h index 64718ba..a7e4178 100644 --- a/arch/sparc/include/asm/setup.h +++ b/arch/sparc/include/asm/setup.h @@ -21,6 +21,10 @@ extern unsigned char boot_cpu_id; extern unsigned char boot_cpu_id4; #endif +#ifdef CONFIG_SPARC64 +void __init start_early_boot(void); +#endif + #endif /* __KERNEL__ */ #endif /* _SPARC_SETUP_H */ diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index 9689176..63c18ea 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -92,14 +92,19 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long fla /* Multi-reader locks, these are much saner than the 32-bit Sparc ones... */ -static void inline arch_read_lock(arch_rwlock_t *lock) +static inline void arch_read_lock(arch_rwlock_t *lock) { unsigned long tmp1, tmp2; __asm__ __volatile__ ( "1: ldsw [%2], %0\n" " brlz,pn %0, 2f\n" -"4: add %0, 1, %1\n" +"4: addcc %0, 1, %1\n" + +#ifdef CONFIG_PAX_REFCOUNT +" tvs %%icc, 6\n" +#endif + " cas [%2], %0, %1\n" " cmp %0, %1\n" " bne,pn %%icc, 1b\n" @@ -112,10 +117,10 @@ static void inline arch_read_lock(arch_rwlock_t *lock) " .previous" : "=&r" (tmp1), "=&r" (tmp2) : "r" (lock) - : "memory"); + : "memory", "cc"); } -static int inline arch_read_trylock(arch_rwlock_t *lock) +static inline int arch_read_trylock(arch_rwlock_t *lock) { int tmp1, tmp2; @@ -123,7 +128,12 @@ static int inline arch_read_trylock(arch_rwlock_t *lock) "1: ldsw [%2], %0\n" " brlz,a,pn %0, 2f\n" " mov 0, %0\n" -" add %0, 1, %1\n" +" addcc %0, 1, %1\n" + +#ifdef CONFIG_PAX_REFCOUNT +" tvs %%icc, 6\n" +#endif + " cas [%2], %0, %1\n" " cmp %0, %1\n" " bne,pn %%icc, 1b\n" @@ -136,13 +146,18 @@ static int inline arch_read_trylock(arch_rwlock_t *lock) return tmp1; } -static void inline arch_read_unlock(arch_rwlock_t *lock) +static inline void arch_read_unlock(arch_rwlock_t *lock) { unsigned long tmp1, tmp2; __asm__ __volatile__( "1: lduw [%2], %0\n" -" sub %0, 1, %1\n" +" subcc %0, 1, %1\n" + +#ifdef CONFIG_PAX_REFCOUNT +" tvs %%icc, 6\n" +#endif + " cas [%2], %0, %1\n" " cmp %0, %1\n" " bne,pn %%xcc, 1b\n" @@ -152,7 +167,7 @@ static void inline arch_read_unlock(arch_rwlock_t *lock) : "memory"); } -static void inline arch_write_lock(arch_rwlock_t *lock) +static inline void arch_write_lock(arch_rwlock_t *lock) { unsigned long mask, tmp1, tmp2; @@ -177,7 +192,7 @@ static void inline arch_write_lock(arch_rwlock_t *lock) : "memory"); } -static void inline arch_write_unlock(arch_rwlock_t *lock) +static inline void arch_write_unlock(arch_rwlock_t *lock) { __asm__ __volatile__( " stw %%g0, [%0]" @@ -186,7 +201,7 @@ static void inline arch_write_unlock(arch_rwlock_t *lock) : "memory"); } -static int inline arch_write_trylock(arch_rwlock_t *lock) +static inline int arch_write_trylock(arch_rwlock_t *lock) { unsigned long mask, tmp1, tmp2, result; diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h index fa57532..e1a4c53 100644 --- a/arch/sparc/include/asm/thread_info_32.h +++ b/arch/sparc/include/asm/thread_info_32.h @@ -50,6 +50,8 @@ struct thread_info { unsigned long w_saved; struct restart_block restart_block; + + unsigned long lowest_stack; }; /* diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index 60d86be..5e005d8 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ 
b/arch/sparc/include/asm/thread_info_64.h @@ -63,7 +63,10 @@ struct thread_info { struct pt_regs *kern_una_regs; unsigned int kern_una_insn; - unsigned long fpregs[0] __attribute__ ((aligned(64))); + unsigned long lowest_stack; + + unsigned long fpregs[(7 * 256) / sizeof(unsigned long)] + __attribute__ ((aligned(64))); }; #endif /* !(__ASSEMBLY__) */ @@ -104,13 +107,15 @@ struct thread_info { #define FAULT_CODE_BLKCOMMIT 0x10 /* Use blk-commit ASI in copy_page */ #if PAGE_SHIFT == 13 -#define THREAD_SIZE (2*PAGE_SIZE) +#define THREAD_ORDER 1 #define THREAD_SHIFT (PAGE_SHIFT + 1) #else /* PAGE_SHIFT == 13 */ -#define THREAD_SIZE PAGE_SIZE +#define THREAD_ORDER 0 #define THREAD_SHIFT PAGE_SHIFT #endif /* PAGE_SHIFT == 13 */ +#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) + #define PREEMPT_ACTIVE 0x10000000 /* @@ -214,10 +219,11 @@ register struct thread_info *current_thread_info_reg asm("g6"); #define TIF_UNALIGNED 5 /* allowed to do unaligned accesses */ /* flag bit 6 is available */ #define TIF_32BIT 7 /* 32-bit binary */ -/* flag bit 8 is available */ +#define TIF_GRSEC_SETXID 8 /* update credentials on syscall entry/exit */ #define TIF_SECCOMP 9 /* secure computing */ #define TIF_SYSCALL_AUDIT 10 /* syscall auditing active */ #define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */ + /* NOTE: Thread flags >= 12 should be ones we have no interest * in using in assembly, else we can't use the mask as * an immediate value in instructions such as andcc. @@ -238,12 +244,18 @@ register struct thread_info *current_thread_info_reg asm("g6"); #define _TIF_SYSCALL_TRACEPOINT (1< #else diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h index 8303ac4..d2eec81 100644 --- a/arch/sparc/include/asm/uaccess_32.h +++ b/arch/sparc/include/asm/uaccess_32.h @@ -46,6 +46,7 @@ #define __user_ok(addr, size) ({ (void)(size); (addr) < STACK_TOP; }) #define __kernel_ok (segment_eq(get_fs(), KERNEL_DS)) #define __access_ok(addr,size) (__user_ok((addr) & get_fs().seg,(size))) +#define access_ok_noprefault(type, addr, size) access_ok((type), (addr), (size)) #define access_ok(type, addr, size) \ ({ (void)(type); __access_ok((unsigned long)(addr), size); }) @@ -249,27 +250,46 @@ extern unsigned long __copy_user(void __user *to, const void __user *from, unsig static inline unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) { - if (n && __access_ok((unsigned long) to, n)) + if ((long)n < 0) + return n; + + if (n && __access_ok((unsigned long) to, n)) { + if (!__builtin_constant_p(n)) + check_object_size(from, n, true); return __copy_user(to, (__force void __user *) from, n); - else + } else return n; } static inline unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) { + if ((long)n < 0) + return n; + + if (!__builtin_constant_p(n)) + check_object_size(from, n, true); + return __copy_user(to, (__force void __user *) from, n); } static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { - if (n && __access_ok((unsigned long) from, n)) + if ((long)n < 0) + return n; + + if (n && __access_ok((unsigned long) from, n)) { + if (!__builtin_constant_p(n)) + check_object_size(to, n, false); return __copy_user((__force void __user *) to, from, n); - else + } else return n; } static inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { + if ((long)n < 0) + return n; + return __copy_user((__force void __user *) to, from, n); } diff --git 
a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index 6d6c731..c55ac5e 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -53,6 +54,11 @@ static inline int __access_ok(const void __user * addr, unsigned long size) return 1; } +static inline int access_ok_noprefault(int type, const void __user * addr, unsigned long size) +{ + return 1; +} + static inline int access_ok(int type, const void __user * addr, unsigned long size) { return 1; @@ -213,8 +219,15 @@ extern unsigned long copy_from_user_fixup(void *to, const void __user *from, static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long size) { - unsigned long ret = ___copy_from_user(to, from, size); + unsigned long ret; + if ((long)size < 0 || size > INT_MAX) + return size; + + if (!__builtin_constant_p(size)) + check_object_size(to, size, false); + + ret = ___copy_from_user(to, from, size); if (unlikely(ret)) ret = copy_from_user_fixup(to, from, size); @@ -230,8 +243,15 @@ extern unsigned long copy_to_user_fixup(void __user *to, const void *from, static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long size) { - unsigned long ret = ___copy_to_user(to, from, size); + unsigned long ret; + if ((long)size < 0 || size > INT_MAX) + return size; + + if (!__builtin_constant_p(size)) + check_object_size(from, size, true); + + ret = ___copy_to_user(to, from, size); if (unlikely(ret)) ret = copy_to_user_fixup(to, from, size); return ret; diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index c7cb0af..1f5c6d7 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h @@ -335,7 +335,7 @@ #define __NR_timer_getoverrun 264 #define __NR_timer_delete 265 #define __NR_timer_create 266 -/* #define __NR_vserver 267 Reserved for VSERVER */ +#define __NR_vserver 267 #define __NR_io_setup 268 #define __NR_io_destroy 269 #define __NR_io_submit 270 diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index cb85458..e063f17 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -3,7 +3,7 @@ # asflags-y := -ansi -ccflags-y := -Werror +#ccflags-y := -Werror extra-y := head_$(BITS).o extra-y += init_task.o diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index 27728e1..0010e923 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -783,6 +783,16 @@ void ldom_set_var(const char *var, const char *value) char *base, *p; int msg_len, loops; + if (strlen(var) + strlen(value) + 2 > + sizeof(pkt) - sizeof(pkt.header)) { + printk(KERN_ERR PFX + "contents length: %zu, which more than max: %lu," + "so could not set (%s) variable to (%s).\n", + strlen(var) + strlen(value) + 2, + sizeof(pkt) - sizeof(pkt.header), var, value); + return; + } + memset(&pkt, 0, sizeof(pkt)); pkt.header.data.tag.type = DS_DATA; pkt.header.data.handle = cp->handle; diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h index 0c218e4..f8125bc 100644 --- a/arch/sparc/kernel/entry.h +++ b/arch/sparc/kernel/entry.h @@ -59,13 +59,10 @@ struct popc_6insn_patch_entry { extern struct popc_6insn_patch_entry __popc_6insn_patch, __popc_6insn_patch_end; -extern void __init per_cpu_patch(void); -extern void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *, - struct sun4v_1insn_patch_entry *); -extern void sun4v_patch_2insn_range(struct 
sun4v_2insn_patch_entry *, - struct sun4v_2insn_patch_entry *); -extern void __init sun4v_patch(void); -extern void __init boot_cpu_id_too_large(int cpu); +void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *, + struct sun4v_1insn_patch_entry *); +void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *, + struct sun4v_2insn_patch_entry *); extern unsigned int dcache_parity_tl1_occurred; extern unsigned int icache_parity_tl1_occurred; diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 0d810c2..fec9fd6 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -629,14 +629,12 @@ tlb_fixup_done: sethi %hi(init_thread_union), %g6 or %g6, %lo(init_thread_union), %g6 ldx [%g6 + TI_TASK], %g4 - mov %sp, %l6 wr %g0, ASI_P, %asi mov 1, %g1 sllx %g1, THREAD_SHIFT, %g1 sub %g1, (STACKFRAME_SZ + STACK_BIAS), %g1 add %g6, %g1, %sp - mov 0, %fp /* Set per-cpu pointer initially to zero, this makes * the boot-cpu use the in-kernel-image per-cpu areas @@ -663,44 +661,14 @@ tlb_fixup_done: nop #endif - mov %l6, %o1 ! OpenPROM stack call prom_init mov %l7, %o0 ! OpenPROM cif handler - /* Initialize current_thread_info()->cpu as early as possible. - * In order to do that accurately we have to patch up the get_cpuid() - * assembler sequences. And that, in turn, requires that we know - * if we are on a Starfire box or not. While we're here, patch up - * the sun4v sequences as well. + /* To create a one-register-window buffer between the kernel's + * initial stack and the last stack frame we use from the firmware, + * do the rest of the boot from a C helper function. */ - call check_if_starfire - nop - call per_cpu_patch - nop - call sun4v_patch - nop - -#ifdef CONFIG_SMP - call hard_smp_processor_id - nop - cmp %o0, NR_CPUS - blu,pt %xcc, 1f - nop - call boot_cpu_id_too_large - nop - /* Not reached... */ - -1: -#else - mov 0, %o0 -#endif - sth %o0, [%g6 + TI_CPU] - - call prom_init_report - nop - - /* Off we go.... */ - call start_kernel + call start_early_boot nop /* Not reached... 
*/ diff --git a/arch/sparc/kernel/hvtramp.S b/arch/sparc/kernel/hvtramp.S index 9365432..b69d224 100644 --- a/arch/sparc/kernel/hvtramp.S +++ b/arch/sparc/kernel/hvtramp.S @@ -111,7 +111,6 @@ hv_cpu_startup: sllx %g5, THREAD_SHIFT, %g5 sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 add %g6, %g5, %sp - mov 0, %fp call init_irqwork_curcpu nop diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c index a19c8a0..d04a60b 100644 --- a/arch/sparc/kernel/leon_kernel.c +++ b/arch/sparc/kernel/leon_kernel.c @@ -53,11 +53,13 @@ static inline unsigned int leon_eirq_get(int cpu) static void leon_handle_ext_irq(unsigned int irq, struct irq_desc *desc) { unsigned int eirq; + struct irq_bucket *p; int cpu = sparc_leon3_cpuid(); eirq = leon_eirq_get(cpu); - if ((eirq & 0x10) && irq_map[eirq]->irq) /* bit4 tells if IRQ happened */ - generic_handle_irq(irq_map[eirq]->irq); + p = irq_map[eirq]; + if ((eirq & 0x10) && p && p->irq) /* bit4 tells if IRQ happened */ + generic_handle_irq(p->irq); } /* The extended IRQ controller has been found, this function registers it */ diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index f793742..4d880af 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -204,7 +204,7 @@ void __show_backtrace(unsigned long fp) rw->ins[4], rw->ins[5], rw->ins[6], rw->ins[7]); - printk("%pS\n", (void *) rw->ins[7]); + printk("%pA\n", (void *) rw->ins[7]); rw = (struct reg_window32 *) rw->ins[6]; } spin_unlock_irqrestore(&sparc_backtrace_lock, flags); @@ -271,14 +271,14 @@ void show_regs(struct pt_regs *r) printk("PSR: %08lx PC: %08lx NPC: %08lx Y: %08lx %s\n", r->psr, r->pc, r->npc, r->y, print_tainted()); - printk("PC: <%pS>\n", (void *) r->pc); + printk("PC: <%pA>\n", (void *) r->pc); printk("%%G: %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", r->u_regs[0], r->u_regs[1], r->u_regs[2], r->u_regs[3], r->u_regs[4], r->u_regs[5], r->u_regs[6], r->u_regs[7]); printk("%%O: %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", r->u_regs[8], r->u_regs[9], r->u_regs[10], r->u_regs[11], r->u_regs[12], r->u_regs[13], r->u_regs[14], r->u_regs[15]); - printk("RPC: <%pS>\n", (void *) r->u_regs[15]); + printk("RPC: <%pA>\n", (void *) r->u_regs[15]); printk("%%L: %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", rw->locals[0], rw->locals[1], rw->locals[2], rw->locals[3], @@ -313,7 +313,7 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) rw = (struct reg_window32 *) fp; pc = rw->ins[7]; printk("[%08lx : ", pc); - printk("%pS ] ", (void *) pc); + printk("%pA ] ", (void *) pc); fp = rw->ins[6]; } while (++count < 16); printk("\n"); diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 3739a06..48b2ff0 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -180,14 +180,14 @@ static void show_regwindow(struct pt_regs *regs) printk("i4: %016lx i5: %016lx i6: %016lx i7: %016lx\n", rwk->ins[4], rwk->ins[5], rwk->ins[6], rwk->ins[7]); if (regs->tstate & TSTATE_PRIV) - printk("I7: <%pS>\n", (void *) rwk->ins[7]); + printk("I7: <%pA>\n", (void *) rwk->ins[7]); } void show_regs(struct pt_regs *regs) { printk("TSTATE: %016lx TPC: %016lx TNPC: %016lx Y: %08x %s\n", regs->tstate, regs->tpc, regs->tnpc, regs->y, print_tainted()); - printk("TPC: <%pS>\n", (void *) regs->tpc); + printk("TPC: <%pA>\n", (void *) regs->tpc); printk("g0: %016lx g1: %016lx g2: %016lx g3: %016lx\n", regs->u_regs[0], regs->u_regs[1], regs->u_regs[2], regs->u_regs[3]); @@ -200,7 +200,7 @@ 
void show_regs(struct pt_regs *regs) printk("o4: %016lx o5: %016lx sp: %016lx ret_pc: %016lx\n", regs->u_regs[12], regs->u_regs[13], regs->u_regs[14], regs->u_regs[15]); - printk("RPC: <%pS>\n", (void *) regs->u_regs[15]); + printk("RPC: <%pA>\n", (void *) regs->u_regs[15]); show_regwindow(regs); show_stack(current, (unsigned long *) regs->u_regs[UREG_FP]); } @@ -285,7 +285,7 @@ void arch_trigger_all_cpu_backtrace(void) ((tp && tp->task) ? tp->task->pid : -1)); if (gp->tstate & TSTATE_PRIV) { - printk(" TPC[%pS] O7[%pS] I7[%pS] RPC[%pS]\n", + printk(" TPC[%pA] O7[%pA] I7[%pA] RPC[%pA]\n", (void *) gp->tpc, (void *) gp->o7, (void *) gp->i7, diff --git a/arch/sparc/kernel/prom_common.c b/arch/sparc/kernel/prom_common.c index 741df91..97cdf05 100644 --- a/arch/sparc/kernel/prom_common.c +++ b/arch/sparc/kernel/prom_common.c @@ -144,7 +144,7 @@ static int __init prom_common_nextprop(phandle node, char *prev, char *buf) unsigned int prom_early_allocated __initdata; -static struct of_pdt_ops prom_sparc_ops __initdata = { +static struct of_pdt_ops prom_sparc_ops __initconst = { .nextprop = prom_common_nextprop, .getproplen = prom_getproplen, .getproperty = prom_getproperty, diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index 96ee50a..68ce124 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -1058,6 +1058,10 @@ long arch_ptrace(struct task_struct *child, long request, return ret; } +#ifdef CONFIG_GRKERNSEC_SETXID +extern void gr_delayed_cred_worker(void); +#endif + asmlinkage int syscall_trace_enter(struct pt_regs *regs) { int ret = 0; @@ -1065,6 +1069,11 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs) /* do the secure computing check first */ secure_computing(regs->u_regs[UREG_G1]); +#ifdef CONFIG_GRKERNSEC_SETXID + if (unlikely(test_and_clear_thread_flag(TIF_GRSEC_SETXID))) + gr_delayed_cred_worker(); +#endif + if (test_thread_flag(TIF_SYSCALL_TRACE)) ret = tracehook_report_syscall_entry(regs); @@ -1086,6 +1095,11 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs) asmlinkage void syscall_trace_leave(struct pt_regs *regs) { +#ifdef CONFIG_GRKERNSEC_SETXID + if (unlikely(test_and_clear_thread_flag(TIF_GRSEC_SETXID))) + gr_delayed_cred_worker(); +#endif + #ifdef CONFIG_AUDITSYSCALL if (unlikely(current->audit_context)) { unsigned long tstate = regs->tstate; diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index a854a1c..52488a5 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -174,7 +175,7 @@ char reboot_command[COMMAND_LINE_SIZE]; static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 }; -void __init per_cpu_patch(void) +static void __init per_cpu_patch(void) { struct cpuid_patch_entry *p; unsigned long ver; @@ -266,7 +267,7 @@ void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *start, } } -void __init sun4v_patch(void) +static void __init sun4v_patch(void) { extern void sun4v_hvapi_init(void); @@ -316,14 +317,25 @@ static void __init popc_patch(void) } } -#ifdef CONFIG_SMP -void __init boot_cpu_id_too_large(int cpu) +void __init start_early_boot(void) { - prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n", - cpu, NR_CPUS); - prom_halt(); + int cpu; + + check_if_starfire(); + per_cpu_patch(); + sun4v_patch(); + + cpu = hard_smp_processor_id(); + if (cpu >= NR_CPUS) { + prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n", + cpu, NR_CPUS); + 
prom_halt(); + } + current_thread_info()->cpu = cpu; + + prom_init_report(); + start_kernel(); } -#endif /* On Ultra, we support all of the v8 capabilities. */ unsigned long sparc64_elf_hwcap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index ffd1245..948b0b7 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -871,8 +871,8 @@ extern unsigned long xcall_flush_dcache_page_cheetah; extern unsigned long xcall_flush_dcache_page_spitfire; #ifdef CONFIG_DEBUG_DCFLUSH -extern atomic_t dcpage_flushes; -extern atomic_t dcpage_flushes_xcall; +extern atomic_unchecked_t dcpage_flushes; +extern atomic_unchecked_t dcpage_flushes_xcall; #endif static inline void __local_flush_dcache_page(struct page *page) @@ -896,7 +896,7 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu) return; #ifdef CONFIG_DEBUG_DCFLUSH - atomic_inc(&dcpage_flushes); + atomic_inc_unchecked(&dcpage_flushes); #endif this_cpu = get_cpu(); @@ -920,7 +920,7 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu) xcall_deliver(data0, __pa(pg_addr), (u64) pg_addr, cpumask_of(cpu)); #ifdef CONFIG_DEBUG_DCFLUSH - atomic_inc(&dcpage_flushes_xcall); + atomic_inc_unchecked(&dcpage_flushes_xcall); #endif } } @@ -939,7 +939,7 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page) preempt_disable(); #ifdef CONFIG_DEBUG_DCFLUSH - atomic_inc(&dcpage_flushes); + atomic_inc_unchecked(&dcpage_flushes); #endif data0 = 0; pg_addr = page_address(page); @@ -956,7 +956,7 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page) xcall_deliver(data0, __pa(pg_addr), (u64) pg_addr, cpu_online_mask); #ifdef CONFIG_DEBUG_DCFLUSH - atomic_inc(&dcpage_flushes_xcall); + atomic_inc_unchecked(&dcpage_flushes_xcall); #endif } __local_flush_dcache_page(page); diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index 42b282f..408977c 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -39,6 +39,7 @@ asmlinkage unsigned long sys_getpagesize(void) unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct vm_area_struct * vmm; + unsigned long offset = gr_rand_threadstack_offset(current->mm, filp, flags); if (flags & MAP_FIXED) { /* We do not accept a shared mapping if it would violate @@ -56,7 +57,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi if (ARCH_SUN4C && len > 0x20000000) return -ENOMEM; if (!addr) - addr = TASK_UNMAPPED_BASE; + addr = current->mm->mmap_base; if (flags & MAP_SHARED) addr = COLOUR_ALIGN(addr); @@ -71,7 +72,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi } if (TASK_SIZE - PAGE_SIZE - len < addr) return -ENOMEM; - if (!vmm || addr + len <= vmm->vm_start) + if (check_heap_stack_gap(vmm, &addr, len, offset)) return addr; addr = vmm->vm_end; if (flags & MAP_SHARED) diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 5e4252b..379f84f 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -119,12 +119,13 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi unsigned long task_size = TASK_SIZE; unsigned long start_addr; int do_color_align; + unsigned long offset = gr_rand_threadstack_offset(mm, filp, flags); if (flags & MAP_FIXED) { /* We do not accept a shared mapping if it would violate * cache 
aliasing constraints. */ - if ((flags & MAP_SHARED) && + if ((filp || (flags & MAP_SHARED)) && ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))) return -EINVAL; return addr; @@ -139,6 +140,10 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi if (filp || (flags & MAP_SHARED)) do_color_align = 1; +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + if (addr) { if (do_color_align) addr = COLOUR_ALIGN(addr, pgoff); @@ -146,15 +151,14 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); - if (task_size - len >= addr && - (!vma || addr + len <= vma->vm_start)) + if (task_size - len >= addr && check_heap_stack_gap(vma, &addr, len, offset)) return addr; } if (len > mm->cached_hole_size) { - start_addr = addr = mm->free_area_cache; + start_addr = addr = mm->free_area_cache; } else { - start_addr = addr = TASK_UNMAPPED_BASE; + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; } @@ -174,14 +178,14 @@ full_search: vma = find_vma(mm, VA_EXCLUDE_END); } if (unlikely(task_size < addr)) { - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; + if (start_addr != mm->mmap_base) { + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; goto full_search; } return -ENOMEM; } - if (likely(!vma || addr + len <= vma->vm_start)) { + if (likely(check_heap_stack_gap(vma, &addr, len, offset))) { /* * Remember the place where we stopped the search: */ @@ -207,6 +211,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, unsigned long task_size = STACK_TOP32; unsigned long addr = addr0; int do_color_align; + unsigned long offset = gr_rand_threadstack_offset(mm, filp, flags); /* This should only ever run for 32-bit processes. */ BUG_ON(!test_thread_flag(TIF_32BIT)); @@ -215,7 +220,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, /* We do not accept a shared mapping if it would violate * cache aliasing constraints. 
*/ - if ((flags & MAP_SHARED) && + if ((filp || (flags & MAP_SHARED)) && ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))) return -EINVAL; return addr; @@ -236,8 +241,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); - if (task_size - len >= addr && - (!vma || addr + len <= vma->vm_start)) + if (task_size - len >= addr && check_heap_stack_gap(vma, &addr, len, offset)) return addr; } @@ -257,28 +261,29 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, /* make sure it can fit in the remaining address space */ if (likely(addr > len)) { - vma = find_vma(mm, addr-len); - if (!vma || addr <= vma->vm_start) { + addr -= len; + vma = find_vma(mm, addr); + if (check_heap_stack_gap(vma, &addr, len, offset)) { /* remember the address as a hint for next time */ - return (mm->free_area_cache = addr-len); + return (mm->free_area_cache = addr); } } if (unlikely(mm->mmap_base < len)) goto bottomup; - addr = mm->mmap_base-len; - if (do_color_align) - addr = COLOUR_ALIGN_DOWN(addr, pgoff); + addr = mm->mmap_base - len; do { + if (do_color_align) + addr = COLOUR_ALIGN_DOWN(addr, pgoff); /* * Lookup failure means no vma is above this address, * else if new region fits below vma->vm_start, * return with success: */ vma = find_vma(mm, addr); - if (likely(!vma || addr+len <= vma->vm_start)) { + if (likely(check_heap_stack_gap(vma, &addr, len, offset))) { /* remember the address as a hint for next time */ return (mm->free_area_cache = addr); } @@ -288,10 +293,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, mm->cached_hole_size = vma->vm_start - addr; /* try just below the current vma->vm_start */ - addr = vma->vm_start-len; - if (do_color_align) - addr = COLOUR_ALIGN_DOWN(addr, pgoff); - } while (likely(len < vma->vm_start)); + addr = skip_heap_stack_gap(vma, len, offset); + } while (!IS_ERR_VALUE(addr)); bottomup: /* @@ -361,10 +364,14 @@ unsigned long get_fb_unmapped_area(struct file *filp, unsigned long orig_addr, u EXPORT_SYMBOL(get_fb_unmapped_area); /* Essentially the same as PowerPC. 
*/ -static unsigned long mmap_rnd(void) +static unsigned long mmap_rnd(struct mm_struct *mm) { unsigned long rnd = 0UL; +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + if (current->flags & PF_RANDOMIZE) { unsigned long val = get_random_int(); if (test_thread_flag(TIF_32BIT)) @@ -377,7 +384,7 @@ static unsigned long mmap_rnd(void) void arch_pick_mmap_layout(struct mm_struct *mm) { - unsigned long random_factor = mmap_rnd(); + unsigned long random_factor = mmap_rnd(mm); unsigned long gap; /* @@ -390,6 +397,12 @@ void arch_pick_mmap_layout(struct mm_struct *mm) gap == RLIM_INFINITY || sysctl_legacy_va_layout) { mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + mm->get_unmapped_area = arch_get_unmapped_area; mm->unmap_area = arch_unmap_area; } else { @@ -402,6 +415,12 @@ void arch_pick_mmap_layout(struct mm_struct *mm) gap = (task_size / 6 * 5); mm->mmap_base = PAGE_ALIGN(task_size - gap - random_factor); + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base -= mm->delta_mmap + mm->delta_stack; +#endif + mm->get_unmapped_area = arch_get_unmapped_area_topdown; mm->unmap_area = arch_unmap_area_topdown; } diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S index 557212c..2cc50b0 100644 --- a/arch/sparc/kernel/syscalls.S +++ b/arch/sparc/kernel/syscalls.S @@ -62,7 +62,7 @@ sys32_rt_sigreturn: #endif .align 32 1: ldx [%g6 + TI_FLAGS], %l5 - andcc %l5, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT), %g0 + andcc %l5, _TIF_WORK_SYSCALL, %g0 be,pt %icc, rtrap nop call syscall_trace_leave @@ -179,7 +179,7 @@ linux_sparc_syscall32: srl %i3, 0, %o3 ! IEU0 srl %i2, 0, %o2 ! IEU0 Group - andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT), %g0 + andcc %l0, _TIF_WORK_SYSCALL, %g0 bne,pn %icc, linux_syscall_trace32 ! CTI mov %i0, %l5 ! IEU1 5: call %l7 ! CTI Group brk forced @@ -203,7 +203,7 @@ linux_sparc_syscall: mov %i3, %o3 ! IEU1 mov %i4, %o4 ! IEU0 Group - andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT), %g0 + andcc %l0, _TIF_WORK_SYSCALL, %g0 bne,pn %icc, linux_syscall_trace ! CTI Group mov %i0, %l5 ! IEU0 2: call %l7 ! CTI Group brk forced @@ -218,7 +218,7 @@ ret_sys_call: cmp %o0, -ERESTART_RESTARTBLOCK bgeu,pn %xcc, 1f - andcc %l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT), %g0 + andcc %l0, _TIF_WORK_SYSCALL, %g0 ldx [%sp + PTREGS_OFF + PT_V9_TNPC], %l1 ! 
pc = npc 2: diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c index 7408201..b349841 100644 --- a/arch/sparc/kernel/sysfs.c +++ b/arch/sparc/kernel/sysfs.c @@ -266,7 +266,7 @@ static int __cpuinit sysfs_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata sysfs_cpu_nb = { +static struct notifier_block sysfs_cpu_nb = { .notifier_call = sysfs_cpu_notify, }; diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 63402f9..317e23c 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -70,7 +70,7 @@ sys_call_table: /*250*/ .long sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_ni_syscall /*255*/ .long sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep /*260*/ .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun -/*265*/ .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy +/*265*/ .long sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy /*270*/ .long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink /*275*/ .long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid /*280*/ .long sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 3a58e0d..a782653 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -71,7 +71,7 @@ sys_call_table32: /*250*/ .word sys_mremap, compat_sys_sysctl, sys32_getsid, sys_fdatasync, sys_nis_syscall .word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep /*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun - .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy + .word sys_timer_delete, compat_sys_timer_create, sys32_vserver, compat_sys_io_setup, sys_io_destroy /*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid /*280*/ .word sys32_tee, sys_add_key, sys_request_key, compat_sys_keyctl, compat_sys_openat @@ -148,7 +148,7 @@ sys_call_table: /*250*/ .word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall .word sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep /*260*/ .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun - .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy + .word sys_timer_delete, sys_timer_create, sys_vserver, sys_io_setup, sys_io_destroy /*270*/ .word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink .word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid /*280*/ .word sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat diff --git a/arch/sparc/kernel/trampoline_64.S b/arch/sparc/kernel/trampoline_64.S index 8fa84a3..3fc8ad5 100644 --- a/arch/sparc/kernel/trampoline_64.S +++ b/arch/sparc/kernel/trampoline_64.S @@ -112,10 +112,13 @@ startup_continue: brnz,pn %g1, 1b nop - sethi %hi(p1275buf), %g2 
- or %g2, %lo(p1275buf), %g2 - ldx [%g2 + 0x10], %l2 - add %l2, -(192 + 128), %sp + /* Get onto temporary stack which will be in the locked + * kernel image. + */ + sethi %hi(tramp_stack), %g1 + or %g1, %lo(tramp_stack), %g1 + add %g1, TRAMP_STACK_SIZE, %g1 + sub %g1, STACKFRAME_SZ + STACK_BIAS + 256, %sp flushw /* Setup the loop variables: @@ -397,7 +400,6 @@ after_lock_tlb: sllx %g5, THREAD_SHIFT, %g5 sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 add %g6, %g5, %sp - mov 0, %fp rdpr %pstate, %o1 or %o1, PSTATE_IE, %o1 diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index 591f20c..0f1b925 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -45,6 +45,8 @@ static void instruction_dump(unsigned long *pc) #define __SAVE __asm__ __volatile__("save %sp, -0x40, %sp\n\t") #define __RESTORE __asm__ __volatile__("restore %g0, %g0, %g0\n\t") +extern void gr_handle_kernel_exploit(void); + void die_if_kernel(char *str, struct pt_regs *regs) { static int die_counter; @@ -77,15 +79,17 @@ void die_if_kernel(char *str, struct pt_regs *regs) count++ < 30 && (((unsigned long) rw) >= PAGE_OFFSET) && !(((unsigned long) rw) & 0x7)) { - printk("Caller[%08lx]: %pS\n", rw->ins[7], + printk("Caller[%08lx]: %pA\n", rw->ins[7], (void *) rw->ins[7]); rw = (struct reg_window32 *)rw->ins[6]; } } printk("Instruction DUMP:"); instruction_dump ((unsigned long *) regs->pc); - if(regs->psr & PSR_PS) + if(regs->psr & PSR_PS) { + gr_handle_kernel_exploit(); do_exit(SIGKILL); + } do_exit(SIGSEGV); } diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 0cbdaa4..f37a97c 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -75,7 +75,7 @@ static void dump_tl1_traplog(struct tl1_traplog *p) i + 1, p->trapstack[i].tstate, p->trapstack[i].tpc, p->trapstack[i].tnpc, p->trapstack[i].tt); - printk("TRAPLOG: TPC<%pS>\n", (void *) p->trapstack[i].tpc); + printk("TRAPLOG: TPC<%pA>\n", (void *) p->trapstack[i].tpc); } } @@ -95,6 +95,12 @@ void bad_trap(struct pt_regs *regs, long lvl) lvl -= 0x100; if (regs->tstate & TSTATE_PRIV) { + +#ifdef CONFIG_PAX_REFCOUNT + if (lvl == 6) + pax_report_refcount_overflow(regs); +#endif + sprintf(buffer, "Kernel bad sw trap %lx", lvl); die_if_kernel(buffer, regs); } @@ -113,11 +119,16 @@ void bad_trap(struct pt_regs *regs, long lvl) void bad_trap_tl1(struct pt_regs *regs, long lvl) { char buffer[32]; - + if (notify_die(DIE_TRAP_TL1, "bad trap tl1", regs, 0, lvl, SIGTRAP) == NOTIFY_STOP) return; +#ifdef CONFIG_PAX_REFCOUNT + if (lvl == 6) + pax_report_refcount_overflow(regs); +#endif + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); sprintf (buffer, "Bad trap %lx at tl>0", lvl); @@ -1141,7 +1152,7 @@ static void cheetah_log_errors(struct pt_regs *regs, struct cheetah_err_info *in regs->tpc, regs->tnpc, regs->u_regs[UREG_I7], regs->tstate); printk("%s" "ERROR(%d): ", (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id()); - printk("TPC<%pS>\n", (void *) regs->tpc); + printk("TPC<%pA>\n", (void *) regs->tpc); printk("%s" "ERROR(%d): M_SYND(%lx), E_SYND(%lx)%s%s\n", (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(), (afsr & CHAFSR_M_SYNDROME) >> CHAFSR_M_SYNDROME_SHIFT, @@ -1748,7 +1759,7 @@ void cheetah_plus_parity_error(int type, struct pt_regs *regs) smp_processor_id(), (type & 0x1) ? 
'I' : 'D', regs->tpc); - printk(KERN_EMERG "TPC<%pS>\n", (void *) regs->tpc); + printk(KERN_EMERG "TPC<%pA>\n", (void *) regs->tpc); panic("Irrecoverable Cheetah+ parity error."); } @@ -1756,7 +1767,7 @@ void cheetah_plus_parity_error(int type, struct pt_regs *regs) smp_processor_id(), (type & 0x1) ? 'I' : 'D', regs->tpc); - printk(KERN_WARNING "TPC<%pS>\n", (void *) regs->tpc); + printk(KERN_WARNING "TPC<%pA>\n", (void *) regs->tpc); } struct sun4v_error_entry { @@ -1786,8 +1797,8 @@ struct sun4v_error_entry { u16 err_pad; }; -static atomic_t sun4v_resum_oflow_cnt = ATOMIC_INIT(0); -static atomic_t sun4v_nonresum_oflow_cnt = ATOMIC_INIT(0); +static atomic_unchecked_t sun4v_resum_oflow_cnt = ATOMIC_INIT(0); +static atomic_unchecked_t sun4v_nonresum_oflow_cnt = ATOMIC_INIT(0); static const char *sun4v_err_type_to_str(u32 type) { @@ -1807,7 +1818,7 @@ static const char *sun4v_err_type_to_str(u32 type) } } -static void sun4v_log_error(struct pt_regs *regs, struct sun4v_error_entry *ent, int cpu, const char *pfx, atomic_t *ocnt) +static void sun4v_log_error(struct pt_regs *regs, struct sun4v_error_entry *ent, int cpu, const char *pfx, atomic_unchecked_t *ocnt) { int cnt; @@ -1842,8 +1853,8 @@ static void sun4v_log_error(struct pt_regs *regs, struct sun4v_error_entry *ent, show_regs(regs); - if ((cnt = atomic_read(ocnt)) != 0) { - atomic_set(ocnt, 0); + if ((cnt = atomic_read_unchecked(ocnt)) != 0) { + atomic_set_unchecked(ocnt, 0); wmb(); printk("%s: Queue overflowed %d times.\n", pfx, cnt); @@ -1895,7 +1906,7 @@ void sun4v_resum_error(struct pt_regs *regs, unsigned long offset) */ void sun4v_resum_overflow(struct pt_regs *regs) { - atomic_inc(&sun4v_resum_oflow_cnt); + atomic_inc_unchecked(&sun4v_resum_oflow_cnt); } /* We run with %pil set to PIL_NORMAL_MAX and PSTATE_IE enabled in %pstate. @@ -1948,7 +1959,7 @@ void sun4v_nonresum_overflow(struct pt_regs *regs) /* XXX Actually even this can make not that much sense. Perhaps * XXX we should just pull the plug and panic directly from here? 
*/ - atomic_inc(&sun4v_nonresum_oflow_cnt); + atomic_inc_unchecked(&sun4v_nonresum_oflow_cnt); } unsigned long sun4v_err_itlb_vaddr; @@ -1963,9 +1974,9 @@ void sun4v_itlb_error_report(struct pt_regs *regs, int tl) printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n", regs->tpc, tl); - printk(KERN_EMERG "SUN4V-ITLB: TPC<%pS>\n", (void *) regs->tpc); + printk(KERN_EMERG "SUN4V-ITLB: TPC<%pA>\n", (void *) regs->tpc); printk(KERN_EMERG "SUN4V-ITLB: O7[%lx]\n", regs->u_regs[UREG_I7]); - printk(KERN_EMERG "SUN4V-ITLB: O7<%pS>\n", + printk(KERN_EMERG "SUN4V-ITLB: O7<%pA>\n", (void *) regs->u_regs[UREG_I7]); printk(KERN_EMERG "SUN4V-ITLB: vaddr[%lx] ctx[%lx] " "pte[%lx] error[%lx]\n", @@ -1987,9 +1998,9 @@ void sun4v_dtlb_error_report(struct pt_regs *regs, int tl) printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n", regs->tpc, tl); - printk(KERN_EMERG "SUN4V-DTLB: TPC<%pS>\n", (void *) regs->tpc); + printk(KERN_EMERG "SUN4V-DTLB: TPC<%pA>\n", (void *) regs->tpc); printk(KERN_EMERG "SUN4V-DTLB: O7[%lx]\n", regs->u_regs[UREG_I7]); - printk(KERN_EMERG "SUN4V-DTLB: O7<%pS>\n", + printk(KERN_EMERG "SUN4V-DTLB: O7<%pA>\n", (void *) regs->u_regs[UREG_I7]); printk(KERN_EMERG "SUN4V-DTLB: vaddr[%lx] ctx[%lx] " "pte[%lx] error[%lx]\n", @@ -2195,13 +2206,13 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) fp = (unsigned long)sf->fp + STACK_BIAS; } - printk(" [%016lx] %pS\n", pc, (void *) pc); + printk(" [%016lx] %pA\n", pc, (void *) pc); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if ((pc + 8UL) == (unsigned long) &return_to_handler) { int index = tsk->curr_ret_stack; if (tsk->ret_stack && index >= graph) { pc = tsk->ret_stack[index - graph].ret; - printk(" [%016lx] %pS\n", pc, (void *) pc); + printk(" [%016lx] %pA\n", pc, (void *) pc); graph++; } } @@ -2226,6 +2237,8 @@ static inline struct reg_window *kernel_stack_up(struct reg_window *rw) return (struct reg_window *) (fp + STACK_BIAS); } +extern void gr_handle_kernel_exploit(void); + void die_if_kernel(char *str, struct pt_regs *regs) { static int die_counter; @@ -2254,7 +2267,7 @@ void die_if_kernel(char *str, struct pt_regs *regs) while (rw && count++ < 30 && kstack_valid(tp, (unsigned long) rw)) { - printk("Caller[%016lx]: %pS\n", rw->ins[7], + printk("Caller[%016lx]: %pA\n", rw->ins[7], (void *) rw->ins[7]); rw = kernel_stack_up(rw); @@ -2267,8 +2280,10 @@ void die_if_kernel(char *str, struct pt_regs *regs) } user_instruction_dump ((unsigned int __user *) regs->tpc); } - if (regs->tstate & TSTATE_PRIV) + if (regs->tstate & TSTATE_PRIV) { + gr_handle_kernel_exploit(); do_exit(SIGKILL); + } do_exit(SIGSEGV); } EXPORT_SYMBOL(die_if_kernel); diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c index 1c90812..439d7e8 100644 --- a/arch/sparc/kernel/unaligned_64.c +++ b/arch/sparc/kernel/unaligned_64.c @@ -285,7 +285,7 @@ static void log_unaligned(struct pt_regs *regs) static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5); if (__ratelimit(&ratelimit)) { - printk("Kernel unaligned access at TPC[%lx] %pS\n", + printk("Kernel unaligned access at TPC[%lx] %pA\n", regs->tpc, (void *) regs->tpc); } } diff --git a/arch/sparc/kernel/us3_cpufreq.c b/arch/sparc/kernel/us3_cpufreq.c index eb1624b..55100de 100644 --- a/arch/sparc/kernel/us3_cpufreq.c +++ b/arch/sparc/kernel/us3_cpufreq.c @@ -18,14 +18,12 @@ #include #include -static struct cpufreq_driver *cpufreq_us3_driver; - struct us3_freq_percpu_info { struct cpufreq_frequency_table table[4]; }; /* Indexed by cpu number. 
*/ -static struct us3_freq_percpu_info *us3_freq_table; +static struct us3_freq_percpu_info us3_freq_table[NR_CPUS]; /* UltraSPARC-III has three dividers: 1, 2, and 32. These are controlled * in the Safari config register. @@ -191,12 +189,25 @@ static int __init us3_freq_cpu_init(struct cpufreq_policy *policy) static int us3_freq_cpu_exit(struct cpufreq_policy *policy) { - if (cpufreq_us3_driver) - us3_set_cpu_divider_index(policy->cpu, 0); + us3_set_cpu_divider_index(policy->cpu, 0); return 0; } +static int __init us3_freq_init(void); +static void __exit us3_freq_exit(void); + +static struct cpufreq_driver cpufreq_us3_driver = { + .init = us3_freq_cpu_init, + .verify = us3_freq_verify, + .target = us3_freq_target, + .get = us3_freq_get, + .exit = us3_freq_cpu_exit, + .owner = THIS_MODULE, + .name = "UltraSPARC-III", + +}; + static int __init us3_freq_init(void) { unsigned long manuf, impl, ver; @@ -213,57 +224,15 @@ static int __init us3_freq_init(void) (impl == CHEETAH_IMPL || impl == CHEETAH_PLUS_IMPL || impl == JAGUAR_IMPL || - impl == PANTHER_IMPL)) { - struct cpufreq_driver *driver; - - ret = -ENOMEM; - driver = kzalloc(sizeof(struct cpufreq_driver), GFP_KERNEL); - if (!driver) - goto err_out; - - us3_freq_table = kzalloc( - (NR_CPUS * sizeof(struct us3_freq_percpu_info)), - GFP_KERNEL); - if (!us3_freq_table) - goto err_out; - - driver->init = us3_freq_cpu_init; - driver->verify = us3_freq_verify; - driver->target = us3_freq_target; - driver->get = us3_freq_get; - driver->exit = us3_freq_cpu_exit; - driver->owner = THIS_MODULE, - strcpy(driver->name, "UltraSPARC-III"); - - cpufreq_us3_driver = driver; - ret = cpufreq_register_driver(driver); - if (ret) - goto err_out; - - return 0; - -err_out: - if (driver) { - kfree(driver); - cpufreq_us3_driver = NULL; - } - kfree(us3_freq_table); - us3_freq_table = NULL; - return ret; - } + impl == PANTHER_IMPL)) + return cpufreq_register_driver(&cpufreq_us3_driver); return -ENODEV; } static void __exit us3_freq_exit(void) { - if (cpufreq_us3_driver) { - cpufreq_unregister_driver(cpufreq_us3_driver); - kfree(cpufreq_us3_driver); - cpufreq_us3_driver = NULL; - kfree(us3_freq_table); - us3_freq_table = NULL; - } + cpufreq_unregister_driver(&cpufreq_us3_driver); } MODULE_AUTHOR("David S. 
Miller "); diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 4961516..f82ff86 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -2,7 +2,7 @@ # asflags-y := -ansi -DST_DIV0=0x02 -ccflags-y := -Werror +#ccflags-y := -Werror lib-$(CONFIG_SPARC32) += mul.o rem.o sdiv.o udiv.o umul.o urem.o ashrdi3.o lib-$(CONFIG_SPARC32) += memcpy.o memset.o diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S index 59186e0..f747d7a 100644 --- a/arch/sparc/lib/atomic_64.S +++ b/arch/sparc/lib/atomic_64.S @@ -18,7 +18,12 @@ atomic_add: /* %o0 = increment, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: lduw [%o1], %g1 - add %g1, %o0, %g7 + addcc %g1, %o0, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %icc, 6 +#endif + cas [%o1], %g1, %g7 cmp %g1, %g7 bne,pn %icc, BACKOFF_LABEL(2f, 1b) @@ -28,12 +33,32 @@ atomic_add: /* %o0 = increment, %o1 = atomic_ptr */ 2: BACKOFF_SPIN(%o2, %o3, 1b) .size atomic_add, .-atomic_add + .globl atomic_add_unchecked + .type atomic_add_unchecked,#function +atomic_add_unchecked: /* %o0 = increment, %o1 = atomic_ptr */ + BACKOFF_SETUP(%o2) +1: lduw [%o1], %g1 + add %g1, %o0, %g7 + cas [%o1], %g1, %g7 + cmp %g1, %g7 + bne,pn %icc, 2f + nop + retl + nop +2: BACKOFF_SPIN(%o2, %o3, 1b) + .size atomic_add_unchecked, .-atomic_add_unchecked + .globl atomic_sub .type atomic_sub,#function atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: lduw [%o1], %g1 - sub %g1, %o0, %g7 + subcc %g1, %o0, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %icc, 6 +#endif + cas [%o1], %g1, %g7 cmp %g1, %g7 bne,pn %icc, BACKOFF_LABEL(2f, 1b) @@ -43,12 +68,32 @@ atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */ 2: BACKOFF_SPIN(%o2, %o3, 1b) .size atomic_sub, .-atomic_sub + .globl atomic_sub_unchecked + .type atomic_sub_unchecked,#function +atomic_sub_unchecked: /* %o0 = decrement, %o1 = atomic_ptr */ + BACKOFF_SETUP(%o2) +1: lduw [%o1], %g1 + sub %g1, %o0, %g7 + cas [%o1], %g1, %g7 + cmp %g1, %g7 + bne,pn %icc, 2f + nop + retl + nop +2: BACKOFF_SPIN(%o2, %o3, 1b) + .size atomic_sub_unchecked, .-atomic_sub_unchecked + .globl atomic_add_ret .type atomic_add_ret,#function atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: lduw [%o1], %g1 - add %g1, %o0, %g7 + addcc %g1, %o0, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %icc, 6 +#endif + cas [%o1], %g1, %g7 cmp %g1, %g7 bne,pn %icc, BACKOFF_LABEL(2f, 1b) @@ -58,12 +103,33 @@ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ 2: BACKOFF_SPIN(%o2, %o3, 1b) .size atomic_add_ret, .-atomic_add_ret + .globl atomic_add_ret_unchecked + .type atomic_add_ret_unchecked,#function +atomic_add_ret_unchecked: /* %o0 = increment, %o1 = atomic_ptr */ + BACKOFF_SETUP(%o2) +1: lduw [%o1], %g1 + addcc %g1, %o0, %g7 + cas [%o1], %g1, %g7 + cmp %g1, %g7 + bne,pn %icc, 2f + add %g7, %o0, %g7 + sra %g7, 0, %o0 + retl + nop +2: BACKOFF_SPIN(%o2, %o3, 1b) + .size atomic_add_ret_unchecked, .-atomic_add_ret_unchecked + .globl atomic_sub_ret .type atomic_sub_ret,#function atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: lduw [%o1], %g1 - sub %g1, %o0, %g7 + subcc %g1, %o0, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %icc, 6 +#endif + cas [%o1], %g1, %g7 cmp %g1, %g7 bne,pn %icc, BACKOFF_LABEL(2f, 1b) @@ -78,7 +144,12 @@ atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ atomic64_add: /* %o0 = increment, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: ldx [%o1], %g1 - add %g1, %o0, %g7 + addcc %g1, %o0, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %xcc, 6 +#endif + casx [%o1], %g1, %g7 cmp 
%g1, %g7 bne,pn %xcc, BACKOFF_LABEL(2f, 1b) @@ -88,12 +159,32 @@ atomic64_add: /* %o0 = increment, %o1 = atomic_ptr */ 2: BACKOFF_SPIN(%o2, %o3, 1b) .size atomic64_add, .-atomic64_add + .globl atomic64_add_unchecked + .type atomic64_add_unchecked,#function +atomic64_add_unchecked: /* %o0 = increment, %o1 = atomic_ptr */ + BACKOFF_SETUP(%o2) +1: ldx [%o1], %g1 + addcc %g1, %o0, %g7 + casx [%o1], %g1, %g7 + cmp %g1, %g7 + bne,pn %xcc, 2f + nop + retl + nop +2: BACKOFF_SPIN(%o2, %o3, 1b) + .size atomic64_add_unchecked, .-atomic64_add_unchecked + .globl atomic64_sub .type atomic64_sub,#function atomic64_sub: /* %o0 = decrement, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: ldx [%o1], %g1 - sub %g1, %o0, %g7 + subcc %g1, %o0, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %xcc, 6 +#endif + casx [%o1], %g1, %g7 cmp %g1, %g7 bne,pn %xcc, BACKOFF_LABEL(2f, 1b) @@ -103,12 +194,32 @@ atomic64_sub: /* %o0 = decrement, %o1 = atomic_ptr */ 2: BACKOFF_SPIN(%o2, %o3, 1b) .size atomic64_sub, .-atomic64_sub + .globl atomic64_sub_unchecked + .type atomic64_sub_unchecked,#function +atomic64_sub_unchecked: /* %o0 = decrement, %o1 = atomic_ptr */ + BACKOFF_SETUP(%o2) +1: ldx [%o1], %g1 + subcc %g1, %o0, %g7 + casx [%o1], %g1, %g7 + cmp %g1, %g7 + bne,pn %xcc, 2f + nop + retl + nop +2: BACKOFF_SPIN(%o2, %o3, 1b) + .size atomic64_sub_unchecked, .-atomic64_sub_unchecked + .globl atomic64_add_ret .type atomic64_add_ret,#function atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: ldx [%o1], %g1 - add %g1, %o0, %g7 + addcc %g1, %o0, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %xcc, 6 +#endif + casx [%o1], %g1, %g7 cmp %g1, %g7 bne,pn %xcc, BACKOFF_LABEL(2f, 1b) @@ -118,12 +229,33 @@ atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */ 2: BACKOFF_SPIN(%o2, %o3, 1b) .size atomic64_add_ret, .-atomic64_add_ret + .globl atomic64_add_ret_unchecked + .type atomic64_add_ret_unchecked,#function +atomic64_add_ret_unchecked: /* %o0 = increment, %o1 = atomic_ptr */ + BACKOFF_SETUP(%o2) +1: ldx [%o1], %g1 + addcc %g1, %o0, %g7 + casx [%o1], %g1, %g7 + cmp %g1, %g7 + bne,pn %xcc, 2f + add %g7, %o0, %g7 + mov %g7, %o0 + retl + nop +2: BACKOFF_SPIN(%o2, %o3, 1b) + .size atomic64_add_ret_unchecked, .-atomic64_add_ret_unchecked + .globl atomic64_sub_ret .type atomic64_sub_ret,#function atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */ BACKOFF_SETUP(%o2) 1: ldx [%o1], %g1 - sub %g1, %o0, %g7 + subcc %g1, %o0, %g7 + +#ifdef CONFIG_PAX_REFCOUNT + tvs %xcc, 6 +#endif + casx [%o1], %g1, %g7 cmp %g1, %g7 bne,pn %xcc, BACKOFF_LABEL(2f, 1b) diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c index fbb8005..984a269 100644 --- a/arch/sparc/lib/ksyms.c +++ b/arch/sparc/lib/ksyms.c @@ -133,12 +133,18 @@ EXPORT_SYMBOL(__clear_user); /* Atomic counter implementation. */ EXPORT_SYMBOL(atomic_add); +EXPORT_SYMBOL(atomic_add_unchecked); EXPORT_SYMBOL(atomic_add_ret); +EXPORT_SYMBOL(atomic_add_ret_unchecked); EXPORT_SYMBOL(atomic_sub); +EXPORT_SYMBOL(atomic_sub_unchecked); EXPORT_SYMBOL(atomic_sub_ret); EXPORT_SYMBOL(atomic64_add); +EXPORT_SYMBOL(atomic64_add_unchecked); EXPORT_SYMBOL(atomic64_add_ret); +EXPORT_SYMBOL(atomic64_add_ret_unchecked); EXPORT_SYMBOL(atomic64_sub); +EXPORT_SYMBOL(atomic64_sub_unchecked); EXPORT_SYMBOL(atomic64_sub_ret); /* Atomic bit operations. 
*/ diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile index 301421c1..e2535d1 100644 --- a/arch/sparc/mm/Makefile +++ b/arch/sparc/mm/Makefile @@ -2,7 +2,7 @@ # asflags-y := -ansi -ccflags-y := -Werror +#ccflags-y := -Werror obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o gup.o obj-y += fault_$(BITS).o diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index 802b806..483d6e9 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -21,6 +21,9 @@ #include #include #include +#include +#include +#include #include #include @@ -208,6 +211,277 @@ static unsigned long compute_si_addr(struct pt_regs *regs, int text_fault) return safe_compute_effective_address(regs, insn); } +#ifdef CONFIG_PAX_PAGEEXEC +#ifdef CONFIG_PAX_DLRESOLVE +static void pax_emuplt_close(struct vm_area_struct *vma) +{ + vma->vm_mm->call_dl_resolve = 0UL; +} + +static int pax_emuplt_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + unsigned int *kaddr; + + vmf->page = alloc_page(GFP_HIGHUSER); + if (!vmf->page) + return VM_FAULT_OOM; + + kaddr = kmap(vmf->page); + memset(kaddr, 0, PAGE_SIZE); + kaddr[0] = 0x9DE3BFA8U; /* save */ + flush_dcache_page(vmf->page); + kunmap(vmf->page); + return VM_FAULT_MAJOR; +} + +static const struct vm_operations_struct pax_vm_ops = { + .close = pax_emuplt_close, + .fault = pax_emuplt_fault +}; + +static int pax_insert_vma(struct vm_area_struct *vma, unsigned long addr) +{ + int ret; + + INIT_LIST_HEAD(&vma->anon_vma_chain); + vma->vm_mm = current->mm; + vma->vm_start = addr; + vma->vm_end = addr + PAGE_SIZE; + vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC; + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + vma->vm_ops = &pax_vm_ops; + + ret = insert_vm_struct(current->mm, vma); + if (ret) + return ret; + + ++current->mm->total_vm; + return 0; +} +#endif + +/* + * PaX: decide what to do with offenders (regs->pc = fault address) + * + * returns 1 when task should be killed + * 2 when patched PLT trampoline was detected + * 3 when unpatched PLT trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_EMUPLT + int err; + + do { /* PaX: patched PLT emulation #1 */ + unsigned int sethi1, sethi2, jmpl; + + err = get_user(sethi1, (unsigned int *)regs->pc); + err |= get_user(sethi2, (unsigned int *)(regs->pc+4)); + err |= get_user(jmpl, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((sethi1 & 0xFFC00000U) == 0x03000000U && + (sethi2 & 0xFFC00000U) == 0x03000000U && + (jmpl & 0xFFFFE000U) == 0x81C06000U) + { + unsigned int addr; + + regs->u_regs[UREG_G1] = (sethi2 & 0x003FFFFFU) << 10; + addr = regs->u_regs[UREG_G1]; + addr += (((jmpl | 0xFFFFE000U) ^ 0x00001000U) + 0x00001000U); + regs->pc = addr; + regs->npc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #2 */ + unsigned int ba; + + err = get_user(ba, (unsigned int *)regs->pc); + + if (err) + break; + + if ((ba & 0xFFC00000U) == 0x30800000U || (ba & 0xFFF80000U) == 0x30480000U) { + unsigned int addr; + + if ((ba & 0xFFC00000U) == 0x30800000U) + addr = regs->pc + ((((ba | 0xFFC00000U) ^ 0x00200000U) + 0x00200000U) << 2); + else + addr = regs->pc + ((((ba | 0xFFF80000U) ^ 0x00040000U) + 0x00040000U) << 2); + regs->pc = addr; + regs->npc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #3 */ + unsigned int sethi, bajmpl, nop; + + err = get_user(sethi, (unsigned int *)regs->pc); + err |= get_user(bajmpl, (unsigned int *)(regs->pc+4)); + err |= get_user(nop, 
(unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + ((bajmpl & 0xFFFFE000U) == 0x81C06000U || (bajmpl & 0xFFF80000U) == 0x30480000U) && + nop == 0x01000000U) + { + unsigned int addr; + + addr = (sethi & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G1] = addr; + if ((bajmpl & 0xFFFFE000U) == 0x81C06000U) + addr += (((bajmpl | 0xFFFFE000U) ^ 0x00001000U) + 0x00001000U); + else + addr = regs->pc + ((((bajmpl | 0xFFF80000U) ^ 0x00040000U) + 0x00040000U) << 2); + regs->pc = addr; + regs->npc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: unpatched PLT emulation step 1 */ + unsigned int sethi, ba, nop; + + err = get_user(sethi, (unsigned int *)regs->pc); + err |= get_user(ba, (unsigned int *)(regs->pc+4)); + err |= get_user(nop, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + ((ba & 0xFFC00000U) == 0x30800000U || (ba & 0xFFF80000U) == 0x30680000U) && + nop == 0x01000000U) + { + unsigned int addr, save, call; + + if ((ba & 0xFFC00000U) == 0x30800000U) + addr = regs->pc + 4 + ((((ba | 0xFFC00000U) ^ 0x00200000U) + 0x00200000U) << 2); + else + addr = regs->pc + 4 + ((((ba | 0xFFF80000U) ^ 0x00040000U) + 0x00040000U) << 2); + + err = get_user(save, (unsigned int *)addr); + err |= get_user(call, (unsigned int *)(addr+4)); + err |= get_user(nop, (unsigned int *)(addr+8)); + if (err) + break; + +#ifdef CONFIG_PAX_DLRESOLVE + if (save == 0x9DE3BFA8U && + (call & 0xC0000000U) == 0x40000000U && + nop == 0x01000000U) + { + struct vm_area_struct *vma; + unsigned long call_dl_resolve; + + down_read(&current->mm->mmap_sem); + call_dl_resolve = current->mm->call_dl_resolve; + up_read(&current->mm->mmap_sem); + if (likely(call_dl_resolve)) + goto emulate; + + vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + + down_write(&current->mm->mmap_sem); + if (current->mm->call_dl_resolve) { + call_dl_resolve = current->mm->call_dl_resolve; + up_write(&current->mm->mmap_sem); + if (vma) + kmem_cache_free(vm_area_cachep, vma); + goto emulate; + } + + call_dl_resolve = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); + if (!vma || (call_dl_resolve & ~PAGE_MASK)) { + up_write(&current->mm->mmap_sem); + if (vma) + kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + if (pax_insert_vma(vma, call_dl_resolve)) { + up_write(&current->mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + current->mm->call_dl_resolve = call_dl_resolve; + up_write(&current->mm->mmap_sem); + +emulate: + regs->u_regs[UREG_G1] = (sethi & 0x003FFFFFU) << 10; + regs->pc = call_dl_resolve; + regs->npc = addr+4; + return 3; + } +#endif + + /* PaX: glibc 2.4+ generates sethi/jmpl instead of save/call */ + if ((save & 0xFFC00000U) == 0x05000000U && + (call & 0xFFFFE000U) == 0x85C0A000U && + nop == 0x01000000U) + { + regs->u_regs[UREG_G1] = (sethi & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G2] = addr + 4; + addr = (save & 0x003FFFFFU) << 10; + addr += (((call | 0xFFFFE000U) ^ 0x00001000U) + 0x00001000U); + regs->pc = addr; + regs->npc = addr+4; + return 3; + } + } + } while (0); + + do { /* PaX: unpatched PLT emulation step 2 */ + unsigned int save, call, nop; + + err = get_user(save, (unsigned int *)(regs->pc-4)); + err |= get_user(call, (unsigned int *)regs->pc); + err |= get_user(nop, (unsigned int *)(regs->pc+4)); + if (err) + break; + + if (save == 0x9DE3BFA8U && + (call & 0xC0000000U) == 0x40000000U && + nop == 0x01000000U) + { + unsigned int dl_resolve = regs->pc + ((((call | 0xC0000000U) ^ 0x20000000U) + 0x20000000U) << 2); + +
regs->u_regs[UREG_RETPC] = regs->pc; + regs->pc = dl_resolve; + regs->npc = dl_resolve+4; + return 3; + } + } while (0); +#endif + + return 1; +} + +void pax_report_insns(struct pt_regs *regs, void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 8; i++) { + unsigned int c; + if (get_user(c, (unsigned int *)pc+i)) + printk(KERN_CONT "???????? "); + else + printk(KERN_CONT "%08x ", c); + } + printk("\n"); +} +#endif + static noinline void do_fault_siginfo(int code, int sig, struct pt_regs *regs, int text_fault) { @@ -280,6 +554,24 @@ good_area: if(!(vma->vm_flags & VM_WRITE)) goto bad_area; } else { + +#ifdef CONFIG_PAX_PAGEEXEC + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && text_fault && !(vma->vm_flags & VM_EXEC)) { + up_read(&mm->mmap_sem); + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 2: + case 3: + return; +#endif + + } + pax_report_fault(regs, (void *)regs->pc, (void *)regs->u_regs[UREG_FP]); + do_group_exit(SIGKILL); + } +#endif + /* Allow reads even for write-only mappings */ if(!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index bfd7c02..6e941d8 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -21,6 +21,9 @@ #include #include #include +#include +#include +#include #include #include @@ -74,7 +77,7 @@ static void __kprobes bad_kernel_pc(struct pt_regs *regs, unsigned long vaddr) printk(KERN_CRIT "OOPS: Bogus kernel PC [%016lx] in fault handler\n", regs->tpc); printk(KERN_CRIT "OOPS: RPC [%016lx]\n", regs->u_regs[15]); - printk("OOPS: RPC <%pS>\n", (void *) regs->u_regs[15]); + printk("OOPS: RPC <%pA>\n", (void *) regs->u_regs[15]); printk(KERN_CRIT "OOPS: Fault was to vaddr[%lx]\n", vaddr); dump_stack(); unhandled_fault(regs->tpc, current, regs); @@ -282,6 +285,466 @@ static void noinline __kprobes bogus_32bit_fault_tpc(struct pt_regs *regs) show_regs(regs); } +#ifdef CONFIG_PAX_PAGEEXEC +#ifdef CONFIG_PAX_DLRESOLVE +static void pax_emuplt_close(struct vm_area_struct *vma) +{ + vma->vm_mm->call_dl_resolve = 0UL; +} + +static int pax_emuplt_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + unsigned int *kaddr; + + vmf->page = alloc_page(GFP_HIGHUSER); + if (!vmf->page) + return VM_FAULT_OOM; + + kaddr = kmap(vmf->page); + memset(kaddr, 0, PAGE_SIZE); + kaddr[0] = 0x9DE3BFA8U; /* save */ + flush_dcache_page(vmf->page); + kunmap(vmf->page); + return VM_FAULT_MAJOR; +} + +static const struct vm_operations_struct pax_vm_ops = { + .close = pax_emuplt_close, + .fault = pax_emuplt_fault +}; + +static int pax_insert_vma(struct vm_area_struct *vma, unsigned long addr) +{ + int ret; + + INIT_LIST_HEAD(&vma->anon_vma_chain); + vma->vm_mm = current->mm; + vma->vm_start = addr; + vma->vm_end = addr + PAGE_SIZE; + vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC; + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + vma->vm_ops = &pax_vm_ops; + + ret = insert_vm_struct(current->mm, vma); + if (ret) + return ret; + + ++current->mm->total_vm; + return 0; +} +#endif + +/* + * PaX: decide what to do with offenders (regs->tpc = fault address) + * + * returns 1 when task should be killed + * 2 when patched PLT trampoline was detected + * 3 when unpatched PLT trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_EMUPLT + int err; + + do { /* PaX: patched PLT emulation #1 */ + unsigned int sethi1, sethi2, jmpl; + + err = get_user(sethi1, (unsigned int 
*)regs->tpc); + err |= get_user(sethi2, (unsigned int *)(regs->tpc+4)); + err |= get_user(jmpl, (unsigned int *)(regs->tpc+8)); + + if (err) + break; + + if ((sethi1 & 0xFFC00000U) == 0x03000000U && + (sethi2 & 0xFFC00000U) == 0x03000000U && + (jmpl & 0xFFFFE000U) == 0x81C06000U) + { + unsigned long addr; + + regs->u_regs[UREG_G1] = (sethi2 & 0x003FFFFFU) << 10; + addr = regs->u_regs[UREG_G1]; + addr += (((jmpl | 0xFFFFFFFFFFFFE000UL) ^ 0x00001000UL) + 0x00001000UL); + + if (test_thread_flag(TIF_32BIT)) + addr &= 0xFFFFFFFFUL; + + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #2 */ + unsigned int ba; + + err = get_user(ba, (unsigned int *)regs->tpc); + + if (err) + break; + + if ((ba & 0xFFC00000U) == 0x30800000U || (ba & 0xFFF80000U) == 0x30480000U) { + unsigned long addr; + + if ((ba & 0xFFC00000U) == 0x30800000U) + addr = regs->tpc + ((((ba | 0xFFFFFFFFFFC00000UL) ^ 0x00200000UL) + 0x00200000UL) << 2); + else + addr = regs->tpc + ((((ba | 0xFFFFFFFFFFF80000UL) ^ 0x00040000UL) + 0x00040000UL) << 2); + + if (test_thread_flag(TIF_32BIT)) + addr &= 0xFFFFFFFFUL; + + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #3 */ + unsigned int sethi, bajmpl, nop; + + err = get_user(sethi, (unsigned int *)regs->tpc); + err |= get_user(bajmpl, (unsigned int *)(regs->tpc+4)); + err |= get_user(nop, (unsigned int *)(regs->tpc+8)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + ((bajmpl & 0xFFFFE000U) == 0x81C06000U || (bajmpl & 0xFFF80000U) == 0x30480000U) && + nop == 0x01000000U) + { + unsigned long addr; + + addr = (sethi & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G1] = addr; + if ((bajmpl & 0xFFFFE000U) == 0x81C06000U) + addr += (((bajmpl | 0xFFFFFFFFFFFFE000UL) ^ 0x00001000UL) + 0x00001000UL); + else + addr = regs->tpc + ((((bajmpl | 0xFFFFFFFFFFF80000UL) ^ 0x00040000UL) + 0x00040000UL) << 2); + + if (test_thread_flag(TIF_32BIT)) + addr &= 0xFFFFFFFFUL; + + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #4 */ + unsigned int sethi, mov1, call, mov2; + + err = get_user(sethi, (unsigned int *)regs->tpc); + err |= get_user(mov1, (unsigned int *)(regs->tpc+4)); + err |= get_user(call, (unsigned int *)(regs->tpc+8)); + err |= get_user(mov2, (unsigned int *)(regs->tpc+12)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + mov1 == 0x8210000FU && + (call & 0xC0000000U) == 0x40000000U && + mov2 == 0x9E100001U) + { + unsigned long addr; + + regs->u_regs[UREG_G1] = regs->u_regs[UREG_RETPC]; + addr = regs->tpc + 4 + ((((call | 0xFFFFFFFFC0000000UL) ^ 0x20000000UL) + 0x20000000UL) << 2); + + if (test_thread_flag(TIF_32BIT)) + addr &= 0xFFFFFFFFUL; + + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #5 */ + unsigned int sethi, sethi1, sethi2, or1, or2, sllx, jmpl, nop; + + err = get_user(sethi, (unsigned int *)regs->tpc); + err |= get_user(sethi1, (unsigned int *)(regs->tpc+4)); + err |= get_user(sethi2, (unsigned int *)(regs->tpc+8)); + err |= get_user(or1, (unsigned int *)(regs->tpc+12)); + err |= get_user(or2, (unsigned int *)(regs->tpc+16)); + err |= get_user(sllx, (unsigned int *)(regs->tpc+20)); + err |= get_user(jmpl, (unsigned int *)(regs->tpc+24)); + err |= get_user(nop, (unsigned int *)(regs->tpc+28)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + (sethi1 & 0xFFC00000U) == 
0x03000000U && + (sethi2 & 0xFFC00000U) == 0x0B000000U && + (or1 & 0xFFFFE000U) == 0x82106000U && + (or2 & 0xFFFFE000U) == 0x8A116000U && + sllx == 0x83287020U && + jmpl == 0x81C04005U && + nop == 0x01000000U) + { + unsigned long addr; + + regs->u_regs[UREG_G1] = ((sethi1 & 0x003FFFFFU) << 10) | (or1 & 0x000003FFU); + regs->u_regs[UREG_G1] <<= 32; + regs->u_regs[UREG_G5] = ((sethi2 & 0x003FFFFFU) << 10) | (or2 & 0x000003FFU); + addr = regs->u_regs[UREG_G1] + regs->u_regs[UREG_G5]; + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #6 */ + unsigned int sethi, sethi1, sethi2, sllx, or, jmpl, nop; + + err = get_user(sethi, (unsigned int *)regs->tpc); + err |= get_user(sethi1, (unsigned int *)(regs->tpc+4)); + err |= get_user(sethi2, (unsigned int *)(regs->tpc+8)); + err |= get_user(sllx, (unsigned int *)(regs->tpc+12)); + err |= get_user(or, (unsigned int *)(regs->tpc+16)); + err |= get_user(jmpl, (unsigned int *)(regs->tpc+20)); + err |= get_user(nop, (unsigned int *)(regs->tpc+24)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + (sethi1 & 0xFFC00000U) == 0x03000000U && + (sethi2 & 0xFFC00000U) == 0x0B000000U && + sllx == 0x83287020U && + (or & 0xFFFFE000U) == 0x8A116000U && + jmpl == 0x81C04005U && + nop == 0x01000000U) + { + unsigned long addr; + + regs->u_regs[UREG_G1] = (sethi1 & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G1] <<= 32; + regs->u_regs[UREG_G5] = ((sethi2 & 0x003FFFFFU) << 10) | (or & 0x3FFU); + addr = regs->u_regs[UREG_G1] + regs->u_regs[UREG_G5]; + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + + do { /* PaX: unpatched PLT emulation step 1 */ + unsigned int sethi, ba, nop; + + err = get_user(sethi, (unsigned int *)regs->tpc); + err |= get_user(ba, (unsigned int *)(regs->tpc+4)); + err |= get_user(nop, (unsigned int *)(regs->tpc+8)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + ((ba & 0xFFC00000U) == 0x30800000U || (ba & 0xFFF80000U) == 0x30680000U) && + nop == 0x01000000U) + { + unsigned long addr; + unsigned int save, call; + unsigned int sethi1, sethi2, or1, or2, sllx, add, jmpl; + + if ((ba & 0xFFC00000U) == 0x30800000U) + addr = regs->tpc + 4 + ((((ba | 0xFFFFFFFFFFC00000UL) ^ 0x00200000UL) + 0x00200000UL) << 2); + else + addr = regs->tpc + 4 + ((((ba | 0xFFFFFFFFFFF80000UL) ^ 0x00040000UL) + 0x00040000UL) << 2); + + if (test_thread_flag(TIF_32BIT)) + addr &= 0xFFFFFFFFUL; + + err = get_user(save, (unsigned int *)addr); + err |= get_user(call, (unsigned int *)(addr+4)); + err |= get_user(nop, (unsigned int *)(addr+8)); + if (err) + break; + +#ifdef CONFIG_PAX_DLRESOLVE + if (save == 0x9DE3BFA8U && + (call & 0xC0000000U) == 0x40000000U && + nop == 0x01000000U) + { + struct vm_area_struct *vma; + unsigned long call_dl_resolve; + + down_read(&current->mm->mmap_sem); + call_dl_resolve = current->mm->call_dl_resolve; + up_read(&current->mm->mmap_sem); + if (likely(call_dl_resolve)) + goto emulate; + + vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); + + down_write(&current->mm->mmap_sem); + if (current->mm->call_dl_resolve) { + call_dl_resolve = current->mm->call_dl_resolve; + up_write(&current->mm->mmap_sem); + if (vma) + kmem_cache_free(vm_area_cachep, vma); + goto emulate; + } + + call_dl_resolve = get_unmapped_area(NULL, 0UL, PAGE_SIZE, 0UL, MAP_PRIVATE); + if (!vma || (call_dl_resolve & ~PAGE_MASK)) { + up_write(&current->mm->mmap_sem); + if (vma) + kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + if (pax_insert_vma(vma, call_dl_resolve)) { +
up_write(&current->mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return 1; + } + + current->mm->call_dl_resolve = call_dl_resolve; + up_write(&current->mm->mmap_sem); + +emulate: + regs->u_regs[UREG_G1] = (sethi & 0x003FFFFFU) << 10; + regs->tpc = call_dl_resolve; + regs->tnpc = addr+4; + return 3; + } +#endif + + /* PaX: glibc 2.4+ generates sethi/jmpl instead of save/call */ + if ((save & 0xFFC00000U) == 0x05000000U && + (call & 0xFFFFE000U) == 0x85C0A000U && + nop == 0x01000000U) + { + regs->u_regs[UREG_G1] = (sethi & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G2] = addr + 4; + addr = (save & 0x003FFFFFU) << 10; + addr += (((call | 0xFFFFFFFFFFFFE000UL) ^ 0x00001000UL) + 0x00001000UL); + + if (test_thread_flag(TIF_32BIT)) + addr &= 0xFFFFFFFFUL; + + regs->tpc = addr; + regs->tnpc = addr+4; + return 3; + } + + /* PaX: 64-bit PLT stub */ + err = get_user(sethi1, (unsigned int *)addr); + err |= get_user(sethi2, (unsigned int *)(addr+4)); + err |= get_user(or1, (unsigned int *)(addr+8)); + err |= get_user(or2, (unsigned int *)(addr+12)); + err |= get_user(sllx, (unsigned int *)(addr+16)); + err |= get_user(add, (unsigned int *)(addr+20)); + err |= get_user(jmpl, (unsigned int *)(addr+24)); + err |= get_user(nop, (unsigned int *)(addr+28)); + if (err) + break; + + if ((sethi1 & 0xFFC00000U) == 0x09000000U && + (sethi2 & 0xFFC00000U) == 0x0B000000U && + (or1 & 0xFFFFE000U) == 0x88112000U && + (or2 & 0xFFFFE000U) == 0x8A116000U && + sllx == 0x89293020U && + add == 0x8A010005U && + jmpl == 0x89C14000U && + nop == 0x01000000U) + { + regs->u_regs[UREG_G1] = (sethi & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G4] = ((sethi1 & 0x003FFFFFU) << 10) | (or1 & 0x000003FFU); + regs->u_regs[UREG_G4] <<= 32; + regs->u_regs[UREG_G5] = ((sethi2 & 0x003FFFFFU) << 10) | (or2 & 0x000003FFU); + regs->u_regs[UREG_G5] += regs->u_regs[UREG_G4]; + regs->u_regs[UREG_G4] = addr + 24; + addr = regs->u_regs[UREG_G5]; + regs->tpc = addr; + regs->tnpc = addr+4; + return 3; + } + } + } while (0); + +#ifdef CONFIG_PAX_DLRESOLVE + do { /* PaX: unpatched PLT emulation step 2 */ + unsigned int save, call, nop; + + err = get_user(save, (unsigned int *)(regs->tpc-4)); + err |= get_user(call, (unsigned int *)regs->tpc); + err |= get_user(nop, (unsigned int *)(regs->tpc+4)); + if (err) + break; + + if (save == 0x9DE3BFA8U && + (call & 0xC0000000U) == 0x40000000U && + nop == 0x01000000U) + { + unsigned long dl_resolve = regs->tpc + ((((call | 0xFFFFFFFFC0000000UL) ^ 0x20000000UL) + 0x20000000UL) << 2); + + if (test_thread_flag(TIF_32BIT)) + dl_resolve &= 0xFFFFFFFFUL; + + regs->u_regs[UREG_RETPC] = regs->tpc; + regs->tpc = dl_resolve; + regs->tnpc = dl_resolve+4; + return 3; + } + } while (0); +#endif + + do { /* PaX: patched PLT emulation #7, must be AFTER the unpatched PLT emulation */ + unsigned int sethi, ba, nop; + + err = get_user(sethi, (unsigned int *)regs->tpc); + err |= get_user(ba, (unsigned int *)(regs->tpc+4)); + err |= get_user(nop, (unsigned int *)(regs->tpc+8)); + + if (err) + break; + + if ((sethi & 0xFFC00000U) == 0x03000000U && + (ba & 0xFFF00000U) == 0x30600000U && + nop == 0x01000000U) + { + unsigned long addr; + + addr = (sethi & 0x003FFFFFU) << 10; + regs->u_regs[UREG_G1] = addr; + addr = regs->tpc + ((((ba | 0xFFFFFFFFFFF80000UL) ^ 0x00040000UL) + 0x00040000UL) << 2); + + if (test_thread_flag(TIF_32BIT)) + addr &= 0xFFFFFFFFUL; + + regs->tpc = addr; + regs->tnpc = addr+4; + return 2; + } + } while (0); + +#endif + + return 1; +} + +void pax_report_insns(struct pt_regs *regs, void *pc, void *sp) +{ + unsigned long
i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 8; i++) { + unsigned int c; + if (get_user(c, (unsigned int *)pc+i)) + printk(KERN_CONT "???????? "); + else + printk(KERN_CONT "%08x ", c); + } + printk("\n"); +} +#endif + asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) { struct mm_struct *mm = current->mm; @@ -348,6 +811,29 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) if (!vma) goto bad_area; +#ifdef CONFIG_PAX_PAGEEXEC + /* PaX: detect ITLB misses on non-exec pages */ + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && vma->vm_start <= address && + !(vma->vm_flags & VM_EXEC) && (fault_code & FAULT_CODE_ITLB)) + { + if (address != regs->tpc) + goto good_area; + + up_read(&mm->mmap_sem); + switch (pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 2: + case 3: + return; +#endif + + } + pax_report_fault(regs, (void *)regs->tpc, (void *)(regs->u_regs[UREG_FP] + STACK_BIAS)); + do_group_exit(SIGKILL); + } +#endif + /* Pure DTLB misses do not tell us whether the fault causing * load/store/atomic was a write or not, it only says that there * was no match. So in such a case we (carefully) read the diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index 42c55df..20da942 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -106,6 +106,36 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, return 1; } +int __get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next, flags; + pgd_t *pgdp; + int nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + + local_irq_save(flags); + pgdp = pgd_offset(mm, addr); + do { + pgd_t pgd = *pgdp; + + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + break; + if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + break; + } while (pgdp++, addr = next, addr != end); + local_irq_restore(flags); + + return nr; +} + int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 07e1453..ae6e02e 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -28,7 +28,8 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, - unsigned long flags) + unsigned long flags, + unsigned long offset) { struct mm_struct *mm = current->mm; struct vm_area_struct * vma; @@ -67,7 +68,7 @@ full_search: } return -ENOMEM; } - if (likely(!vma || addr + len <= vma->vm_start)) { + if (likely(check_heap_stack_gap(vma, &addr, len, offset))) { /* * Remember the place where we stopped the search: */ @@ -85,7 +86,8 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, const unsigned long len, const unsigned long pgoff, - const unsigned long flags) + const unsigned long flags, + const unsigned long offset) { struct vm_area_struct *vma; struct mm_struct *mm = current->mm; @@ -105,26 +107,28 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, /* make sure it can fit in the remaining address space */ if (likely(addr > len)) { - vma = find_vma(mm, addr-len); - if (!vma || addr <= vma->vm_start) { + addr -= len; + vma = find_vma(mm, addr); + if (check_heap_stack_gap(vma, &addr, len, offset)) { /* remember the address as a hint 
for next time */ - return (mm->free_area_cache = addr-len); + return (mm->free_area_cache = addr); } } if (unlikely(mm->mmap_base < len)) goto bottomup; - addr = (mm->mmap_base-len) & HPAGE_MASK; + addr = mm->mmap_base - len; do { + addr &= HPAGE_MASK; /* * Lookup failure means no vma is above this address, * else if new region fits below vma->vm_start, * return with success: */ vma = find_vma(mm, addr); - if (likely(!vma || addr+len <= vma->vm_start)) { + if (likely(check_heap_stack_gap(vma, &addr, len, offset))) { /* remember the address as a hint for next time */ return (mm->free_area_cache = addr); } @@ -134,8 +138,8 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, mm->cached_hole_size = vma->vm_start - addr; /* try just below the current vma->vm_start */ - addr = (vma->vm_start-len) & HPAGE_MASK; - } while (likely(len < vma->vm_start)); + addr = skip_heap_stack_gap(vma, len, offset); + } while (!IS_ERR_VALUE(addr)); bottomup: /* @@ -163,6 +167,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long task_size = TASK_SIZE; + unsigned long offset = gr_rand_threadstack_offset(mm, file, flags); if (test_thread_flag(TIF_32BIT)) task_size = STACK_TOP32; @@ -181,16 +186,15 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, if (addr) { addr = ALIGN(addr, HPAGE_SIZE); vma = find_vma(mm, addr); - if (task_size - len >= addr && - (!vma || addr + len <= vma->vm_start)) + if (task_size - len >= addr && check_heap_stack_gap(vma, &addr, len, offset)) return addr; } if (mm->get_unmapped_area == arch_get_unmapped_area) return hugetlb_get_unmapped_area_bottomup(file, addr, len, - pgoff, flags); + pgoff, flags, offset); else return hugetlb_get_unmapped_area_topdown(file, addr, len, - pgoff, flags); + pgoff, flags, offset); } pte_t *huge_pte_alloc(struct mm_struct *mm, diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index 7b00de6..78239f4 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -316,6 +316,9 @@ extern void device_scan(void); pgprot_t PAGE_SHARED __read_mostly; EXPORT_SYMBOL(PAGE_SHARED); +pgprot_t PAGE_SHARED_NOEXEC __read_mostly; +EXPORT_SYMBOL(PAGE_SHARED_NOEXEC); + void __init paging_init(void) { switch(sparc_cpu_model) { @@ -344,17 +347,17 @@ void __init paging_init(void) /* Initialize the protection map with non-constant, MMU dependent values. 
*/ protection_map[0] = PAGE_NONE; - protection_map[1] = PAGE_READONLY; - protection_map[2] = PAGE_COPY; - protection_map[3] = PAGE_COPY; + protection_map[1] = PAGE_READONLY_NOEXEC; + protection_map[2] = PAGE_COPY_NOEXEC; + protection_map[3] = PAGE_COPY_NOEXEC; protection_map[4] = PAGE_READONLY; protection_map[5] = PAGE_READONLY; protection_map[6] = PAGE_COPY; protection_map[7] = PAGE_COPY; protection_map[8] = PAGE_NONE; - protection_map[9] = PAGE_READONLY; - protection_map[10] = PAGE_SHARED; - protection_map[11] = PAGE_SHARED; + protection_map[9] = PAGE_READONLY_NOEXEC; + protection_map[10] = PAGE_SHARED_NOEXEC; + protection_map[11] = PAGE_SHARED_NOEXEC; protection_map[12] = PAGE_READONLY; protection_map[13] = PAGE_READONLY; protection_map[14] = PAGE_SHARED; diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index d2c5737..05a3e21 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -170,9 +170,9 @@ unsigned long sparc64_kern_sec_context __read_mostly; int num_kernel_image_mappings; #ifdef CONFIG_DEBUG_DCFLUSH -atomic_t dcpage_flushes = ATOMIC_INIT(0); +atomic_unchecked_t dcpage_flushes = ATOMIC_INIT(0); #ifdef CONFIG_SMP -atomic_t dcpage_flushes_xcall = ATOMIC_INIT(0); +atomic_unchecked_t dcpage_flushes_xcall = ATOMIC_INIT(0); #endif #endif @@ -180,7 +180,7 @@ inline void flush_dcache_page_impl(struct page *page) { BUG_ON(tlb_type == hypervisor); #ifdef CONFIG_DEBUG_DCFLUSH - atomic_inc(&dcpage_flushes); + atomic_inc_unchecked(&dcpage_flushes); #endif #ifdef DCACHE_ALIASING_POSSIBLE @@ -421,10 +421,10 @@ void mmu_info(struct seq_file *m) #ifdef CONFIG_DEBUG_DCFLUSH seq_printf(m, "DCPageFlushes\t: %d\n", - atomic_read(&dcpage_flushes)); + atomic_read_unchecked(&dcpage_flushes)); #ifdef CONFIG_SMP seq_printf(m, "DCPageFlushesXC\t: %d\n", - atomic_read(&dcpage_flushes_xcall)); + atomic_read_unchecked(&dcpage_flushes_xcall)); #endif /* CONFIG_SMP */ #endif /* CONFIG_DEBUG_DCFLUSH */ } diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index cbef74e..c38fead 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -2200,6 +2200,13 @@ void __init ld_mmu_srmmu(void) PAGE_SHARED = pgprot_val(SRMMU_PAGE_SHARED); BTFIXUPSET_INT(page_copy, pgprot_val(SRMMU_PAGE_COPY)); BTFIXUPSET_INT(page_readonly, pgprot_val(SRMMU_PAGE_RDONLY)); + +#ifdef CONFIG_PAX_PAGEEXEC + PAGE_SHARED_NOEXEC = pgprot_val(SRMMU_PAGE_SHARED_NOEXEC); + BTFIXUPSET_INT(page_copy_noexec, pgprot_val(SRMMU_PAGE_COPY_NOEXEC)); + BTFIXUPSET_INT(page_readonly_noexec, pgprot_val(SRMMU_PAGE_RDONLY_NOEXEC)); +#endif + BTFIXUPSET_INT(page_kernel, pgprot_val(SRMMU_PAGE_KERNEL)); page_kernel = pgprot_val(SRMMU_PAGE_KERNEL); diff --git a/arch/sparc/prom/cif.S b/arch/sparc/prom/cif.S index 9c86b4b..8050f38 100644 --- a/arch/sparc/prom/cif.S +++ b/arch/sparc/prom/cif.S @@ -11,11 +11,10 @@ .text .globl prom_cif_direct prom_cif_direct: + save %sp, -192, %sp sethi %hi(p1275buf), %o1 or %o1, %lo(p1275buf), %o1 - ldx [%o1 + 0x0010], %o2 ! prom_cif_stack - save %o2, -192, %sp - ldx [%i1 + 0x0008], %l2 ! prom_cif_handler + ldx [%o1 + 0x0008], %l2 ! prom_cif_handler mov %g4, %l0 mov %g5, %l1 mov %g6, %l3 diff --git a/arch/sparc/prom/init_64.c b/arch/sparc/prom/init_64.c index 5016c5e..ffb1cc5 100644 --- a/arch/sparc/prom/init_64.c +++ b/arch/sparc/prom/init_64.c @@ -26,13 +26,13 @@ phandle prom_chosen_node; * failure. It gets passed the pointer to the PROM vector. 
*/ -extern void prom_cif_init(void *, void *); +extern void prom_cif_init(void *); -void __init prom_init(void *cif_handler, void *cif_stack) +void __init prom_init(void *cif_handler) { phandle node; - prom_cif_init(cif_handler, cif_stack); + prom_cif_init(cif_handler); prom_chosen_node = prom_finddevice(prom_chosen_path); if (!prom_chosen_node || (s32)prom_chosen_node == -1) diff --git a/arch/sparc/prom/p1275.c b/arch/sparc/prom/p1275.c index d9850c2..5bbbc23 100644 --- a/arch/sparc/prom/p1275.c +++ b/arch/sparc/prom/p1275.c @@ -21,7 +21,6 @@ struct { long prom_callback; /* 0x00 */ void (*prom_cif_handler)(long *); /* 0x08 */ - unsigned long prom_cif_stack; /* 0x10 */ } p1275buf; extern void prom_world(int); @@ -53,5 +52,4 @@ void p1275_cmd_direct(unsigned long *args) void prom_cif_init(void *cif_handler, void *cif_stack) { p1275buf.prom_cif_handler = (void (*)(long *))cif_handler; - p1275buf.prom_cif_stack = (unsigned long)cif_stack; } diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 6cb8319..ee12bac 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -142,6 +142,7 @@ source "kernel/Kconfig.hz" config KEXEC bool "kexec system call" + depends on !GRKERNSEC_KMEM ---help--- kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h index 27fe667..36d474c 100644 --- a/arch/tile/include/asm/atomic_64.h +++ b/arch/tile/include/asm/atomic_64.h @@ -142,6 +142,16 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) +#define atomic64_read_unchecked(v) atomic64_read(v) +#define atomic64_set_unchecked(v, i) atomic64_set((v), (i)) +#define atomic64_add_unchecked(a, v) atomic64_add((a), (v)) +#define atomic64_add_return_unchecked(a, v) atomic64_add_return((a), (v)) +#define atomic64_sub_unchecked(a, v) atomic64_sub((a), (v)) +#define atomic64_inc_unchecked(v) atomic64_inc(v) +#define atomic64_inc_return_unchecked(v) atomic64_inc_return(v) +#define atomic64_dec_unchecked(v) atomic64_dec(v) +#define atomic64_cmpxchg_unchecked(v, o, n) atomic64_cmpxchg((v), (o), (n)) + /* Atomic dec and inc don't implement barrier, so provide them if needed. 
*/ #define smp_mb__before_atomic_dec() smp_mb() #define smp_mb__after_atomic_dec() smp_mb() diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h index 392e533..536b092 100644 --- a/arch/tile/include/asm/cache.h +++ b/arch/tile/include/asm/cache.h @@ -15,11 +15,12 @@ #ifndef _ASM_TILE_CACHE_H #define _ASM_TILE_CACHE_H +#include #include /* bytes per L1 data cache line */ #define L1_CACHE_SHIFT CHIP_L1D_LOG_LINE_SIZE() -#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) /* bytes per L2 cache line */ #define L2_CACHE_SHIFT CHIP_L2_LOG_LINE_SIZE() diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h index ef34d2caa..d6ce60c 100644 --- a/arch/tile/include/asm/uaccess.h +++ b/arch/tile/include/asm/uaccess.h @@ -361,9 +361,9 @@ static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { - int sz = __compiletime_object_size(to); + size_t sz = __compiletime_object_size(to); - if (likely(sz == -1 || sz >= n)) + if (likely(sz == (size_t)-1 || sz >= n)) n = _copy_from_user(to, from, n); else copy_from_user_overflow(); diff --git a/arch/um/Kconfig.rest b/arch/um/Kconfig.rest index 567eb5f..93eb92f 100644 --- a/arch/um/Kconfig.rest +++ b/arch/um/Kconfig.rest @@ -12,6 +12,8 @@ source "arch/um/Kconfig.net" source "fs/Kconfig" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/um/Makefile b/arch/um/Makefile index 7730af6..880804f 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -61,6 +61,10 @@ USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -D__KERNEL__,,\ $(patsubst -I%,,$(KBUILD_CFLAGS)))) $(ARCH_INCLUDE) $(MODE_INCLUDE) \ $(filter -I%,$(CFLAGS)) -D_FILE_OFFSET_BITS=64 -idirafter include +ifdef CONSTIFY_PLUGIN +USER_CFLAGS += -fplugin-arg-constify_plugin-no-constify +endif + #This will adjust *FLAGS accordingly to the platform. 
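A minimal, self-contained sketch (not part of the patch) of the size-sentinel idea behind the copy_from_user() change in arch/tile/include/asm/uaccess.h above: the compile-time object size is kept in a size_t and "unknown" is spelled (size_t)-1, so no signed/unsigned conversion happens when it is compared against the requested length. The object_size() helper below is a stand-in for __compiletime_object_size(), not a real kernel interface.

/* why the sentinel is size_t/(size_t)-1 rather than int/-1 */
#include <stdio.h>
#include <stddef.h>

/* stand-in for __compiletime_object_size(): (size_t)-1 means "size unknown" */
static size_t object_size(const void *p) { (void)p; return (size_t)-1; }

static int checked_copy(void *to, const void *from, size_t n)
{
	size_t sz = object_size(to);

	if (sz == (size_t)-1 || sz >= n)	/* unknown size, or buffer large enough */
		return 0;			/* a real kernel would do the copy here */
	return -1;				/* a real kernel would report the overflow */
}

int main(void)
{
	char buf[16];
	printf("%d\n", checked_copy(buf, NULL, sizeof(buf)));
	return 0;
}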
include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS) diff --git a/arch/um/include/asm/cache.h b/arch/um/include/asm/cache.h index 19e1bdd..3665b77 100644 --- a/arch/um/include/asm/cache.h +++ b/arch/um/include/asm/cache.h @@ -1,6 +1,7 @@ #ifndef __UM_CACHE_H #define __UM_CACHE_H +#include #if defined(CONFIG_UML_X86) && !defined(CONFIG_64BIT) # define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) @@ -12,6 +13,6 @@ # define L1_CACHE_SHIFT 5 #endif -#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) #endif diff --git a/arch/um/include/asm/kmap_types.h b/arch/um/include/asm/kmap_types.h index 6c03acd..a5e0215 100644 --- a/arch/um/include/asm/kmap_types.h +++ b/arch/um/include/asm/kmap_types.h @@ -23,6 +23,7 @@ enum km_type { KM_IRQ1, KM_SOFTIRQ0, KM_SOFTIRQ1, + KM_CLEARPAGE, KM_TYPE_NR }; diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h index 7cfc3ce..cbd1a58 100644 --- a/arch/um/include/asm/page.h +++ b/arch/um/include/asm/page.h @@ -14,6 +14,9 @@ #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) +#define ktla_ktva(addr) (addr) +#define ktva_ktla(addr) (addr) + #ifndef __ASSEMBLY__ struct page; diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h index 0032f92..cd151e0 100644 --- a/arch/um/include/asm/pgtable-3level.h +++ b/arch/um/include/asm/pgtable-3level.h @@ -58,6 +58,7 @@ #define pud_present(x) (pud_val(x) & _PAGE_PRESENT) #define pud_populate(mm, pud, pmd) \ set_pud(pud, __pud(_PAGE_TABLE + __pa(pmd))) +#define pud_populate_kernel(mm, pud, pmd) pud_populate((mm), (pud), (pmd)) #ifdef CONFIG_64BIT #define set_pud(pudptr, pudval) set_64bit((u64 *) (pudptr), pud_val(pudval)) diff --git a/arch/um/include/shared/kern_constants.h b/arch/um/include/shared/kern_constants.h new file mode 100644 index 0000000..599e326 --- /dev/null +++ b/arch/um/include/shared/kern_constants.h @@ -0,0 +1 @@ +#include "../../../../include/generated/asm-offsets.h" diff --git a/arch/um/include/shared/user_constants.h b/arch/um/include/shared/user_constants.h new file mode 100644 index 0000000..5cbdb19 --- /dev/null +++ b/arch/um/include/shared/user_constants.h @@ -0,0 +1,40 @@ +/* + * DO NOT MODIFY. 
+ * + * This file was generated by arch/um/Makefile + * + */ + +#define HOST_SC_CR2 176 /* offsetof(struct sigcontext, cr2) # */ +#define HOST_SC_ERR 152 /* offsetof(struct sigcontext, err) # */ +#define HOST_SC_TRAPNO 160 /* offsetof(struct sigcontext, trapno) # */ +#define HOST_FP_SIZE 64 /* sizeof(struct _fpstate) / sizeof(unsigned long) # */ +#define HOST_RBX 5 /* RBX # */ +#define HOST_RCX 11 /* RCX # */ +#define HOST_RDI 14 /* RDI # */ +#define HOST_RSI 13 /* RSI # */ +#define HOST_RDX 12 /* RDX # */ +#define HOST_RBP 4 /* RBP # */ +#define HOST_RAX 10 /* RAX # */ +#define HOST_R8 9 /* R8 # */ +#define HOST_R9 8 /* R9 # */ +#define HOST_R10 7 /* R10 # */ +#define HOST_R11 6 /* R11 # */ +#define HOST_R12 3 /* R12 # */ +#define HOST_R13 2 /* R13 # */ +#define HOST_R14 1 /* R14 # */ +#define HOST_R15 0 /* R15 # */ +#define HOST_ORIG_RAX 15 /* ORIG_RAX # */ +#define HOST_CS 17 /* CS # */ +#define HOST_SS 20 /* SS # */ +#define HOST_EFLAGS 18 /* EFLAGS # */ +#define HOST_IP 16 /* RIP # */ +#define HOST_SP 19 /* RSP # */ +#define UM_FRAME_SIZE 216 /* sizeof(struct user_regs_struct) # */ +#define UM_POLLIN 1 /* POLLIN # */ +#define UM_POLLPRI 2 /* POLLPRI # */ +#define UM_POLLOUT 4 /* POLLOUT # */ +#define UM_PROT_READ 1 /* PROT_READ # */ +#define UM_PROT_WRITE 2 /* PROT_WRITE # */ +#define UM_PROT_EXEC 4 /* PROT_EXEC # */ + diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index c533835..84db18e 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -406,22 +406,6 @@ int singlestepping(void * t) return 2; } -/* - * Only x86 and x86_64 have an arch_align_stack(). - * All other arches have "#define arch_align_stack(x) (x)" - * in their asm/system.h - * As this is included in UML from asm-um/system-generic.h, - * we can use it to behave as the subarch does. - */ -#ifndef arch_align_stack -unsigned long arch_align_stack(unsigned long sp) -{ - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() % 8192; - return sp & ~0xf; -} -#endif - unsigned long get_wchan(struct task_struct *p) { unsigned long stack_page, sp, ip; diff --git a/arch/unicore32/include/asm/cache.h b/arch/unicore32/include/asm/cache.h index ad8f795..2c7eec6 100644 --- a/arch/unicore32/include/asm/cache.h +++ b/arch/unicore32/include/asm/cache.h @@ -12,8 +12,10 @@ #ifndef __UNICORE_CACHE_H__ #define __UNICORE_CACHE_H__ -#define L1_CACHE_SHIFT (5) -#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#include + +#define L1_CACHE_SHIFT 5 +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) /* * Memory returned by kmalloc() may be used for DMA, so we must make diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 28a1bca..a0b8829 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -75,6 +75,7 @@ config X86 select HAVE_BPF_JIT if (X86_64 && NET) select CLKEVT_I8253 select ARCH_HAVE_NMI_SAFE_CMPXCHG + select HAVE_ARCH_SECCOMP_FILTER select ARCH_SUPPORTS_ATOMIC_RMW config INSTRUCTION_DECODER @@ -236,7 +237,7 @@ config X86_HT config X86_32_LAZY_GS def_bool y - depends on X86_32 && !CC_STACKPROTECTOR + depends on X86_32 && !CC_STACKPROTECTOR && !PAX_MEMORY_UDEREF config ARCH_HWEIGHT_CFLAGS string @@ -526,6 +527,7 @@ config SCHED_OMIT_FRAME_POINTER menuconfig PARAVIRT_GUEST bool "Paravirtualized guest support" + depends on !GRKERNSEC_CONFIG_AUTO || GRKERNSEC_CONFIG_VIRT_GUEST || (GRKERNSEC_CONFIG_VIRT_HOST && GRKERNSEC_CONFIG_VIRT_XEN) ---help--- Say Y here to get to see options related to running Linux under various hypervisors. 
This option alone does not add any kernel code. @@ -903,6 +905,7 @@ config VM86 config X86_16BIT bool "Enable support for 16-bit segments" if EXPERT + depends on !GRKERNSEC default y ---help--- This option is required by programs like Wine to run 16-bit @@ -1040,7 +1043,7 @@ choice config NOHIGHMEM bool "off" - depends on !X86_NUMAQ + depends on !X86_NUMAQ && !(PAX_PAGEEXEC && PAX_ENABLE_PAE) ---help--- Linux can use up to 64 Gigabytes of physical memory on x86 systems. However, the address space of 32-bit x86 processors is only 4 @@ -1077,7 +1080,7 @@ config NOHIGHMEM config HIGHMEM4G bool "4GB" - depends on !X86_NUMAQ + depends on !X86_NUMAQ && !(PAX_PAGEEXEC && PAX_ENABLE_PAE) ---help--- Select this if you have a 32-bit processor and between 1 and 4 gigabytes of physical RAM. @@ -1131,7 +1134,7 @@ config PAGE_OFFSET hex default 0xB0000000 if VMSPLIT_3G_OPT default 0x80000000 if VMSPLIT_2G - default 0x78000000 if VMSPLIT_2G_OPT + default 0x70000000 if VMSPLIT_2G_OPT default 0x40000000 if VMSPLIT_1G default 0xC0000000 depends on X86_32 @@ -1514,6 +1517,7 @@ config SECCOMP config CC_STACKPROTECTOR bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" + depends on X86_64 || !PAX_MEMORY_UDEREF ---help--- This option turns on the -fstack-protector GCC feature. This feature puts, at the beginning of functions, a canary value on @@ -1532,6 +1536,7 @@ source kernel/Kconfig.hz config KEXEC bool "kexec system call" + depends on !GRKERNSEC_KMEM ---help--- kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot @@ -1634,6 +1639,8 @@ config X86_NEED_RELOCS config PHYSICAL_ALIGN hex "Alignment value to which kernel should be aligned" if X86_32 default "0x1000000" + range 0x200000 0x1000000 if PAX_KERNEXEC && X86_PAE + range 0x400000 0x1000000 if PAX_KERNEXEC && !X86_PAE range 0x2000 0x1000000 ---help--- This value puts the alignment restrictions on physical address @@ -1665,9 +1672,10 @@ config HOTPLUG_CPU Say N if you want to disable CPU hotplug. config COMPAT_VDSO - def_bool y + def_bool n prompt "Compat VDSO support" depends on X86_32 || IA32_EMULATION + depends on !PAX_PAGEEXEC && !PAX_SEGMEXEC && !PAX_KERNEXEC && !PAX_MEMORY_UDEREF ---help--- Map the 32-bit VDSO to the predictable old-style address too. @@ -2189,6 +2197,8 @@ source "fs/Kconfig" source "arch/x86/Kconfig.debug" +source "kernel/vserver/Kconfig" + source "security/Kconfig" source "crypto/Kconfig" diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index e3ca7e0..b30b28a 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -341,7 +341,7 @@ config X86_PPRO_FENCE config X86_F00F_BUG def_bool y - depends on M586MMX || M586TSC || M586 || M486 || M386 + depends on (M586MMX || M586TSC || M586 || M486 || M386) && !PAX_KERNEXEC config X86_INVD_BUG def_bool y @@ -365,7 +365,7 @@ config X86_POPAD_OK config X86_ALIGNMENT_16 def_bool y - depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 + depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK8 || MK7 || MK6 || MCORE2 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 config X86_INTEL_USERCOPY def_bool y @@ -411,7 +411,7 @@ config X86_CMPXCHG64 # generates cmov. 
config X86_CMOV def_bool y - depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) + depends on (MK8 || MK7 || MCORE2 || MPSC || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) config X86_MINIMUM_CPU_FAMILY int diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index bf56e17..91465a1 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -81,7 +81,7 @@ config X86_PTDUMP config DEBUG_RODATA bool "Write protect kernel read-only data structures" default y - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && BROKEN ---help--- Mark the kernel read-only data as write-protected in the pagetables, in order to catch accidental (and incorrect) writes to such const @@ -99,7 +99,7 @@ config DEBUG_RODATA_TEST config DEBUG_SET_MODULE_RONX bool "Set loadable kernel module data as NX and text as RO" - depends on MODULES + depends on MODULES && BROKEN ---help--- This option helps catch unintended modifications to loadable kernel module's text and read-only data. It also prevents execution @@ -272,7 +272,7 @@ config OPTIMIZE_INLINING config DEBUG_STRICT_USER_COPY_CHECKS bool "Strict copy size checks" - depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING + depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING && !PAX_SIZE_OVERFLOW ---help--- Enabling this option turns a certain set of sanity checks for user copy operations into compile time failures. diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 03dbc7f5b..e1aa479 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -40,12 +40,12 @@ ifeq ($(CONFIG_X86_32),y) KBUILD_CFLAGS += $(cflags-y) # temporary until string.h is fixed - KBUILD_CFLAGS += -ffreestanding else BITS := 64 UTS_MACHINE := x86_64 CHECKFLAGS += -D__x86_64__ -m64 + biarch := $(call cc-option,-m64) KBUILD_AFLAGS += -m64 KBUILD_CFLAGS += -m64 @@ -72,6 +72,8 @@ else KBUILD_CFLAGS += -maccumulate-outgoing-args endif +KBUILD_CFLAGS += -ffreestanding + ifdef CONFIG_CC_STACKPROTECTOR cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(KBUILD_CPPFLAGS) $(biarch)),y) @@ -199,3 +201,12 @@ define archhelp echo ' FDARGS="..." arguments for the booted kernel' echo ' FDINITRD=file initrd for the booted kernel' endef + +define OLD_LD + +*** ${VERSION}.${PATCHLEVEL} PaX kernels no longer build correctly with old versions of binutils. 
+*** Please upgrade your binutils to 2.18 or newer +endef + +archprepare: + $(if $(LDFLAGS_BUILD_ID),,$(error $(OLD_LD))) diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index e80542b..c5099c3 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -63,6 +63,9 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -m32 -g -Os -D_SETUP -D__KERNEL__ \ $(call cc-option, -fno-unit-at-a-time)) \ $(call cc-option, -fno-stack-protector) \ $(call cc-option, -mpreferred-stack-boundary=2) +ifdef CONSTIFY_PLUGIN +KBUILD_CFLAGS += -fplugin-arg-constify_plugin-no-constify +endif KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h index 878e4b9..20537ab 100644 --- a/arch/x86/boot/bitops.h +++ b/arch/x86/boot/bitops.h @@ -26,7 +26,7 @@ static inline int variable_test_bit(int nr, const void *addr) u8 v; const u32 *p = (const u32 *)addr; - asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr)); + asm volatile("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr)); return v; } @@ -37,7 +37,7 @@ static inline int variable_test_bit(int nr, const void *addr) static inline void set_bit(int nr, void *addr) { - asm("btsl %1,%0" : "+m" (*(u32 *)addr) : "Ir" (nr)); + asm volatile("btsl %1,%0" : "+m" (*(u32 *)addr) : "Ir" (nr)); } #endif /* BOOT_BITOPS_H */ diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index c7093bd..d4247ffe0 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -85,7 +85,7 @@ static inline void io_delay(void) static inline u16 ds(void) { u16 seg; - asm("movw %%ds,%0" : "=rm" (seg)); + asm volatile("movw %%ds,%0" : "=rm" (seg)); return seg; } @@ -181,7 +181,7 @@ static inline void wrgs32(u32 v, addr_t addr) static inline int memcmp(const void *s1, const void *s2, size_t len) { u8 diff; - asm("repe; cmpsb; setnz %0" + asm volatile("repe; cmpsb; setnz %0" : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; } diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index cda5cef..c1f26aa 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -15,6 +15,9 @@ KBUILD_CFLAGS += $(cflags-y) KBUILD_CFLAGS += -mno-mmx -mno-sse KBUILD_CFLAGS += $(call cc-option,-ffreestanding) KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector) +ifdef CONSTIFY_PLUGIN +KBUILD_CFLAGS += -fplugin-arg-constify_plugin-no-constify +endif KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 67a655a..b924059 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -76,7 +76,7 @@ ENTRY(startup_32) notl %eax andl %eax, %ebx #else - movl $LOAD_PHYSICAL_ADDR, %ebx + movl $____LOAD_PHYSICAL_ADDR, %ebx #endif /* Target address to relocate to for decompression */ @@ -162,7 +162,7 @@ relocated: * and where it was actually loaded. */ movl %ebp, %ebx - subl $LOAD_PHYSICAL_ADDR, %ebx + subl $____LOAD_PHYSICAL_ADDR, %ebx jz 2f /* Nothing to be done if loaded at compiled addr. */ /* * Process relocations. 
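A minimal sketch (not part of the patch) of what the asm to asm volatile conversions in arch/x86/boot/bitops.h and boot.h above guard against: without volatile, the compiler may treat an asm statement with outputs as a pure function of its inputs and merge or hoist repeated uses, which is wrong for reads of segment registers, MSRs or CPUID whose values can change behind the compiler's back. The read_ds() helper mirrors the ds() routine in boot.h and is illustrative only; it builds with GCC or Clang on x86.

#include <stdio.h>

static unsigned short read_ds(void)
{
	unsigned short seg;
	/* volatile keeps every call as a real read of %ds */
	asm volatile("movw %%ds,%0" : "=rm" (seg));
	return seg;
}

int main(void)
{
	/* two calls must remain two reads, not be folded into one */
	printf("ds=%#x ds=%#x\n", read_ds(), read_ds());
	return 0;
}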
@@ -170,8 +170,7 @@ relocated: 1: subl $4, %edi movl (%edi), %ecx - testl %ecx, %ecx - jz 2f + jecxz 2f addl %ebx, -__PAGE_OFFSET(%ebx, %ecx) jmp 1b 2: diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 35af09d..99c9676 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -91,7 +91,7 @@ ENTRY(startup_32) notl %eax andl %eax, %ebx #else - movl $LOAD_PHYSICAL_ADDR, %ebx + movl $____LOAD_PHYSICAL_ADDR, %ebx #endif /* Target address to relocate to for decompression */ @@ -233,7 +233,7 @@ ENTRY(startup_64) notq %rax andq %rax, %rbp #else - movq $LOAD_PHYSICAL_ADDR, %rbp + movq $____LOAD_PHYSICAL_ADDR, %rbp #endif /* Target address to relocate to for decompression */ diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 3a19d04..1bef1d5 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -226,7 +226,7 @@ void __putstr(int error, const char *s) void *memset(void *s, int c, size_t n) { - int i; + size_t i; char *ss = s; for (i = 0; i < n; i++) @@ -282,7 +282,7 @@ static void parse_elf(void *output) Elf32_Ehdr ehdr; Elf32_Phdr *phdrs, *phdr; #endif - void *dest; + void *dest, *prev; int i; memcpy(&ehdr, output, sizeof(ehdr)); @@ -310,13 +310,16 @@ static void parse_elf(void *output) case PT_LOAD: #ifdef CONFIG_RELOCATABLE dest = output; - dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR); + dest += (phdr->p_paddr - ____LOAD_PHYSICAL_ADDR); #else dest = (void *)(phdr->p_paddr); #endif memcpy(dest, output + phdr->p_offset, phdr->p_filesz); + if (i) + memset(prev, 0xff, dest - prev); + prev = dest + phdr->p_filesz; break; default: /* Ignore other PT_* */ break; } @@ -363,7 +366,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, error("Destination address too large"); #endif #ifndef CONFIG_RELOCATABLE - if ((unsigned long)output != LOAD_PHYSICAL_ADDR) + if ((unsigned long)output != ____LOAD_PHYSICAL_ADDR) error("Wrong destination address"); #endif diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index 4d3ff03..e4972ff 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -74,7 +74,7 @@ static int has_fpu(void) u16 fcw = -1, fsw = -1; u32 cr0; - asm("movl %%cr0,%0" : "=r" (cr0)); + asm volatile("movl %%cr0,%0" : "=r" (cr0)); if (cr0 & (X86_CR0_EM|X86_CR0_TS)) { cr0 &= ~(X86_CR0_EM|X86_CR0_TS); asm volatile("movl %0,%%cr0" : : "r" (cr0)); @@ -90,7 +90,7 @@ static int has_eflag(u32 mask) { u32 f0, f1; - asm("pushfl ; " + asm volatile("pushfl ; " "pushfl ; " "popl %0 ; " "movl %0,%1 ; " @@ -115,7 +115,7 @@ static void get_flags(void) set_bit(X86_FEATURE_FPU, cpu.flags); if (has_eflag(X86_EFLAGS_ID)) { - asm("cpuid" + asm volatile("cpuid" : "=a" (max_intel_level), "=b" (cpu_vendor[0]), "=d" (cpu_vendor[1]), @@ -124,7 +124,7 @@ static void get_flags(void) if (max_intel_level >= 0x00000001 && max_intel_level <= 0x0000ffff) { - asm("cpuid" + asm volatile("cpuid" : "=a" (tfms), "=c" (cpu.flags[4]), "=d" (cpu.flags[0]) @@ -136,7 +136,7 @@ static void get_flags(void) cpu.model += ((tfms >> 16) & 0xf) << 4; } - asm("cpuid" + asm volatile("cpuid" : "=a" (max_amd_level) : "a" (0x80000000) : "ebx", "ecx", "edx"); @@ -144,7 +144,7 @@ static void get_flags(void) if (max_amd_level >= 0x80000001 && max_amd_level <= 0x8000ffff) { u32 eax = 0x80000001; - asm("cpuid" + asm volatile("cpuid" : "+a" (eax), "=c" (cpu.flags[6]), "=d" (cpu.flags[1]) @@ -203,9 +203,9 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) u32 
ecx = MSR_K7_HWCR; u32 eax, edx; - asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); + asm volatile("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); eax &= ~(1 << 15); - asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); + asm volatile("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); get_flags(); /* Make sure it really did something */ err = check_flags(); @@ -218,9 +218,9 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) u32 ecx = MSR_VIA_FCR; u32 eax, edx; - asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); + asm volatile("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); eax |= (1<<1)|(1<<7); - asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); + asm volatile("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); set_bit(X86_FEATURE_CX8, cpu.flags); err = check_flags(); @@ -231,12 +231,12 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) u32 eax, edx; u32 level = 1; - asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); - asm("wrmsr" : : "a" (~0), "d" (edx), "c" (ecx)); - asm("cpuid" + asm volatile("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); + asm volatile("wrmsr" : : "a" (~0), "d" (edx), "c" (ecx)); + asm volatile("cpuid" : "+a" (level), "=d" (cpu.flags[0]) : : "ecx", "ebx"); - asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); + asm volatile("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); err = check_flags(); } diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index bdb4d45..77703de 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -224,10 +224,14 @@ setup_data: .quad 0 # 64-bit physical pointer to # single linked list of # struct setup_data -pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr +pref_address: .quad ____LOAD_PHYSICAL_ADDR # preferred load addr #define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_extract_offset) +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) +#define VO_INIT_SIZE (VO__end - VO__text - __PAGE_OFFSET - ____LOAD_PHYSICAL_ADDR) +#else #define VO_INIT_SIZE (VO__end - VO__text) +#endif #if ZO_INIT_SIZE > VO_INIT_SIZE #define INIT_SIZE ZO_INIT_SIZE #else diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index db75d07..8e6d0af 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c @@ -19,7 +19,7 @@ static int detect_memory_e820(void) { - int count = 0; + unsigned int count = 0; struct biosregs ireg, oreg; struct e820entry *desc = boot_params.e820_map; static struct e820entry buf; /* static so it is zeroed */ diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index 11e8c6e..fdbb1ed 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c @@ -200,6 +200,7 @@ static void vesa_store_pm_info(void) boot_params.screen_info.vesapm_seg = oreg.es; boot_params.screen_info.vesapm_off = oreg.di; + boot_params.screen_info.vesapm_size = oreg.cx; } /* diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c index 43eda28..5ab5fdb 100644 --- a/arch/x86/boot/video.c +++ b/arch/x86/boot/video.c @@ -96,7 +96,7 @@ static void store_mode_params(void) static unsigned int get_entry(void) { char entry_buf[4]; - int i, len = 0; + unsigned int i, len = 0; int key; unsigned int v; diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S index 5b577d5..eb7f25e 100644 --- a/arch/x86/crypto/aes-x86_64-asm_64.S +++ b/arch/x86/crypto/aes-x86_64-asm_64.S @@ -8,6 +8,8 @@ * including this sentence is retained in full. 
*/ +#include + .extern crypto_ft_tab .extern crypto_it_tab .extern crypto_fl_tab @@ -71,6 +73,8 @@ FUNC: movq r1,r2; \ je B192; \ leaq 32(r9),r9; +#define ret pax_force_retaddr; ret + #define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \ movq r1,r2; \ movq r3,r4; \ diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 3470624..9b476a3 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -31,6 +31,7 @@ #include #include +#include #ifdef __x86_64__ .data @@ -199,7 +200,7 @@ enc: .octa 0x2 * num_initial_blocks = b mod 4 * encrypt the initial num_initial_blocks blocks and apply ghash on * the ciphertext -* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers +* %r10, %r11, %r15, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers * are clobbered * arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified */ @@ -208,8 +209,8 @@ enc: .octa 0x2 .macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation mov arg7, %r10 # %r10 = AAD - mov arg8, %r12 # %r12 = aadLen - mov %r12, %r11 + mov arg8, %r15 # %r15 = aadLen + mov %r15, %r11 pxor %xmm\i, %xmm\i _get_AAD_loop\num_initial_blocks\operation: movd (%r10), \TMP1 @@ -217,15 +218,15 @@ _get_AAD_loop\num_initial_blocks\operation: psrldq $4, %xmm\i pxor \TMP1, %xmm\i add $4, %r10 - sub $4, %r12 + sub $4, %r15 jne _get_AAD_loop\num_initial_blocks\operation cmp $16, %r11 je _get_AAD_loop2_done\num_initial_blocks\operation - mov $16, %r12 + mov $16, %r15 _get_AAD_loop2\num_initial_blocks\operation: psrldq $4, %xmm\i - sub $4, %r12 - cmp %r11, %r12 + sub $4, %r15 + cmp %r11, %r15 jne _get_AAD_loop2\num_initial_blocks\operation _get_AAD_loop2_done\num_initial_blocks\operation: movdqa SHUF_MASK(%rip), %xmm14 @@ -437,7 +438,7 @@ _initial_blocks_done\num_initial_blocks\operation: * num_initial_blocks = b mod 4 * encrypt the initial num_initial_blocks blocks and apply ghash on * the ciphertext -* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers +* %r10, %r11, %r15, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers * are clobbered * arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified */ @@ -446,8 +447,8 @@ _initial_blocks_done\num_initial_blocks\operation: .macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation mov arg7, %r10 # %r10 = AAD - mov arg8, %r12 # %r12 = aadLen - mov %r12, %r11 + mov arg8, %r15 # %r15 = aadLen + mov %r15, %r11 pxor %xmm\i, %xmm\i _get_AAD_loop\num_initial_blocks\operation: movd (%r10), \TMP1 @@ -455,15 +456,15 @@ _get_AAD_loop\num_initial_blocks\operation: psrldq $4, %xmm\i pxor \TMP1, %xmm\i add $4, %r10 - sub $4, %r12 + sub $4, %r15 jne _get_AAD_loop\num_initial_blocks\operation cmp $16, %r11 je _get_AAD_loop2_done\num_initial_blocks\operation - mov $16, %r12 + mov $16, %r15 _get_AAD_loop2\num_initial_blocks\operation: psrldq $4, %xmm\i - sub $4, %r12 - cmp %r11, %r12 + sub $4, %r15 + cmp %r11, %r15 jne _get_AAD_loop2\num_initial_blocks\operation _get_AAD_loop2_done\num_initial_blocks\operation: movdqa SHUF_MASK(%rip), %xmm14 @@ -1264,7 +1265,7 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst *****************************************************************************/ ENTRY(aesni_gcm_dec) - push %r12 + push %r15 push %r13 push %r14 mov %rsp, %r14 @@ -1274,8 +1275,8 @@ ENTRY(aesni_gcm_dec) */ sub $VARIABLE_OFFSET, %rsp and $~63, %rsp # align rsp to 64 bytes - mov %arg6, %r12 - movdqu 
(%r12), %xmm13 # %xmm13 = HashKey + mov %arg6, %r15 + movdqu (%r15), %xmm13 # %xmm13 = HashKey movdqa SHUF_MASK(%rip), %xmm2 PSHUFB_XMM %xmm2, %xmm13 @@ -1303,10 +1304,10 @@ ENTRY(aesni_gcm_dec) movdqa %xmm13, HashKey(%rsp) # store HashKey<<1 (mod poly) mov %arg4, %r13 # save the number of bytes of plaintext/ciphertext and $-16, %r13 # %r13 = %r13 - (%r13 mod 16) - mov %r13, %r12 - and $(3<<4), %r12 + mov %r13, %r15 + and $(3<<4), %r15 jz _initial_num_blocks_is_0_decrypt - cmp $(2<<4), %r12 + cmp $(2<<4), %r15 jb _initial_num_blocks_is_1_decrypt je _initial_num_blocks_is_2_decrypt _initial_num_blocks_is_3_decrypt: @@ -1356,16 +1357,16 @@ _zero_cipher_left_decrypt: sub $16, %r11 add %r13, %r11 movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte block - lea SHIFT_MASK+16(%rip), %r12 - sub %r13, %r12 + lea SHIFT_MASK+16(%rip), %r15 + sub %r13, %r15 # adjust the shuffle mask pointer to be able to shift 16-%r13 bytes # (%r13 is the number of bytes in plaintext mod 16) - movdqu (%r12), %xmm2 # get the appropriate shuffle mask + movdqu (%r15), %xmm2 # get the appropriate shuffle mask PSHUFB_XMM %xmm2, %xmm1 # right shift 16-%r13 butes movdqa %xmm1, %xmm2 pxor %xmm1, %xmm0 # Ciphertext XOR E(K, Yn) - movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + movdqu ALL_F-SHIFT_MASK(%r15), %xmm1 # get the appropriate mask to mask out top 16-%r13 bytes of %xmm0 pand %xmm1, %xmm0 # mask out top 16-%r13 bytes of %xmm0 pand %xmm1, %xmm2 @@ -1394,9 +1395,9 @@ _less_than_8_bytes_left_decrypt: sub $1, %r13 jne _less_than_8_bytes_left_decrypt _multiple_of_16_bytes_decrypt: - mov arg8, %r12 # %r13 = aadLen (number of bytes) - shl $3, %r12 # convert into number of bits - movd %r12d, %xmm15 # len(A) in %xmm15 + mov arg8, %r15 # %r13 = aadLen (number of bytes) + shl $3, %r15 # convert into number of bits + movd %r15d, %xmm15 # len(A) in %xmm15 shl $3, %arg4 # len(C) in bits (*128) MOVQ_R64_XMM %arg4, %xmm1 pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 @@ -1435,8 +1436,10 @@ _return_T_done_decrypt: mov %r14, %rsp pop %r14 pop %r13 - pop %r12 + pop %r15 + pax_force_retaddr ret +ENDPROC(aesni_gcm_dec) /***************************************************************************** @@ -1523,7 +1526,7 @@ _return_T_done_decrypt: * poly = x^128 + x^127 + x^126 + x^121 + 1 ***************************************************************************/ ENTRY(aesni_gcm_enc) - push %r12 + push %r15 push %r13 push %r14 mov %rsp, %r14 @@ -1533,8 +1536,8 @@ ENTRY(aesni_gcm_enc) # sub $VARIABLE_OFFSET, %rsp and $~63, %rsp - mov %arg6, %r12 - movdqu (%r12), %xmm13 + mov %arg6, %r15 + movdqu (%r15), %xmm13 movdqa SHUF_MASK(%rip), %xmm2 PSHUFB_XMM %xmm2, %xmm13 @@ -1558,13 +1561,13 @@ ENTRY(aesni_gcm_enc) movdqa %xmm13, HashKey(%rsp) mov %arg4, %r13 # %xmm13 holds HashKey<<1 (mod poly) and $-16, %r13 - mov %r13, %r12 + mov %r13, %r15 # Encrypt first few blocks - and $(3<<4), %r12 + and $(3<<4), %r15 jz _initial_num_blocks_is_0_encrypt - cmp $(2<<4), %r12 + cmp $(2<<4), %r15 jb _initial_num_blocks_is_1_encrypt je _initial_num_blocks_is_2_encrypt _initial_num_blocks_is_3_encrypt: @@ -1617,14 +1620,14 @@ _zero_cipher_left_encrypt: sub $16, %r11 add %r13, %r11 movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte blocks - lea SHIFT_MASK+16(%rip), %r12 - sub %r13, %r12 + lea SHIFT_MASK+16(%rip), %r15 + sub %r13, %r15 # adjust the shuffle mask pointer to be able to shift 16-r13 bytes # (%r13 is the number of bytes in plaintext mod 16) - movdqu (%r12), %xmm2 # get the appropriate shuffle mask + movdqu (%r15), %xmm2 # get the appropriate 
shuffle mask PSHUFB_XMM %xmm2, %xmm1 # shift right 16-r13 byte pxor %xmm1, %xmm0 # Plaintext XOR Encrypt(K, Yn) - movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + movdqu ALL_F-SHIFT_MASK(%r15), %xmm1 # get the appropriate mask to mask out top 16-r13 bytes of xmm0 pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0 movdqa SHUF_MASK(%rip), %xmm10 @@ -1657,9 +1660,9 @@ _less_than_8_bytes_left_encrypt: sub $1, %r13 jne _less_than_8_bytes_left_encrypt _multiple_of_16_bytes_encrypt: - mov arg8, %r12 # %r12 = addLen (number of bytes) - shl $3, %r12 - movd %r12d, %xmm15 # len(A) in %xmm15 + mov arg8, %r15 # %r15 = addLen (number of bytes) + shl $3, %r15 + movd %r15d, %xmm15 # len(A) in %xmm15 shl $3, %arg4 # len(C) in bits (*128) MOVQ_R64_XMM %arg4, %xmm1 pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 @@ -1698,8 +1701,10 @@ _return_T_done_encrypt: mov %r14, %rsp pop %r14 pop %r13 - pop %r12 + pop %r15 + pax_force_retaddr ret +ENDPROC(aesni_gcm_enc) #endif @@ -1714,6 +1719,7 @@ _key_expansion_256a: pxor %xmm1, %xmm0 movaps %xmm0, (TKEYP) add $0x10, TKEYP + pax_force_retaddr ret .align 4 @@ -1738,6 +1744,7 @@ _key_expansion_192a: shufps $0b01001110, %xmm2, %xmm1 movaps %xmm1, 0x10(TKEYP) add $0x20, TKEYP + pax_force_retaddr ret .align 4 @@ -1757,6 +1764,7 @@ _key_expansion_192b: movaps %xmm0, (TKEYP) add $0x10, TKEYP + pax_force_retaddr ret .align 4 @@ -1769,6 +1777,7 @@ _key_expansion_256b: pxor %xmm1, %xmm2 movaps %xmm2, (TKEYP) add $0x10, TKEYP + pax_force_retaddr ret /* @@ -1881,7 +1890,9 @@ ENTRY(aesni_set_key) #ifndef __x86_64__ popl KEYP #endif + pax_force_retaddr ret +ENDPROC(aesni_set_key) /* * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) @@ -1902,7 +1913,9 @@ ENTRY(aesni_enc) popl KLEN popl KEYP #endif + pax_force_retaddr ret +ENDPROC(aesni_enc) /* * _aesni_enc1: internal ABI @@ -1959,6 +1972,7 @@ _aesni_enc1: AESENC KEY STATE movaps 0x70(TKEYP), KEY AESENCLAST KEY STATE + pax_force_retaddr ret /* @@ -2067,6 +2081,7 @@ _aesni_enc4: AESENCLAST KEY STATE2 AESENCLAST KEY STATE3 AESENCLAST KEY STATE4 + pax_force_retaddr ret /* @@ -2089,7 +2104,9 @@ ENTRY(aesni_dec) popl KLEN popl KEYP #endif + pax_force_retaddr ret +ENDPROC(aesni_dec) /* * _aesni_dec1: internal ABI @@ -2146,6 +2163,7 @@ _aesni_dec1: AESDEC KEY STATE movaps 0x70(TKEYP), KEY AESDECLAST KEY STATE + pax_force_retaddr ret /* @@ -2254,6 +2272,7 @@ _aesni_dec4: AESDECLAST KEY STATE2 AESDECLAST KEY STATE3 AESDECLAST KEY STATE4 + pax_force_retaddr ret /* @@ -2311,7 +2330,9 @@ ENTRY(aesni_ecb_enc) popl KEYP popl LEN #endif + pax_force_retaddr ret +ENDPROC(aesni_ecb_enc) /* * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, @@ -2369,7 +2390,9 @@ ENTRY(aesni_ecb_dec) popl KEYP popl LEN #endif + pax_force_retaddr ret +ENDPROC(aesni_ecb_dec) /* * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, @@ -2410,7 +2433,9 @@ ENTRY(aesni_cbc_enc) popl LEN popl IVP #endif + pax_force_retaddr ret +ENDPROC(aesni_cbc_enc) /* * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, @@ -2500,7 +2525,9 @@ ENTRY(aesni_cbc_dec) popl LEN popl IVP #endif + pax_force_retaddr ret +ENDPROC(aesni_cbc_dec) #ifdef __x86_64__ .align 16 @@ -2526,6 +2553,7 @@ _aesni_inc_init: mov $1, TCTR_LOW MOVQ_R64_XMM TCTR_LOW INC MOVQ_R64_XMM CTR TCTR_LOW + pax_force_retaddr ret /* @@ -2554,6 +2582,7 @@ _aesni_inc: .Linc_low: movaps CTR, IV PSHUFB_XMM BSWAP_MASK IV + pax_force_retaddr ret /* @@ -2614,5 +2643,7 @@ ENTRY(aesni_ctr_enc) .Lctr_enc_ret: movups IV, (IVP) .Lctr_enc_just_ret: + 
pax_force_retaddr ret +ENDPROC(aesni_ctr_enc) #endif diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S index 391d245..c73d634 100644 --- a/arch/x86/crypto/blowfish-x86_64-asm_64.S +++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S @@ -20,6 +20,8 @@ * */ +#include + .file "blowfish-x86_64-asm.S" .text @@ -151,9 +153,11 @@ __blowfish_enc_blk: jnz __enc_xor; write_block(); + pax_force_retaddr ret; __enc_xor: xor_block(); + pax_force_retaddr ret; .align 8 @@ -188,6 +192,7 @@ blowfish_dec_blk: movq %r11, %rbp; + pax_force_retaddr ret; /********************************************************************** @@ -342,6 +347,7 @@ __blowfish_enc_blk_4way: popq %rbx; popq %rbp; + pax_force_retaddr ret; __enc_xor4: @@ -349,6 +355,7 @@ __enc_xor4: popq %rbx; popq %rbp; + pax_force_retaddr ret; .align 8 @@ -386,5 +393,6 @@ blowfish_dec_blk_4way: popq %rbx; popq %rbp; + pax_force_retaddr ret; diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S index 6214a9b..5c0f959 100644 --- a/arch/x86/crypto/salsa20-x86_64-asm_64.S +++ b/arch/x86/crypto/salsa20-x86_64-asm_64.S @@ -1,3 +1,5 @@ +#include + # enter ECRYPT_encrypt_bytes .text .p2align 5 @@ -790,6 +792,7 @@ ECRYPT_encrypt_bytes: add %r11,%rsp mov %rdi,%rax mov %rsi,%rdx + pax_force_retaddr ret # bytesatleast65: ._bytesatleast65: @@ -891,6 +894,7 @@ ECRYPT_keysetup: add %r11,%rsp mov %rdi,%rax mov %rsi,%rdx + pax_force_retaddr ret # enter ECRYPT_ivsetup .text @@ -917,4 +921,5 @@ ECRYPT_ivsetup: add %r11,%rsp mov %rdi,%rax mov %rsi,%rdx + pax_force_retaddr ret diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index b2c2f57..f30325b 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S @@ -28,6 +28,8 @@ * (at your option) any later version. 
*/ +#include + #define CTX %rdi // arg1 #define BUF %rsi // arg2 #define CNT %rdx // arg3 @@ -75,9 +77,9 @@ \name: push %rbx push %rbp - push %r12 + push %r14 - mov %rsp, %r12 + mov %rsp, %r14 sub $64, %rsp # allocate workspace and $~15, %rsp # align stack @@ -99,11 +101,12 @@ xor %rax, %rax rep stosq - mov %r12, %rsp # deallocate workspace + mov %r14, %rsp # deallocate workspace - pop %r12 + pop %r14 pop %rbp pop %rbx + pax_force_retaddr ret .size \name, .-\name diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S index 5b012a2..9712c31 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S @@ -20,6 +20,8 @@ * */ +#include + .file "twofish-x86_64-asm-3way.S" .text @@ -260,6 +262,7 @@ __twofish_enc_blk_3way: popq %r13; popq %r14; popq %r15; + pax_force_retaddr ret; __enc_xor3: @@ -271,6 +274,7 @@ __enc_xor3: popq %r13; popq %r14; popq %r15; + pax_force_retaddr ret; .global twofish_dec_blk_3way @@ -312,5 +316,6 @@ twofish_dec_blk_3way: popq %r13; popq %r14; popq %r15; + pax_force_retaddr ret; diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S index 7bcf3fc..560ff4c 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64.S @@ -21,6 +21,7 @@ .text #include +#include #define a_offset 0 #define b_offset 4 @@ -268,6 +269,7 @@ twofish_enc_blk: popq R1 movq $1,%rax + pax_force_retaddr ret twofish_dec_blk: @@ -319,4 +321,5 @@ twofish_dec_blk: popq R1 movq $1,%rax + pax_force_retaddr ret diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index fd84387..887aa7ef 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -162,6 +162,8 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long dump_start, dump_size; struct user32 dump; + memset(&dump, 0, sizeof(dump)); + fs = get_fs(); set_fs(KERNEL_DS); has_dumped = 1; @@ -315,6 +317,13 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) current->mm->free_area_cache = TASK_UNMAPPED_BASE; current->mm->cached_hole_size = 0; + retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT); + if (retval < 0) { + /* Someone check-me: is this error path enough? */ + send_sig(SIGKILL, current, 0); + return retval; + } + install_exec_creds(bprm); current->flags &= ~PF_FORKNOEXEC; @@ -410,13 +419,6 @@ beyond_if: set_brk(current->mm->start_brk, current->mm->brk); - retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT); - if (retval < 0) { - /* Someone check-me: is this error path enough? */ - send_sig(SIGKILL, current, 0); - return retval; - } - current->mm->start_stack = (unsigned long)create_aout_tables((char __user *)bprm->p, bprm); /* start thread */ diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 6557769..c135ca5 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -73,6 +73,10 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) switch (from->si_code >> 16) { case __SI_FAULT >> 16: break; + case __SI_SYS >> 16: + put_user_ex(from->si_syscall, &to->si_syscall); + put_user_ex(from->si_arch, &to->si_arch); + break; case __SI_CHLD >> 16: put_user_ex(from->si_utime, &to->si_utime); put_user_ex(from->si_stime, &to->si_stime); @@ -169,7 +173,7 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, } seg = get_fs(); set_fs(KERNEL_DS); - ret = do_sigaltstack(uss_ptr ? 
&uss : NULL, &uoss, regs->sp); + ret = do_sigaltstack(uss_ptr ? (const stack_t __force_user *)&uss : NULL, (stack_t __force_user *)&uoss, regs->sp); set_fs(seg); if (ret >= 0 && uoss_ptr) { if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t))) @@ -276,7 +280,7 @@ asmlinkage long sys32_sigreturn(struct pt_regs *regs) if (__get_user(set.sig[0], &frame->sc.oldmask) || (_COMPAT_NSIG_WORDS > 1 && __copy_from_user((((char *) &set.sig) + 4), - &frame->extramask, + frame->extramask, sizeof(frame->extramask)))) goto badframe; @@ -370,7 +374,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, */ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, - void **fpstate) + void __user **fpstate) { unsigned long sp; @@ -391,7 +395,7 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, if (used_math()) { sp = sp - sig_xstate_ia32_size; - *fpstate = (struct _fpstate_ia32 *) sp; + *fpstate = (struct _fpstate_ia32 __user *) sp; if (save_i387_xstate_ia32(*fpstate) < 0) return (void __user *) -1L; } @@ -399,7 +403,7 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, sp -= frame_size; /* Align the stack pointer according to the i386 ABI, * i.e. so that on function entry ((sp + 4) & 15) == 0. */ - sp = ((sp + 4) & -16ul) - 4; + sp = ((sp - 12) & -16ul) - 4; return (void __user *) sp; } @@ -447,7 +451,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn); else - restorer = &frame->retcode; + restorer = frame->retcode; } put_user_try { @@ -457,7 +461,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, * These are actually not used anymore, but left because some * gdb versions depend on them as a marker. */ - put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); + put_user_ex(*((const u64 *)&code), (u64 __user *)frame->retcode); } put_user_catch(err); if (err) @@ -499,7 +503,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 0xb8, __NR_ia32_rt_sigreturn, 0x80cd, - 0, + 0 }; frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); @@ -529,16 +533,18 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; + else if (current->mm->context.vdso) + /* Return stub is in 32bit vsyscall page */ + restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); else - restorer = VDSO32_SYMBOL(current->mm->context.vdso, - rt_sigreturn); + restorer = frame->retcode; put_user_ex(ptr_to_compat(restorer), &frame->pretcode); /* * Not actually used anymore, but left because some gdb * versions need it. */ - put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); + put_user_ex(*((const u64 *)&code), (u64 __user *)frame->retcode); } put_user_catch(err); if (err) diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 95b4eb3..b5195f4 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -13,7 +13,9 @@ #include #include #include +#include #include +#include /* Avoid __ASSEMBLER__'ifying just for this. 
*/ #include @@ -61,12 +63,12 @@ */ .macro LOAD_ARGS32 offset, _r9=0 .if \_r9 - movl \offset+16(%rsp),%r9d + movl \offset+R9(%rsp),%r9d .endif - movl \offset+40(%rsp),%ecx - movl \offset+48(%rsp),%edx - movl \offset+56(%rsp),%esi - movl \offset+64(%rsp),%edi + movl \offset+RCX(%rsp),%ecx + movl \offset+RDX(%rsp),%edx + movl \offset+RSI(%rsp),%esi + movl \offset+RDI(%rsp),%edi movl %eax,%eax /* zero extension */ .endm @@ -95,6 +97,32 @@ ENTRY(native_irq_enable_sysexit) ENDPROC(native_irq_enable_sysexit) #endif + .macro pax_enter_kernel_user + pax_set_fptr_mask +#ifdef CONFIG_PAX_MEMORY_UDEREF + call pax_enter_kernel_user +#endif + .endm + + .macro pax_exit_kernel_user +#ifdef CONFIG_PAX_MEMORY_UDEREF + call pax_exit_kernel_user +#endif +#ifdef CONFIG_PAX_RANDKSTACK + pushq %rax + pushq %r11 + call pax_randomize_kstack + popq %r11 + popq %rax +#endif + .endm + +.macro pax_erase_kstack +#ifdef CONFIG_PAX_MEMORY_STACKLEAK + call pax_erase_kstack +#endif +.endm + /* * 32bit SYSENTER instruction entry. * @@ -121,12 +149,6 @@ ENTRY(ia32_sysenter_target) CFI_REGISTER rsp,rbp SWAPGS_UNSAFE_STACK movq PER_CPU_VAR(kernel_stack), %rsp - addq $(KERNEL_STACK_OFFSET),%rsp - /* - * No need to follow this irqs on/off section: the syscall - * disabled irqs, here we enable it straight after entry: - */ - ENABLE_INTERRUPTS(CLBR_NONE) movl %ebp,%ebp /* zero extension */ pushq_cfi $__USER32_DS /*CFI_REL_OFFSET ss,0*/ @@ -134,25 +156,44 @@ ENTRY(ia32_sysenter_target) CFI_REL_OFFSET rsp,0 pushfq_cfi /*CFI_REL_OFFSET rflags,0*/ - movl 8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d - CFI_REGISTER rip,r10 + orl $X86_EFLAGS_IF,(%rsp) + GET_THREAD_INFO(%r11) + movl TI_sysenter_return(%r11), %r11d + CFI_REGISTER rip,r11 pushq_cfi $__USER32_CS /*CFI_REL_OFFSET cs,0*/ movl %eax, %eax - pushq_cfi %r10 + pushq_cfi %r11 CFI_REL_OFFSET rip,0 pushq_cfi %rax cld SAVE_ARGS 0,1,0 + pax_enter_kernel_user + +#ifdef CONFIG_PAX_RANDKSTACK + pax_erase_kstack +#endif + + /* + * No need to follow this irqs on/off section: the syscall + * disabled irqs, here we enable it straight after entry: + */ + ENABLE_INTERRUPTS(CLBR_NONE) /* no need to do an access_ok check here because rbp has been 32bit zero extended */ + +#ifdef CONFIG_PAX_MEMORY_UDEREF + mov pax_user_shadow_base,%r11 + add %r11,%rbp +#endif + 1: movl (%rbp),%ebp .section __ex_table,"a" .quad 1b,ia32_badarg .previous - GET_THREAD_INFO(%r10) - orl $TS_COMPAT,TI_status(%r10) - testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + GET_THREAD_INFO(%r11) + orl $TS_COMPAT,TI_status(%r11) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r11) CFI_REMEMBER_STATE jnz sysenter_tracesys cmpq $(IA32_NR_syscalls-1),%rax @@ -162,16 +203,18 @@ sysenter_do_call: sysenter_dispatch: call *ia32_sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) - GET_THREAD_INFO(%r10) + GET_THREAD_INFO(%r11) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl $_TIF_ALLWORK_MASK,TI_flags(%r10) + testl $_TIF_ALLWORK_MASK,TI_flags(%r11) jnz sysexit_audit sysexit_from_sys_call: - andl $~TS_COMPAT,TI_status(%r10) + pax_exit_kernel_user + pax_erase_kstack + andl $~TS_COMPAT,TI_status(%r11) /* clear IF, that popfq doesn't enable interrupts early */ - andl $~0x200,EFLAGS-R11(%rsp) - movl RIP-R11(%rsp),%edx /* User %eip */ + andl $~X86_EFLAGS_IF,EFLAGS(%rsp) + movl RIP(%rsp),%edx /* User %eip */ CFI_REGISTER rip,rdx RESTORE_ARGS 0,24,0,0,0,0 xorq %r8,%r8 @@ -194,6 +237,9 @@ sysexit_from_sys_call: movl %eax,%esi /* 2nd arg: syscall number */ movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */ call audit_syscall_entry + + 
pax_erase_kstack + movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ cmpq $(IA32_NR_syscalls-1),%rax ja ia32_badsys @@ -205,7 +251,7 @@ sysexit_from_sys_call: .endm .macro auditsys_exit exit - testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) + testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r11) jnz ia32_ret_from_sys_call TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) @@ -215,12 +261,12 @@ sysexit_from_sys_call: movzbl %al,%edi /* zero-extend that into %edi */ inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ call audit_syscall_exit - GET_THREAD_INFO(%r10) + GET_THREAD_INFO(%r11) movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl %edi,TI_flags(%r10) + testl %edi,TI_flags(%r11) jz \exit CLEAR_RREGS -ARGOFFSET jmp int_with_check @@ -238,7 +284,7 @@ sysexit_audit: sysenter_tracesys: #ifdef CONFIG_AUDITSYSCALL - testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) + testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r11) jz sysenter_auditsys #endif SAVE_REST @@ -250,6 +296,9 @@ sysenter_tracesys: RESTORE_REST cmpq $(IA32_NR_syscalls-1),%rax ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ + + pax_erase_kstack + jmp sysenter_do_call CFI_ENDPROC ENDPROC(ia32_sysenter_target) @@ -277,19 +326,25 @@ ENDPROC(ia32_sysenter_target) ENTRY(ia32_cstar_target) CFI_STARTPROC32 simple CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET + CFI_DEF_CFA rsp,0 CFI_REGISTER rip,rcx /*CFI_REGISTER rflags,r11*/ SWAPGS_UNSAFE_STACK movl %esp,%r8d CFI_REGISTER rsp,r8 movq PER_CPU_VAR(kernel_stack),%rsp + SAVE_ARGS 8*6,0,0 + pax_enter_kernel_user + +#ifdef CONFIG_PAX_RANDKSTACK + pax_erase_kstack +#endif + /* * No need to follow this irqs on/off section: the syscall * disabled irqs and here we enable it straight after entry: */ ENABLE_INTERRUPTS(CLBR_NONE) - SAVE_ARGS 8,0,0 movl %eax,%eax /* zero extension */ movq %rax,ORIG_RAX-ARGOFFSET(%rsp) movq %rcx,RIP-ARGOFFSET(%rsp) @@ -305,13 +360,19 @@ ENTRY(ia32_cstar_target) /* no need to do an access_ok check here because r8 has been 32bit zero extended */ /* hardware stack frame is complete now */ + +#ifdef CONFIG_PAX_MEMORY_UDEREF + mov pax_user_shadow_base,%r11 + add %r11,%r8 +#endif + 1: movl (%r8),%r9d .section __ex_table,"a" .quad 1b,ia32_badarg .previous - GET_THREAD_INFO(%r10) - orl $TS_COMPAT,TI_status(%r10) - testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + GET_THREAD_INFO(%r11) + orl $TS_COMPAT,TI_status(%r11) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r11) CFI_REMEMBER_STATE jnz cstar_tracesys cmpq $IA32_NR_syscalls-1,%rax @@ -321,14 +382,16 @@ cstar_do_call: cstar_dispatch: call *ia32_sys_call_table(,%rax,8) movq %rax,RAX-ARGOFFSET(%rsp) - GET_THREAD_INFO(%r10) + GET_THREAD_INFO(%r11) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl $_TIF_ALLWORK_MASK,TI_flags(%r10) + testl $_TIF_ALLWORK_MASK,TI_flags(%r11) jnz sysretl_audit sysretl_from_sys_call: - andl $~TS_COMPAT,TI_status(%r10) - RESTORE_ARGS 0,-ARG_SKIP,0,0,0 + pax_exit_kernel_user + pax_erase_kstack + andl $~TS_COMPAT,TI_status(%r11) + RESTORE_ARGS 0,-ORIG_RAX,0,0,0 movl RIP-ARGOFFSET(%rsp),%ecx CFI_REGISTER rip,rcx movl EFLAGS-ARGOFFSET(%rsp),%r11d @@ -355,7 +418,7 @@ sysretl_audit: cstar_tracesys: #ifdef CONFIG_AUDITSYSCALL - testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) + testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r11) jz 
cstar_auditsys #endif xchgl %r9d,%ebp @@ -369,6 +432,9 @@ cstar_tracesys: xchgl %ebp,%r9d cmpq $(IA32_NR_syscalls-1),%rax ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ + + pax_erase_kstack + jmp cstar_do_call END(ia32_cstar_target) @@ -409,20 +475,26 @@ ENTRY(ia32_syscall) CFI_REL_OFFSET rip,RIP-RIP PARAVIRT_ADJUST_EXCEPTION_FRAME SWAPGS - /* - * No need to follow this irqs on/off section: the syscall - * disabled irqs and here we enable it straight after entry: - */ - ENABLE_INTERRUPTS(CLBR_NONE) movl %eax,%eax pushq_cfi %rax cld /* note the registers are not zero extended to the sf. this could be a problem. */ SAVE_ARGS 0,1,0 - GET_THREAD_INFO(%r10) - orl $TS_COMPAT,TI_status(%r10) - testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) + pax_enter_kernel_user + +#ifdef CONFIG_PAX_RANDKSTACK + pax_erase_kstack +#endif + + /* + * No need to follow this irqs on/off section: the syscall + * disabled irqs and here we enable it straight after entry: + */ + ENABLE_INTERRUPTS(CLBR_NONE) + GET_THREAD_INFO(%r11) + orl $TS_COMPAT,TI_status(%r11) + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r11) jnz ia32_tracesys cmpq $(IA32_NR_syscalls-1),%rax ja ia32_badsys @@ -445,6 +517,9 @@ ia32_tracesys: RESTORE_REST cmpq $(IA32_NR_syscalls-1),%rax ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ + + pax_erase_kstack + jmp ia32_do_call END(ia32_syscall) @@ -455,6 +530,7 @@ ia32_badsys: quiet_ni_syscall: movq $-ENOSYS,%rax + pax_force_retaddr ret CFI_ENDPROC @@ -776,7 +852,7 @@ ia32_sys_call_table: .quad sys_tgkill /* 270 */ .quad compat_sys_utimes .quad sys32_fadvise64_64 - .quad quiet_ni_syscall /* sys_vserver */ + .quad sys32_vserver .quad sys_mbind .quad compat_sys_get_mempolicy /* 275 */ .quad sys_set_mempolicy diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index f6f5c53..8e51d70 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -69,8 +69,8 @@ asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low, */ static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat) { - typeof(ubuf->st_uid) uid = 0; - typeof(ubuf->st_gid) gid = 0; + typeof(((struct stat64 *)0)->st_uid) uid = 0; + typeof(((struct stat64 *)0)->st_gid) gid = 0; SET_UID(uid, stat->uid); SET_GID(gid, stat->gid); if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct stat64)) || @@ -308,8 +308,8 @@ asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, } set_fs(KERNEL_DS); ret = sys_rt_sigprocmask(how, - set ? (sigset_t __user *)&s : NULL, - oset ? (sigset_t __user *)&s : NULL, + set ? (sigset_t __force_user *)&s : NULL, + oset ? 
(sigset_t __force_user *)&s : NULL, sigsetsize); set_fs(old_fs); if (ret) @@ -332,7 +332,7 @@ asmlinkage long sys32_alarm(unsigned int seconds) return alarm_setitimer(seconds); } -asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, +asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr, int options) { return compat_sys_wait4(pid, stat_addr, options, NULL); @@ -353,7 +353,7 @@ asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid, mm_segment_t old_fs = get_fs(); set_fs(KERNEL_DS); - ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t); + ret = sys_sched_rr_get_interval(pid, (struct timespec __force_user *)&t); set_fs(old_fs); if (put_compat_timespec(&t, interval)) return -EFAULT; @@ -363,13 +363,13 @@ asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid, asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set, compat_size_t sigsetsize) { - sigset_t s; + sigset_t s = { }; compat_sigset_t s32; int ret; mm_segment_t old_fs = get_fs(); set_fs(KERNEL_DS); - ret = sys_rt_sigpending((sigset_t __user *)&s, sigsetsize); + ret = sys_rt_sigpending((sigset_t __force_user *)&s, sigsetsize); set_fs(old_fs); if (!ret) { switch (_NSIG_WORDS) { @@ -394,7 +394,7 @@ asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig, if (copy_siginfo_from_user32(&info, uinfo)) return -EFAULT; set_fs(KERNEL_DS); - ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *)&info); + ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __force_user *)&info); set_fs(old_fs); return ret; } @@ -439,7 +439,7 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd, return -EFAULT; set_fs(KERNEL_DS); - ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *)&of : NULL, + ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __force_user *)&of : NULL, count); set_fs(old_fs); diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index 091508b..2cc2c2d 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -4,10 +4,10 @@ #ifdef CONFIG_SMP .macro LOCK_PREFIX -1: lock +672: lock .section .smp_locks,"a" .balign 4 - .long 1b - . + .long 672b - . .previous .endm #else @@ -15,6 +15,45 @@ .endm #endif +#ifdef KERNEXEC_PLUGIN + .macro pax_force_retaddr_bts rip=0 + btsq $63,\rip(%rsp) + .endm +#ifdef CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_BTS + .macro pax_force_retaddr rip=0, reload=0 + btsq $63,\rip(%rsp) + .endm + .macro pax_force_fptr ptr + btsq $63,\ptr + .endm + .macro pax_set_fptr_mask + .endm +#endif +#ifdef CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_OR + .macro pax_force_retaddr rip=0, reload=0 + .if \reload + pax_set_fptr_mask + .endif + orq %r12,\rip(%rsp) + .endm + .macro pax_force_fptr ptr + orq %r12,\ptr + .endm + .macro pax_set_fptr_mask + movabs $0x8000000000000000,%r12 + .endm +#endif +#else + .macro pax_force_retaddr rip=0, reload=0 + .endm + .macro pax_force_fptr ptr + .endm + .macro pax_force_retaddr_bts rip=0 + .endm + .macro pax_set_fptr_mask + .endm +#endif + .macro altinstruction_entry orig alt feature orig_len alt_len .long \orig - . .long \alt - . 
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 37ad100..7d47faa 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -89,7 +89,7 @@ static inline int alternatives_text_reserved(void *start, void *end) ".section .discard,\"aw\",@progbits\n" \ " .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \ ".previous\n" \ - ".section .altinstr_replacement, \"ax\"\n" \ + ".section .altinstr_replacement, \"a\"\n" \ "663:\n\t" newinstr "\n664:\n" /* replacement */ \ ".previous" diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 1a6c09a..fec2432 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -45,7 +45,7 @@ static inline void generic_apic_probe(void) #ifdef CONFIG_X86_LOCAL_APIC -extern unsigned int apic_verbosity; +extern int apic_verbosity; extern int local_apic_timer_c2_ok; extern int disable_apic; diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h index 20370c6..a2eb9b0 100644 --- a/arch/x86/include/asm/apm.h +++ b/arch/x86/include/asm/apm.h @@ -34,7 +34,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" - "lcall *%%cs:apm_bios_entry\n\t" + "lcall *%%ss:apm_bios_entry\n\t" "setc %%al\n\t" "popl %%ebp\n\t" "popl %%edi\n\t" @@ -58,7 +58,7 @@ static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in, __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" - "lcall *%%cs:apm_bios_entry\n\t" + "lcall *%%ss:apm_bios_entry\n\t" "setc %%bl\n\t" "popl %%ebp\n\t" "popl %%edi\n\t" diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 58cb6d4..a8df22ae 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -22,7 +22,18 @@ */ static inline int atomic_read(const atomic_t *v) { - return (*(volatile int *)&(v)->counter); + return (*(volatile const int *)&(v)->counter); +} + +/** + * atomic_read_unchecked - read atomic variable + * @v: pointer of type atomic_unchecked_t + * + * Atomically reads the value of @v. + */ +static inline int __intentional_overflow(-1) atomic_read_unchecked(const atomic_unchecked_t *v) +{ + return (*(volatile const int *)&(v)->counter); } /** @@ -38,6 +49,18 @@ static inline void atomic_set(atomic_t *v, int i) } /** + * atomic_set_unchecked - set atomic variable + * @v: pointer of type atomic_unchecked_t + * @i: required value + * + * Atomically sets the value of @v to @i. + */ +static inline void atomic_set_unchecked(atomic_unchecked_t *v, int i) +{ + v->counter = i; +} + +/** * atomic_add - add integer to atomic variable * @i: integer value to add * @v: pointer of type atomic_t @@ -46,7 +69,29 @@ static inline void atomic_set(atomic_t *v, int i) */ static inline void atomic_add(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "addl %1,%0" + asm volatile(LOCK_PREFIX "addl %1,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "subl %1,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "+m" (v->counter) + : "ir" (i)); +} + +/** + * atomic_add_unchecked - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_unchecked_t + * + * Atomically adds @i to @v. 
+ */ +static inline void atomic_add_unchecked(int i, atomic_unchecked_t *v) +{ + asm volatile(LOCK_PREFIX "addl %1,%0\n" : "+m" (v->counter) : "ir" (i)); } @@ -60,7 +105,29 @@ static inline void atomic_add(int i, atomic_t *v) */ static inline void atomic_sub(int i, atomic_t *v) { - asm volatile(LOCK_PREFIX "subl %1,%0" + asm volatile(LOCK_PREFIX "subl %1,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "addl %1,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "+m" (v->counter) + : "ir" (i)); +} + +/** + * atomic_sub_unchecked - subtract integer from atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_unchecked_t + * + * Atomically subtracts @i from @v. + */ +static inline void atomic_sub_unchecked(int i, atomic_unchecked_t *v) +{ + asm volatile(LOCK_PREFIX "subl %1,%0\n" : "+m" (v->counter) : "ir" (i)); } @@ -78,7 +145,16 @@ static inline int atomic_sub_and_test(int i, atomic_t *v) { unsigned char c; - asm volatile(LOCK_PREFIX "subl %2,%0; sete %1" + asm volatile(LOCK_PREFIX "subl %2,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "addl %2,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sete %1\n" : "+m" (v->counter), "=qm" (c) : "ir" (i) : "memory"); return c; @@ -92,7 +168,27 @@ static inline int atomic_sub_and_test(int i, atomic_t *v) */ static inline void atomic_inc(atomic_t *v) { - asm volatile(LOCK_PREFIX "incl %0" + asm volatile(LOCK_PREFIX "incl %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "decl %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "+m" (v->counter)); +} + +/** + * atomic_inc_unchecked - increment atomic variable + * @v: pointer of type atomic_unchecked_t + * + * Atomically increments @v by 1. + */ +static inline void atomic_inc_unchecked(atomic_unchecked_t *v) +{ + asm volatile(LOCK_PREFIX "incl %0\n" : "+m" (v->counter)); } @@ -104,7 +200,27 @@ static inline void atomic_inc(atomic_t *v) */ static inline void atomic_dec(atomic_t *v) { - asm volatile(LOCK_PREFIX "decl %0" + asm volatile(LOCK_PREFIX "decl %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "incl %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "+m" (v->counter)); +} + +/** + * atomic_dec_unchecked - decrement atomic variable + * @v: pointer of type atomic_unchecked_t + * + * Atomically decrements @v by 1. + */ +static inline void atomic_dec_unchecked(atomic_unchecked_t *v) +{ + asm volatile(LOCK_PREFIX "decl %0\n" : "+m" (v->counter)); } @@ -120,7 +236,16 @@ static inline int atomic_dec_and_test(atomic_t *v) { unsigned char c; - asm volatile(LOCK_PREFIX "decl %0; sete %1" + asm volatile(LOCK_PREFIX "decl %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "incl %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sete %1\n" : "+m" (v->counter), "=qm" (c) : : "memory"); return c != 0; @@ -138,7 +263,35 @@ static inline int atomic_inc_and_test(atomic_t *v) { unsigned char c; - asm volatile(LOCK_PREFIX "incl %0; sete %1" + asm volatile(LOCK_PREFIX "incl %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "decl %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sete %1\n" + : "+m" (v->counter), "=qm" (c) + : : "memory"); + return c != 0; +} + +/** + * atomic_inc_and_test_unchecked - increment and test + * @v: pointer of type atomic_unchecked_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. 
+ */ +static inline int atomic_inc_and_test_unchecked(atomic_unchecked_t *v) +{ + unsigned char c; + + asm volatile(LOCK_PREFIX "incl %0\n" + "sete %1\n" : "+m" (v->counter), "=qm" (c) : : "memory"); return c != 0; @@ -157,7 +310,16 @@ static inline int atomic_add_negative(int i, atomic_t *v) { unsigned char c; - asm volatile(LOCK_PREFIX "addl %2,%0; sets %1" + asm volatile(LOCK_PREFIX "addl %2,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "subl %2,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sets %1\n" : "+m" (v->counter), "=qm" (c) : "ir" (i) : "memory"); return c; @@ -170,7 +332,7 @@ static inline int atomic_add_negative(int i, atomic_t *v) * * Atomically adds @i to @v and returns @i + @v */ -static inline int atomic_add_return(int i, atomic_t *v) +static inline int __intentional_overflow(-1) atomic_add_return(int i, atomic_t *v) { #ifdef CONFIG_M386 int __i; @@ -179,7 +341,7 @@ static inline int atomic_add_return(int i, atomic_t *v) goto no_xadd; #endif /* Modern 486+ processor */ - return i + xadd(&v->counter, i); + return i + xadd_check_overflow(&v->counter, i); #ifdef CONFIG_M386 no_xadd: /* Legacy 386 processor */ @@ -192,21 +354,58 @@ no_xadd: /* Legacy 386 processor */ } /** + * atomic_add_return_unchecked - add integer and return + * @i: integer value to add + * @v: pointer of type atomic_unchecked_t + * + * Atomically adds @i to @v and returns @i + @v + */ +static inline int __intentional_overflow(-1) atomic_add_return_unchecked(int i, atomic_unchecked_t *v) +{ +#ifdef CONFIG_M386 + int __i; + unsigned long flags; + if (unlikely(boot_cpu_data.x86 <= 3)) + goto no_xadd; +#endif + /* Modern 486+ processor */ + return i + xadd(&v->counter, i); + +#ifdef CONFIG_M386 +no_xadd: /* Legacy 386 processor */ + raw_local_irq_save(flags); + __i = atomic_read_unchecked(v); + atomic_set_unchecked(v, i + __i); + raw_local_irq_restore(flags); + return i + __i; +#endif +} + +/** * atomic_sub_return - subtract integer and return * @v: pointer of type atomic_t * @i: integer value to subtract * * Atomically subtracts @i from @v and returns @v - @i */ -static inline int atomic_sub_return(int i, atomic_t *v) +static inline int __intentional_overflow(-1) atomic_sub_return(int i, atomic_t *v) { return atomic_add_return(-i, v); } #define atomic_inc_return(v) (atomic_add_return(1, v)) +static inline int __intentional_overflow(-1) atomic_inc_return_unchecked(atomic_unchecked_t *v) +{ + return atomic_add_return_unchecked(1, v); +} #define atomic_dec_return(v) (atomic_sub_return(1, v)) -static inline int atomic_cmpxchg(atomic_t *v, int old, int new) +static inline int __intentional_overflow(-1) atomic_cmpxchg(atomic_t *v, int old, int new) +{ + return cmpxchg(&v->counter, old, new); +} + +static inline int atomic_cmpxchg_unchecked(atomic_unchecked_t *v, int old, int new) { return cmpxchg(&v->counter, old, new); } @@ -216,6 +415,11 @@ static inline int atomic_xchg(atomic_t *v, int new) return xchg(&v->counter, new); } +static inline int atomic_xchg_unchecked(atomic_unchecked_t *v, int new) +{ + return xchg(&v->counter, new); +} + /** * __atomic_add_unless - add unless the number is already a given value * @v: pointer of type atomic_t @@ -227,12 +431,25 @@ static inline int atomic_xchg(atomic_t *v, int new) */ static inline int __atomic_add_unless(atomic_t *v, int a, int u) { - int c, old; + int c, old, new; c = atomic_read(v); for (;;) { - if (unlikely(c == (u))) + if (unlikely(c == u)) break; - old = atomic_cmpxchg((v), c, c + (a)); + + asm volatile("addl %2,%0\n" + +#ifdef 
CONFIG_PAX_REFCOUNT + "jno 0f\n" + "subl %2,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "=r" (new) + : "0" (c), "ir" (a)); + + old = atomic_cmpxchg(v, c, new); if (likely(old == c)) break; c = old; @@ -240,6 +457,48 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) return c; } +/** + * atomic_inc_not_zero_hint - increment if not null + * @v: pointer of type atomic_t + * @hint: probable value of the atomic before the increment + * + * This version of atomic_inc_not_zero() gives a hint of probable + * value of the atomic. This helps processor to not read the memory + * before doing the atomic read/modify/write cycle, lowering + * number of bus transactions on some arches. + * + * Returns: 0 if increment was not done, 1 otherwise. + */ +#define atomic_inc_not_zero_hint atomic_inc_not_zero_hint +static inline int atomic_inc_not_zero_hint(atomic_t *v, int hint) +{ + int val, c = hint, new; + + /* sanity test, should be removed by compiler if hint is a constant */ + if (!hint) + return __atomic_add_unless(v, 1, 0); + + do { + asm volatile("incl %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + "decl %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "=r" (new) + : "0" (c)); + + val = atomic_cmpxchg(v, c, new); + if (val == c) + return 1; + c = val; + } while (c); + + return 0; +} /* * atomic_dec_if_positive - decrement by 1 if old value positive @@ -293,14 +552,37 @@ static inline void atomic_or_long(unsigned long *v1, unsigned long v2) #endif /* These are x86-specific, used by some header files */ -#define atomic_clear_mask(mask, addr) \ - asm volatile(LOCK_PREFIX "andl %0,%1" \ - : : "r" (~(mask)), "m" (*(addr)) : "memory") +static inline void atomic_clear_mask(unsigned int mask, atomic_t *v) +{ + asm volatile(LOCK_PREFIX "andl %1,%0" + : "+m" (v->counter) + : "r" (~(mask)) + : "memory"); +} -#define atomic_set_mask(mask, addr) \ - asm volatile(LOCK_PREFIX "orl %0,%1" \ - : : "r" ((unsigned)(mask)), "m" (*(addr)) \ - : "memory") +static inline void atomic_clear_mask_unchecked(unsigned int mask, atomic_unchecked_t *v) +{ + asm volatile(LOCK_PREFIX "andl %1,%0" + : "+m" (v->counter) + : "r" (~(mask)) + : "memory"); +} + +static inline void atomic_set_mask(unsigned int mask, atomic_t *v) +{ + asm volatile(LOCK_PREFIX "orl %1,%0" + : "+m" (v->counter) + : "r" (mask) + : "memory"); +} + +static inline void atomic_set_mask_unchecked(unsigned int mask, atomic_unchecked_t *v) +{ + asm volatile(LOCK_PREFIX "orl %1,%0" + : "+m" (v->counter) + : "r" (mask) + : "memory"); +} /* Atomic operations are already serializing on x86 */ #define smp_mb__before_atomic_dec() barrier() diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 24098aa..aabcac7 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -12,6 +12,14 @@ typedef struct { u64 __aligned(8) counter; } atomic64_t; +#ifdef CONFIG_PAX_REFCOUNT +typedef struct { + u64 __aligned(8) counter; +} atomic64_unchecked_t; +#else +typedef atomic64_t atomic64_unchecked_t; +#endif + #define ATOMIC64_INIT(val) { (val) } #ifdef CONFIG_X86_CMPXCHG64 @@ -38,6 +46,21 @@ static inline long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n } /** + * atomic64_cmpxchg_unchecked - cmpxchg atomic64 variable + * @p: pointer to type atomic64_unchecked_t + * @o: expected value + * @n: new value + * + * Atomically sets @v to @n if it was equal to @o and returns + * the old value. 
+ */ + +static inline long long atomic64_cmpxchg_unchecked(atomic64_unchecked_t *v, long long o, long long n) +{ + return cmpxchg64(&v->counter, o, n); +} + +/** * atomic64_xchg - xchg atomic64 variable * @v: pointer to type atomic64_t * @n: value to assign @@ -77,6 +100,24 @@ static inline void atomic64_set(atomic64_t *v, long long i) } /** + * atomic64_set_unchecked - set atomic64 variable + * @v: pointer to type atomic64_unchecked_t + * @n: value to assign + * + * Atomically sets the value of @v to @n. + */ +static inline void atomic64_set_unchecked(atomic64_unchecked_t *v, long long i) +{ + unsigned high = (unsigned)(i >> 32); + unsigned low = (unsigned)i; + asm volatile(ATOMIC64_ALTERNATIVE(set) + : "+b" (low), "+c" (high) + : "S" (v) + : "eax", "edx", "memory" + ); +} + +/** * atomic64_read - read atomic64 variable * @v: pointer to type atomic64_t * @@ -93,6 +134,22 @@ static inline long long atomic64_read(atomic64_t *v) } /** + * atomic64_read_unchecked - read atomic64 variable + * @v: pointer to type atomic64_unchecked_t + * + * Atomically reads the value of @v and returns it. + */ +static inline long long __intentional_overflow(-1) atomic64_read_unchecked(atomic64_unchecked_t *v) +{ + long long r; + asm volatile(ATOMIC64_ALTERNATIVE(read_unchecked) + : "=A" (r), "+c" (v) + : : "memory" + ); + return r; + } + +/** * atomic64_add_return - add and return * @i: integer value to add * @v: pointer to type atomic64_t @@ -108,6 +165,22 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v) return i; } +/** + * atomic64_add_return_unchecked - add and return + * @i: integer value to add + * @v: pointer to type atomic64_unchecked_t + * + * Atomically adds @i to @v and returns @i + *@v + */ +static inline long long atomic64_add_return_unchecked(long long i, atomic64_unchecked_t *v) +{ + asm volatile(ATOMIC64_ALTERNATIVE(add_return_unchecked) + : "+A" (i), "+c" (v) + : : "memory" + ); + return i; +} + /* * Other variants with different arithmetic operators: */ @@ -131,6 +204,17 @@ static inline long long atomic64_inc_return(atomic64_t *v) return a; } +static inline long long atomic64_inc_return_unchecked(atomic64_unchecked_t *v) +{ + long long a; + asm volatile(ATOMIC64_ALTERNATIVE(inc_return_unchecked) + : "=A" (a) + : "S" (v) + : "memory", "ecx" + ); + return a; +} + static inline long long atomic64_dec_return(atomic64_t *v) { long long a; @@ -159,6 +243,22 @@ static inline long long atomic64_add(long long i, atomic64_t *v) } /** + * atomic64_add_unchecked - add integer to atomic64 variable + * @i: integer value to add + * @v: pointer to type atomic64_unchecked_t + * + * Atomically adds @i to @v. + */ +static inline long long atomic64_add_unchecked(long long i, atomic64_unchecked_t *v) +{ + asm volatile(ATOMIC64_ALTERNATIVE_(add_unchecked, add_return_unchecked) + : "+A" (i), "+c" (v) + : : "memory" + ); + return i; +} + +/** * atomic64_sub - subtract the atomic64 variable * @i: integer value to subtract * @v: pointer to type atomic64_t diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 0e1cbfc..a891fc7 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -18,7 +18,19 @@ */ static inline long atomic64_read(const atomic64_t *v) { - return (*(volatile long *)&(v)->counter); + return (*(volatile const long *)&(v)->counter); +} + +/** + * atomic64_read_unchecked - read atomic64 variable + * @v: pointer of type atomic64_unchecked_t + * + * Atomically reads the value of @v. 
+ * Doesn't imply a read memory barrier. + */ +static inline long __intentional_overflow(-1) atomic64_read_unchecked(const atomic64_unchecked_t *v) +{ + return (*(volatile const long *)&(v)->counter); } /** @@ -34,6 +46,18 @@ static inline void atomic64_set(atomic64_t *v, long i) } /** + * atomic64_set_unchecked - set atomic64 variable + * @v: pointer to type atomic64_unchecked_t + * @i: required value + * + * Atomically sets the value of @v to @i. + */ +static inline void atomic64_set_unchecked(atomic64_unchecked_t *v, long i) +{ + v->counter = i; +} + +/** * atomic64_add - add integer to atomic64 variable * @i: integer value to add * @v: pointer to type atomic64_t @@ -42,6 +66,28 @@ static inline void atomic64_set(atomic64_t *v, long i) */ static inline void atomic64_add(long i, atomic64_t *v) { + asm volatile(LOCK_PREFIX "addq %1,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "subq %1,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "=m" (v->counter) + : "er" (i), "m" (v->counter)); +} + +/** + * atomic64_add_unchecked - add integer to atomic64 variable + * @i: integer value to add + * @v: pointer to type atomic64_unchecked_t + * + * Atomically adds @i to @v. + */ +static inline void atomic64_add_unchecked(long i, atomic64_unchecked_t *v) +{ asm volatile(LOCK_PREFIX "addq %1,%0" : "=m" (v->counter) : "er" (i), "m" (v->counter)); @@ -56,7 +102,29 @@ static inline void atomic64_add(long i, atomic64_t *v) */ static inline void atomic64_sub(long i, atomic64_t *v) { - asm volatile(LOCK_PREFIX "subq %1,%0" + asm volatile(LOCK_PREFIX "subq %1,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "addq %1,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "=m" (v->counter) + : "er" (i), "m" (v->counter)); +} + +/** + * atomic64_sub_unchecked - subtract the atomic64 variable + * @i: integer value to subtract + * @v: pointer to type atomic64_unchecked_t + * + * Atomically subtracts @i from @v. + */ +static inline void atomic64_sub_unchecked(long i, atomic64_unchecked_t *v) +{ + asm volatile(LOCK_PREFIX "subq %1,%0\n" : "=m" (v->counter) : "er" (i), "m" (v->counter)); } @@ -74,7 +142,16 @@ static inline int atomic64_sub_and_test(long i, atomic64_t *v) { unsigned char c; - asm volatile(LOCK_PREFIX "subq %2,%0; sete %1" + asm volatile(LOCK_PREFIX "subq %2,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "addq %2,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sete %1\n" : "=m" (v->counter), "=qm" (c) : "er" (i), "m" (v->counter) : "memory"); return c; @@ -88,6 +165,27 @@ static inline int atomic64_sub_and_test(long i, atomic64_t *v) */ static inline void atomic64_inc(atomic64_t *v) { + asm volatile(LOCK_PREFIX "incq %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "decq %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "=m" (v->counter) + : "m" (v->counter)); +} + +/** + * atomic64_inc_unchecked - increment atomic64 variable + * @v: pointer to type atomic64_unchecked_t + * + * Atomically increments @v by 1. 
+ */ +static inline void atomic64_inc_unchecked(atomic64_unchecked_t *v) +{ asm volatile(LOCK_PREFIX "incq %0" : "=m" (v->counter) : "m" (v->counter)); @@ -101,7 +199,28 @@ static inline void atomic64_inc(atomic64_t *v) */ static inline void atomic64_dec(atomic64_t *v) { - asm volatile(LOCK_PREFIX "decq %0" + asm volatile(LOCK_PREFIX "decq %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "incq %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "=m" (v->counter) + : "m" (v->counter)); +} + +/** + * atomic64_dec_unchecked - decrement atomic64 variable + * @v: pointer to type atomic64_t + * + * Atomically decrements @v by 1. + */ +static inline void atomic64_dec_unchecked(atomic64_unchecked_t *v) +{ + asm volatile(LOCK_PREFIX "decq %0\n" : "=m" (v->counter) : "m" (v->counter)); } @@ -118,7 +237,16 @@ static inline int atomic64_dec_and_test(atomic64_t *v) { unsigned char c; - asm volatile(LOCK_PREFIX "decq %0; sete %1" + asm volatile(LOCK_PREFIX "decq %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "incq %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sete %1\n" : "=m" (v->counter), "=qm" (c) : "m" (v->counter) : "memory"); return c != 0; @@ -136,7 +264,16 @@ static inline int atomic64_inc_and_test(atomic64_t *v) { unsigned char c; - asm volatile(LOCK_PREFIX "incq %0; sete %1" + asm volatile(LOCK_PREFIX "incq %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "decq %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sete %1\n" : "=m" (v->counter), "=qm" (c) : "m" (v->counter) : "memory"); return c != 0; @@ -155,7 +292,16 @@ static inline int atomic64_add_negative(long i, atomic64_t *v) { unsigned char c; - asm volatile(LOCK_PREFIX "addq %2,%0; sets %1" + asm volatile(LOCK_PREFIX "addq %2,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX "subq %2,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sets %1\n" : "=m" (v->counter), "=qm" (c) : "er" (i), "m" (v->counter) : "memory"); return c; @@ -170,6 +316,18 @@ static inline int atomic64_add_negative(long i, atomic64_t *v) */ static inline long atomic64_add_return(long i, atomic64_t *v) { + return i + xadd_check_overflow(&v->counter, i); +} + +/** + * atomic64_add_return_unchecked - add and return + * @i: integer value to add + * @v: pointer to type atomic64_unchecked_t + * + * Atomically adds @i to @v and returns @i + @v + */ +static inline long atomic64_add_return_unchecked(long i, atomic64_unchecked_t *v) +{ return i + xadd(&v->counter, i); } @@ -179,6 +337,10 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) } #define atomic64_inc_return(v) (atomic64_add_return(1, (v))) +static inline long atomic64_inc_return_unchecked(atomic64_unchecked_t *v) +{ + return atomic64_add_return_unchecked(1, v); +} #define atomic64_dec_return(v) (atomic64_sub_return(1, (v))) static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new) @@ -186,6 +348,11 @@ static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new) return cmpxchg(&v->counter, old, new); } +static inline long atomic64_cmpxchg_unchecked(atomic64_unchecked_t *v, long old, long new) +{ + return cmpxchg(&v->counter, old, new); +} + static inline long atomic64_xchg(atomic64_t *v, long new) { return xchg(&v->counter, new); @@ -202,17 +369,30 @@ static inline long atomic64_xchg(atomic64_t *v, long new) */ static inline int atomic64_add_unless(atomic64_t *v, long a, long u) { - long c, old; + long c, old, new; c = atomic64_read(v); for (;;) { - if (unlikely(c == (u))) + if 
(unlikely(c == u)) break; - old = atomic64_cmpxchg((v), c, c + (a)); + + asm volatile("add %2,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + "sub %2,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "=r" (new) + : "0" (c), "ir" (a)); + + old = atomic64_cmpxchg(v, c, new); if (likely(old == c)) break; c = old; } - return c != (u); + return c != u; } #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 1775d6e..f84af0c 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -38,7 +38,7 @@ * a mask operation on a byte. */ #define IS_IMMEDIATE(nr) (__builtin_constant_p(nr)) -#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3)) +#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((volatile void *)(addr) + ((nr)>>3)) #define CONST_MASK(nr) (1 << ((nr) & 7)) /** @@ -344,7 +344,7 @@ static int test_bit(int nr, const volatile unsigned long *addr); * * Undefined if no bit exists, so code should check against 0 first. */ -static inline unsigned long __ffs(unsigned long word) +static inline unsigned long __intentional_overflow(-1) __ffs(unsigned long word) { asm("bsf %1,%0" : "=r" (word) @@ -358,7 +358,7 @@ static inline unsigned long __ffs(unsigned long word) * * Undefined if no zero exists, so code should check against ~0UL first. */ -static inline unsigned long ffz(unsigned long word) +static inline unsigned long __intentional_overflow(-1) ffz(unsigned long word) { asm("bsf %1,%0" : "=r" (word) @@ -372,7 +372,7 @@ static inline unsigned long ffz(unsigned long word) * * Undefined if no set bit exists, so code should check against 0 first. */ -static inline unsigned long __fls(unsigned long word) +static inline unsigned long __intentional_overflow(-1) __fls(unsigned long word) { asm("bsr %1,%0" : "=r" (word) @@ -419,7 +419,7 @@ static inline int ffs(int x) * set bit if value is nonzero. The last (most significant) bit is * at position 32. */ -static inline int fls(int x) +static inline int __intentional_overflow(-1) fls(int x) { int r; #ifdef CONFIG_X86_CMOV diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 5e1a2ee..c9f9533 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -11,10 +11,15 @@ #include /* Physical address where kernel should be loaded. 
*/ -#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ +#define ____LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ + (CONFIG_PHYSICAL_ALIGN - 1)) \ & ~(CONFIG_PHYSICAL_ALIGN - 1)) +#ifndef __ASSEMBLY__ +extern unsigned char __LOAD_PHYSICAL_ADDR[]; +#define LOAD_PHYSICAL_ADDR ((unsigned long)__LOAD_PHYSICAL_ADDR) +#endif + /* Minimum kernel alignment, as a power of two */ #ifdef CONFIG_X86_64 #define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h index 48f99f1..d78ebf9 100644 --- a/arch/x86/include/asm/cache.h +++ b/arch/x86/include/asm/cache.h @@ -5,12 +5,13 @@ /* L1 cache line size */ #define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) -#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +#define L1_CACHE_BYTES (_AC(1,UL) << L1_CACHE_SHIFT) #define __read_mostly __attribute__((__section__(".data..read_mostly"))) +#define __read_only __attribute__((__section__(".data..read_only"))) #define INTERNODE_CACHE_SHIFT CONFIG_X86_INTERNODE_CACHE_SHIFT -#define INTERNODE_CACHE_BYTES (1 << INTERNODE_CACHE_SHIFT) +#define INTERNODE_CACHE_BYTES (_AC(1,UL) << INTERNODE_CACHE_SHIFT) #ifdef CONFIG_X86_VSMP #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 4e12668..501d239 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -26,7 +26,7 @@ static inline unsigned long get_page_memtype(struct page *pg) unsigned long pg_flags = pg->flags & _PGMT_MASK; if (pg_flags == _PGMT_DEFAULT) - return -1; + return ~0UL; else if (pg_flags == _PGMT_WC) return _PAGE_CACHE_WC; else if (pg_flags == _PGMT_UC_MINUS) diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index a9e3a74..44966f3 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -82,103 +82,113 @@ For 32-bit we have the following conventions - kernel is built with #define RSP (152) #define SS (160) -#define ARGOFFSET R11 -#define SWFRAME ORIG_RAX +#define ARGOFFSET R15 .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1 - subq $9*8+\addskip, %rsp - CFI_ADJUST_CFA_OFFSET 9*8+\addskip - movq_cfi rdi, 8*8 - movq_cfi rsi, 7*8 - movq_cfi rdx, 6*8 + subq $ORIG_RAX-ARGOFFSET+\addskip, %rsp + CFI_ADJUST_CFA_OFFSET ORIG_RAX-ARGOFFSET+\addskip + movq_cfi rdi, RDI + movq_cfi rsi, RSI + movq_cfi rdx, RDX .if \save_rcx - movq_cfi rcx, 5*8 + movq_cfi rcx, RCX .endif - movq_cfi rax, 4*8 + movq_cfi rax, RAX .if \save_r891011 - movq_cfi r8, 3*8 - movq_cfi r9, 2*8 - movq_cfi r10, 1*8 - movq_cfi r11, 0*8 + movq_cfi r8, R8 + movq_cfi r9, R9 + movq_cfi r10, R10 + movq_cfi r11, R11 .endif +#ifdef CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_OR + movq_cfi r12, R12 +#endif + .endm -#define ARG_SKIP (9*8) +#define ARG_SKIP ORIG_RAX .macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \ rstor_r8910=1, rstor_rdx=1 + +#ifdef CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_OR + movq_cfi_restore R12, r12 +#endif + .if \rstor_r11 - movq_cfi_restore 0*8, r11 + movq_cfi_restore R11, r11 .endif .if \rstor_r8910 - movq_cfi_restore 1*8, r10 - movq_cfi_restore 2*8, r9 - movq_cfi_restore 3*8, r8 + movq_cfi_restore R10, r10 + movq_cfi_restore R9, r9 + movq_cfi_restore R8, r8 .endif .if \rstor_rax - movq_cfi_restore 4*8, rax + movq_cfi_restore RAX, rax .endif .if \rstor_rcx - movq_cfi_restore 5*8, rcx + movq_cfi_restore RCX, rcx .endif .if \rstor_rdx - movq_cfi_restore 6*8, rdx + movq_cfi_restore RDX, rdx .endif - movq_cfi_restore 7*8, rsi - movq_cfi_restore 8*8, rdi + movq_cfi_restore RSI, rsi + 
movq_cfi_restore RDI, rdi - .if ARG_SKIP+\addskip > 0 - addq $ARG_SKIP+\addskip, %rsp - CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip) + .if ORIG_RAX+\addskip > 0 + addq $ORIG_RAX+\addskip, %rsp + CFI_ADJUST_CFA_OFFSET -(ORIG_RAX+\addskip) .endif .endm - .macro LOAD_ARGS offset, skiprax=0 - movq \offset(%rsp), %r11 - movq \offset+8(%rsp), %r10 - movq \offset+16(%rsp), %r9 - movq \offset+24(%rsp), %r8 - movq \offset+40(%rsp), %rcx - movq \offset+48(%rsp), %rdx - movq \offset+56(%rsp), %rsi - movq \offset+64(%rsp), %rdi + .macro LOAD_ARGS skiprax=0 + movq R11(%rsp), %r11 + movq R10(%rsp), %r10 + movq R9(%rsp), %r9 + movq R8(%rsp), %r8 + movq RCX(%rsp), %rcx + movq RDX(%rsp), %rdx + movq RSI(%rsp), %rsi + movq RDI(%rsp), %rdi .if \skiprax .else - movq \offset+72(%rsp), %rax + movq RAX(%rsp), %rax .endif .endm -#define REST_SKIP (6*8) - .macro SAVE_REST - subq $REST_SKIP, %rsp - CFI_ADJUST_CFA_OFFSET REST_SKIP - movq_cfi rbx, 5*8 - movq_cfi rbp, 4*8 - movq_cfi r12, 3*8 - movq_cfi r13, 2*8 - movq_cfi r14, 1*8 - movq_cfi r15, 0*8 + movq_cfi rbx, RBX + movq_cfi rbp, RBP + +#ifndef CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_OR + movq_cfi r12, R12 +#endif + + movq_cfi r13, R13 + movq_cfi r14, R14 + movq_cfi r15, R15 .endm .macro RESTORE_REST - movq_cfi_restore 0*8, r15 - movq_cfi_restore 1*8, r14 - movq_cfi_restore 2*8, r13 - movq_cfi_restore 3*8, r12 - movq_cfi_restore 4*8, rbp - movq_cfi_restore 5*8, rbx - addq $REST_SKIP, %rsp - CFI_ADJUST_CFA_OFFSET -(REST_SKIP) + movq_cfi_restore R15, r15 + movq_cfi_restore R14, r14 + movq_cfi_restore R13, r13 + +#ifndef CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_OR + movq_cfi_restore R12, r12 +#endif + + movq_cfi_restore RBP, rbp + movq_cfi_restore RBX, rbx .endm .macro SAVE_ALL diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h index 46fc474..b02b0f9 100644 --- a/arch/x86/include/asm/checksum_32.h +++ b/arch/x86/include/asm/checksum_32.h @@ -31,6 +31,14 @@ asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst, int len, __wsum sum, int *src_err_ptr, int *dst_err_ptr); +asmlinkage __wsum csum_partial_copy_generic_to_user(const void *src, void *dst, + int len, __wsum sum, + int *src_err_ptr, int *dst_err_ptr); + +asmlinkage __wsum csum_partial_copy_generic_from_user(const void *src, void *dst, + int len, __wsum sum, + int *src_err_ptr, int *dst_err_ptr); + /* * Note: when you get a NULL pointer exception here this means someone * passed in an incorrect kernel address to one of these functions. 
@@ -50,7 +58,7 @@ static inline __wsum csum_partial_copy_from_user(const void __user *src, int *err_ptr) { might_sleep(); - return csum_partial_copy_generic((__force void *)src, dst, + return csum_partial_copy_generic_from_user((__force void *)src, dst, len, sum, err_ptr, NULL); } @@ -178,7 +186,7 @@ static inline __wsum csum_and_copy_to_user(const void *src, { might_sleep(); if (access_ok(VERIFY_WRITE, dst, len)) - return csum_partial_copy_generic(src, (__force void *)dst, + return csum_partial_copy_generic_to_user(src, (__force void *)dst, len, sum, NULL, err_ptr); if (len) diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index 5d3acdf..6447a02 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -14,6 +14,8 @@ extern void __cmpxchg_wrong_size(void) __compiletime_error("Bad argument size for cmpxchg"); extern void __xadd_wrong_size(void) __compiletime_error("Bad argument size for xadd"); +extern void __xadd_check_overflow_wrong_size(void) + __compiletime_error("Bad argument size for xadd_check_overflow"); /* * Constants for operation sizes. On 32-bit, the 64-bit size it set to @@ -195,6 +197,34 @@ extern void __xadd_wrong_size(void) __ret; \ }) +#define __xadd_check_overflow(ptr, inc, lock) \ + ({ \ + __typeof__ (*(ptr)) __ret = (inc); \ + switch (sizeof(*(ptr))) { \ + case __X86_CASE_L: \ + asm volatile (lock "xaddl %0, %1\n" \ + "jno 0f\n" \ + "mov %0,%1\n" \ + "int $4\n0:\n" \ + _ASM_EXTABLE(0b, 0b) \ + : "+r" (__ret), "+m" (*(ptr)) \ + : : "memory", "cc"); \ + break; \ + case __X86_CASE_Q: \ + asm volatile (lock "xaddq %q0, %1\n" \ + "jno 0f\n" \ + "mov %0,%1\n" \ + "int $4\n0:\n" \ + _ASM_EXTABLE(0b, 0b) \ + : "+r" (__ret), "+m" (*(ptr)) \ + : : "memory", "cc"); \ + break; \ + default: \ + __xadd_check_overflow_wrong_size(); \ + } \ + __ret; \ + }) + /* * xadd() adds "inc" to "*ptr" and atomically returns the previous * value of "*ptr". @@ -207,4 +237,6 @@ extern void __xadd_wrong_size(void) #define xadd_sync(ptr, inc) __xadd((ptr), (inc), "lock; ") #define xadd_local(ptr, inc) __xadd((ptr), (inc), "") +#define xadd_check_overflow(ptr, inc) __xadd_check_overflow((ptr), (inc), LOCK_PREFIX) + #endif /* ASM_X86_CMPXCHG_H */ diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 30d737e..9830a9b 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -194,7 +194,7 @@ typedef struct user_regs_struct32 compat_elf_gregset_t; * as pointers because the syscall entry code will have * appropriately converted them already. 
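For read-modify-write primitives that must return the old value, the cmpxchg.h hunk above adds xadd_check_overflow() (used by atomic64_add_return() earlier): a plain xadd followed by the same overflow test, except that the recovery step writes the pre-add value back before trapping. Roughly, for the 64-bit case (sketch only; the real macro dispatches on sizeof(*ptr) and wires the trap into the exception table):

static inline long xadd_check_overflow_sketch(long *ptr, long inc)
{
	long old = inc;

	asm volatile("lock xaddq %0, %1\n\t"
		     "jno 0f\n\t"	/* signed overflow in *ptr?         */
		     "mov %0, %1\n\t"	/* yes: restore the previous value  */
		     "int $4\n"
		     "0:\n"
		     : "+r" (old), "+m" (*ptr)
		     : : "memory", "cc");
	return old;		/* previous value of *ptr, as with xadd() */
}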
*/ -typedef u32 compat_uptr_t; +typedef u32 __user compat_uptr_t; static inline void __user *compat_ptr(compat_uptr_t uptr) { diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index b8a5fe5..fbbe2c2 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -198,8 +198,9 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ #define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ -#define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */ +#define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Prevention */ #define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ +#define X86_FEATURE_SMAP (9*32+20) /* Supervisor Mode Access Prevention */ #if defined(__KERNEL__) && !defined(__ASSEMBLY__) @@ -364,7 +365,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) ".section .discard,\"aw\",@progbits\n" " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ ".previous\n" - ".section .altinstr_replacement,\"ax\"\n" + ".section .altinstr_replacement,\"a\"\n" "3: movb $1,%0\n" "4:\n" ".previous\n" diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 3225868..e0fb1f6 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -16,6 +17,7 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in desc->type = (info->read_exec_only ^ 1) << 1; desc->type |= info->contents << 2; + desc->type |= info->seg_not_present ^ 1; desc->s = 1; desc->dpl = 0x3; @@ -34,17 +36,12 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in } extern struct desc_ptr idt_descr; -extern gate_desc idt_table[]; - -struct gdt_page { - struct desc_struct gdt[GDT_ENTRIES]; -} __attribute__((aligned(PAGE_SIZE))); - -DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page); +extern gate_desc idt_table[256]; +extern struct desc_struct cpu_gdt_table[NR_CPUS][PAGE_SIZE / sizeof(struct desc_struct)]; static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) { - return per_cpu(gdt_page, cpu).gdt; + return cpu_gdt_table[cpu]; } #ifdef CONFIG_X86_64 @@ -69,8 +66,14 @@ static inline void pack_gate(gate_desc *gate, unsigned char type, unsigned long base, unsigned dpl, unsigned flags, unsigned short seg) { - gate->a = (seg << 16) | (base & 0xffff); - gate->b = (base & 0xffff0000) | (((0x80 | type | (dpl << 5)) & 0xff) << 8); + gate->gate.offset_low = base; + gate->gate.seg = seg; + gate->gate.reserved = 0; + gate->gate.type = type; + gate->gate.s = 0; + gate->gate.dpl = dpl; + gate->gate.p = 1; + gate->gate.offset_high = base >> 16; } #endif @@ -115,12 +118,16 @@ static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate) { + pax_open_kernel(); memcpy(&idt[entry], gate, sizeof(*gate)); + pax_close_kernel(); } static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry, const void *desc) { + pax_open_kernel(); memcpy(&ldt[entry], desc, 8); + pax_close_kernel(); } static inline void @@ -134,7 +141,9 @@ native_write_gdt_entry(struct desc_struct *gdt, int entry, const void *desc, int default: size = sizeof(*gdt); break; } + pax_open_kernel(); memcpy(&gdt[entry], desc, size); + pax_close_kernel(); } static inline void pack_descriptor(struct desc_struct *desc, unsigned long base, @@ -207,7 +216,9 @@ static 
inline void native_set_ldt(const void *addr, unsigned int entries) static inline void native_load_tr_desc(void) { + pax_open_kernel(); asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); + pax_close_kernel(); } static inline void native_load_gdt(const struct desc_ptr *dtr) @@ -244,8 +255,10 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) struct desc_struct *gdt = get_cpu_gdt_table(cpu); unsigned int i; + pax_open_kernel(); for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; + pax_close_kernel(); } /* This intentionally ignores lm, since 32-bit apps don't have that field. */ @@ -292,7 +305,7 @@ static inline void load_LDT(mm_context_t *pc) preempt_enable(); } -static inline unsigned long get_desc_base(const struct desc_struct *desc) +static inline unsigned long __intentional_overflow(-1) get_desc_base(const struct desc_struct *desc) { return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); } @@ -315,7 +328,7 @@ static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) desc->limit = (limit >> 16) & 0xf; } -static inline void _set_gate(int gate, unsigned type, void *addr, +static inline void _set_gate(int gate, unsigned type, const void *addr, unsigned dpl, unsigned ist, unsigned seg) { gate_desc s; @@ -334,7 +347,7 @@ static inline void _set_gate(int gate, unsigned type, void *addr, * Pentium F0 0F bugfix can have resulted in the mapped * IDT being write-protected. */ -static inline void set_intr_gate(unsigned int n, void *addr) +static inline void set_intr_gate(unsigned int n, const void *addr) { BUG_ON((unsigned)n > 0xFF); _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); @@ -364,19 +377,19 @@ static inline void alloc_intr_gate(unsigned int n, void *addr) /* * This routine sets up an interrupt gate at directory privilege level 3. 
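The desc.h changes above make the IDT, GDT and LDT effectively read-only under KERNEXEC, so every legitimate update (write_idt_entry, write_ldt_entry, write_gdt_entry, load_tr, load_tls) is bracketed by pax_open_kernel()/pax_close_kernel(). The shape of any such update is simply the following (hypothetical helper, shown only to illustrate the bracketing; the CR0.WP mechanics behind the two calls appear in the pgtable.h hunk further down):

static inline void write_ro_descriptor_word(unsigned long *dst, unsigned long val)
{
	pax_open_kernel();	/* lift kernel write protection on this CPU */
	*dst = val;		/* the one permitted write                  */
	pax_close_kernel();	/* restore write protection                 */
}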
*/ -static inline void set_system_intr_gate(unsigned int n, void *addr) +static inline void set_system_intr_gate(unsigned int n, const void *addr) { BUG_ON((unsigned)n > 0xFF); _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS); } -static inline void set_system_trap_gate(unsigned int n, void *addr) +static inline void set_system_trap_gate(unsigned int n, const void *addr) { BUG_ON((unsigned)n > 0xFF); _set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS); } -static inline void set_trap_gate(unsigned int n, void *addr) +static inline void set_trap_gate(unsigned int n, const void *addr) { BUG_ON((unsigned)n > 0xFF); _set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS); @@ -385,19 +398,31 @@ static inline void set_trap_gate(unsigned int n, void *addr) static inline void set_task_gate(unsigned int n, unsigned int gdt_entry) { BUG_ON((unsigned)n > 0xFF); - _set_gate(n, GATE_TASK, (void *)0, 0, 0, (gdt_entry<<3)); + _set_gate(n, GATE_TASK, (const void *)0, 0, 0, (gdt_entry<<3)); } -static inline void set_intr_gate_ist(int n, void *addr, unsigned ist) +static inline void set_intr_gate_ist(int n, const void *addr, unsigned ist) { BUG_ON((unsigned)n > 0xFF); _set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS); } -static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist) +static inline void set_system_intr_gate_ist(int n, const void *addr, unsigned ist) { BUG_ON((unsigned)n > 0xFF); _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS); } +#ifdef CONFIG_X86_32 +static inline void set_user_cs(unsigned long base, unsigned long limit, int cpu) +{ + struct desc_struct d; + + if (likely(limit)) + limit = (limit - 1UL) >> PAGE_SHIFT; + pack_descriptor(&d, base, limit, 0xFB, 0xC); + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_DEFAULT_USER_CS, &d, DESCTYPE_S); +} +#endif + #endif /* _ASM_X86_DESC_H */ diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index 278441f..b95a174 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h @@ -31,6 +31,12 @@ struct desc_struct { unsigned base1: 8, type: 4, s: 1, dpl: 2, p: 1; unsigned limit: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8; }; + struct { + u16 offset_low; + u16 seg; + unsigned reserved: 8, type: 4, s: 1, dpl: 2, p: 1; + unsigned offset_high: 16; + } gate; }; } __attribute__((packed)); diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h index 9a2d644..5abb141 100644 --- a/arch/x86/include/asm/div64.h +++ b/arch/x86/include/asm/div64.h @@ -33,7 +33,7 @@ __mod; \ }) -static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) +static inline u64 __intentional_overflow(-1) div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) { union { u64 v64; diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 908b969..a1f4eb4 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -69,7 +69,7 @@ struct e820map { #define ISA_START_ADDRESS 0xa0000 #define ISA_END_ADDRESS 0x100000 -#define BIOS_BEGIN 0x000a0000 +#define BIOS_BEGIN 0x000c0000 #define BIOS_END 0x00100000 #define BIOS_ROM_BASE 0xffe00000 diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 035cd81..db1e26d 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -95,6 +95,7 @@ extern void efi_set_executable(efi_memory_desc_t *md, bool executable); extern void efi_memblock_x86_reserve_range(void); extern void efi_call_phys_prelog(void); extern void efi_call_phys_epilog(void); +extern void efi_setup_pgd(void); 
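The gate bit-field view added to desc_struct in the desc_defs.h hunk above encodes exactly the same 8-byte 32-bit gate descriptor that pack_gate() used to assemble by hand with shifts and masks; the rewrite only makes the field boundaries explicit. A small user-space check of the equivalence (illustrative values, compiled for x86 where bit-fields are allocated from the low bit up):

#include <assert.h>
#include <stdint.h>
#include <string.h>

struct gate32 {	/* mirrors the 'gate' view in desc_defs.h */
	uint16_t offset_low;
	uint16_t seg;
	uint32_t reserved : 8, type : 4, s : 1, dpl : 2, p : 1, offset_high : 16;
};

int main(void)
{
	uint32_t base = 0x12345678, seg = 0x60, type = 0xE, dpl = 3, words[2];
	/* the old open-coded encoding removed from pack_gate() */
	uint32_t a = (seg << 16) | (base & 0xffff);
	uint32_t b = (base & 0xffff0000) | (((0x80 | type | (dpl << 5)) & 0xff) << 8);
	struct gate32 g = {
		.offset_low = (uint16_t)base, .seg = (uint16_t)seg, .reserved = 0,
		.type = type, .s = 0, .dpl = dpl, .p = 1, .offset_high = base >> 16,
	};

	memcpy(words, &g, sizeof(words));
	assert(words[0] == a && words[1] == b);	/* same descriptor, either way */
	return 0;
}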
#ifndef CONFIG_EFI /* diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 5f962df..7289f09 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -238,7 +238,25 @@ extern int force_personality32; the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. */ +#ifdef CONFIG_PAX_SEGMEXEC +#define ELF_ET_DYN_BASE ((current->mm->pax_flags & MF_PAX_SEGMEXEC) ? SEGMEXEC_TASK_SIZE/3*2 : TASK_SIZE/3*2) +#else #define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) +#endif + +#ifdef CONFIG_PAX_ASLR +#ifdef CONFIG_X86_32 +#define PAX_ELF_ET_DYN_BASE 0x10000000UL + +#define PAX_DELTA_MMAP_LEN (current->mm->pax_flags & MF_PAX_SEGMEXEC ? 15 : 16) +#define PAX_DELTA_STACK_LEN (current->mm->pax_flags & MF_PAX_SEGMEXEC ? 15 : 16) +#else +#define PAX_ELF_ET_DYN_BASE 0x400000UL + +#define PAX_DELTA_MMAP_LEN ((test_thread_flag(TIF_IA32)) ? 16 : TASK_SIZE_MAX_SHIFT - PAGE_SHIFT - 3) +#define PAX_DELTA_STACK_LEN ((test_thread_flag(TIF_IA32)) ? 16 : TASK_SIZE_MAX_SHIFT - PAGE_SHIFT - 3) +#endif +#endif /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. This could be done in user space, @@ -291,9 +309,7 @@ do { \ #define ARCH_DLINFO \ do { \ - if (vdso_enabled) \ - NEW_AUX_ENT(AT_SYSINFO_EHDR, \ - (unsigned long)current->mm->context.vdso); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, current->mm->context.vdso); \ } while (0) #define AT_SYSINFO 32 @@ -304,7 +320,7 @@ do { \ #endif /* !CONFIG_X86_32 */ -#define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso) +#define VDSO_CURRENT_BASE (current->mm->context.vdso) #define VDSO_ENTRY \ ((unsigned long)VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall)) @@ -318,9 +334,6 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, extern int syscall32_setup_pages(struct linux_binprm *, int exstack); #define compat_arch_setup_additional_pages syscall32_setup_pages -extern unsigned long arch_randomize_brk(struct mm_struct *mm); -#define arch_randomize_brk arch_randomize_brk - /* * True on X86_32 or when emulating IA32 on X86_64 */ diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h index cc70c1c..d96d011 100644 --- a/arch/x86/include/asm/emergency-restart.h +++ b/arch/x86/include/asm/emergency-restart.h @@ -15,6 +15,6 @@ enum reboot_type { extern enum reboot_type reboot_type; -extern void machine_emergency_restart(void); +extern void machine_emergency_restart(void) __noreturn; #endif /* _ASM_X86_EMERGENCY_RESTART_H */ diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h index dbe82a5..459eb0b 100644 --- a/arch/x86/include/asm/floppy.h +++ b/arch/x86/include/asm/floppy.h @@ -229,18 +229,18 @@ static struct fd_routine_l { int (*_dma_setup)(char *addr, unsigned long size, int mode, int io); } fd_routine[] = { { - request_dma, - free_dma, - get_dma_residue, - dma_mem_alloc, - hard_dma_setup + ._request_dma = request_dma, + ._free_dma = free_dma, + ._get_dma_residue = get_dma_residue, + ._dma_mem_alloc = dma_mem_alloc, + ._dma_setup = hard_dma_setup }, { - vdma_request_dma, - vdma_nop, - vdma_get_dma_residue, - vdma_mem_alloc, - vdma_dma_setup + ._request_dma = vdma_request_dma, + ._free_dma = vdma_nop, + ._get_dma_residue = vdma_get_dma_residue, + ._dma_mem_alloc = vdma_mem_alloc, + ._dma_setup = vdma_dma_setup } }; diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index d09bb03..0a3629b 100644 --- 
a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h @@ -12,20 +12,22 @@ #include #define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ + typecheck(u32 __user *, uaddr); \ asm volatile("1:\t" insn "\n" \ "2:\t.section .fixup,\"ax\"\n" \ "3:\tmov\t%3, %1\n" \ "\tjmp\t2b\n" \ "\t.previous\n" \ _ASM_EXTABLE(1b, 3b) \ - : "=r" (oldval), "=r" (ret), "+m" (*uaddr) \ + : "=r" (oldval), "=r" (ret), "+m" (*(u32 __user *)____m(uaddr))\ : "i" (-EFAULT), "0" (oparg), "1" (0)) #define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ + typecheck(u32 __user *, uaddr); \ asm volatile("1:\tmovl %2, %0\n" \ "\tmovl\t%0, %3\n" \ "\t" insn "\n" \ - "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ + "2:\t" LOCK_PREFIX __copyuser_seg"cmpxchgl %3, %2\n" \ "\tjnz\t1b\n" \ "3:\t.section .fixup,\"ax\"\n" \ "4:\tmov\t%5, %1\n" \ @@ -34,7 +36,7 @@ _ASM_EXTABLE(1b, 4b) \ _ASM_EXTABLE(2b, 4b) \ : "=&a" (oldval), "=&r" (ret), \ - "+m" (*uaddr), "=&r" (tem) \ + "+m" (*(u32 __user *)____m(uaddr)), "=&r" (tem) \ : "r" (oparg), "i" (-EFAULT), "1" (0)) static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) @@ -61,10 +63,10 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) switch (op) { case FUTEX_OP_SET: - __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); + __futex_atomic_op1(__copyuser_seg"xchgl %0, %2", ret, oldval, uaddr, oparg); break; case FUTEX_OP_ADD: - __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval, + __futex_atomic_op1(LOCK_PREFIX __copyuser_seg"xaddl %0, %2", ret, oldval, uaddr, oparg); break; case FUTEX_OP_OR: @@ -123,13 +125,13 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" + asm volatile("1:\t" LOCK_PREFIX __copyuser_seg"cmpxchgl %4, %2\n" "2:\t.section .fixup, \"ax\"\n" "3:\tmov %3, %0\n" "\tjmp 2b\n" "\t.previous\n" _ASM_EXTABLE(1b, 3b) - : "+r" (ret), "=a" (oldval), "+m" (*uaddr) + : "+r" (ret), "=a" (oldval), "+m" (*(u32 __user *)____m(uaddr)) : "i" (-EFAULT), "r" (newval), "1" (oldval) : "memory" ); diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index eb92a6e..b98b2f4 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -136,8 +136,8 @@ extern void setup_ioapic_dest(void); extern void enable_IO_APIC(void); /* Statistics */ -extern atomic_t irq_err_count; -extern atomic_t irq_mis_count; +extern atomic_unchecked_t irq_err_count; +extern atomic_unchecked_t irq_mis_count; /* EISA */ extern void eisa_set_level_irq(unsigned int irq); diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index a850b4d..1d8dfb7 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -88,10 +88,12 @@ static inline void sanitize_i387_state(struct task_struct *tsk) } #ifdef CONFIG_X86_64 -static inline int fxrstor_checking(struct i387_fxsave_struct *fx) +static inline int fxrstor_checking(struct i387_fxsave_struct __user *fx) { int err; + fx = (struct i387_fxsave_struct __user *)____m(fx); + /* See comment in fxsave() below. */ #ifdef CONFIG_AS_FXSAVEQ asm volatile("1: fxrstorq %[fx]\n\t" @@ -121,6 +123,8 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) { int err; + fx = (struct i387_fxsave_struct __user *)____m(fx); + /* * Clear the bytes not touched by the fxsave and reserved * for the SW usage. 
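The hw_irq.h change above (irq_err_count and irq_mis_count becoming atomic_unchecked_t) shows the flip side of the REFCOUNT instrumentation: counters whose overflow would indicate a bug keep the trapping atomic_t operations, while pure statistics that may legitimately wrap move to the _unchecked type and the _unchecked operations defined alongside the atomic64 ones earlier in this patch. Schematically (hedged sketch, variable names made up):

static atomic_t object_refs;			/* reference count: must never wrap */
static atomic_unchecked_t spurious_irq_stat;	/* statistic: wrapping is harmless  */

static void refcount_vs_stat_example(void)
{
	atomic_inc(&object_refs);			/* overflow takes the int $4 path */
	atomic_inc_unchecked(&spurious_irq_stat);	/* plain "lock inc", no check     */
}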
@@ -189,15 +193,15 @@ static inline void fpu_fxsave(struct fpu *fpu) #else /* CONFIG_X86_32 */ /* perform fxrstor iff the processor has extended states, otherwise frstor */ -static inline int fxrstor_checking(struct i387_fxsave_struct *fx) +static inline int fxrstor_checking(struct i387_fxsave_struct __user *fx) { /* * The "nop" is needed to make the instructions the same * length. */ alternative_input( - "nop ; frstor %1", - "fxrstor %1", + __copyuser_seg" frstor %1; nop", + __copyuser_seg" fxrstor %1", X86_FEATURE_FXSR, "m" (*fx)); @@ -256,7 +260,14 @@ static inline int __save_init_fpu(struct task_struct *tsk) static inline int fpu_fxrstor_checking(struct fpu *fpu) { - return fxrstor_checking(&fpu->state->fxsave); + int ret; + mm_segment_t fs; + + fs = get_fs(); + set_fs(KERNEL_DS); + ret = fxrstor_checking(&fpu->state->fxsave); + set_fs(fs); + return ret; } static inline int fpu_restore_checking(struct fpu *fpu) @@ -424,7 +435,7 @@ static inline bool interrupted_kernel_fpu_idle(void) static inline bool interrupted_user_mode(void) { struct pt_regs *regs = get_irq_regs(); - return regs && user_mode_vm(regs); + return regs && user_mode(regs); } /* diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h index a203659..9889f1c 100644 --- a/arch/x86/include/asm/i8259.h +++ b/arch/x86/include/asm/i8259.h @@ -62,7 +62,7 @@ struct legacy_pic { void (*init)(int auto_eoi); int (*irq_pending)(unsigned int irq); void (*make_irq)(unsigned int irq); -}; +} __do_const; extern struct legacy_pic *legacy_pic; extern struct legacy_pic null_legacy_pic; diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 1f7e625..541485f 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -126,6 +126,12 @@ typedef struct compat_siginfo { int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ int _fd; } _sigpoll; + + struct { + unsigned int _call_addr; /* calling insn */ + int _syscall; /* triggering system call number */ + unsigned int _arch; /* AUDIT_ARCH_* of syscall */ + } _sigsys; } _sifields; } compat_siginfo_t; diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index d8e8eef..1765f78 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -51,12 +51,12 @@ static inline void name(type val, volatile void __iomem *addr) \ "m" (*(volatile type __force *)addr) barrier); } build_mmio_read(readb, "b", unsigned char, "=q", :"memory") -build_mmio_read(readw, "w", unsigned short, "=r", :"memory") -build_mmio_read(readl, "l", unsigned int, "=r", :"memory") +build_mmio_read(__intentional_overflow(-1) readw, "w", unsigned short, "=r", :"memory") +build_mmio_read(__intentional_overflow(-1) readl, "l", unsigned int, "=r", :"memory") build_mmio_read(__readb, "b", unsigned char, "=q", ) -build_mmio_read(__readw, "w", unsigned short, "=r", ) -build_mmio_read(__readl, "l", unsigned int, "=r", ) +build_mmio_read(__intentional_overflow(-1) __readw, "w", unsigned short, "=r", ) +build_mmio_read(__intentional_overflow(-1) __readl, "l", unsigned int, "=r", ) build_mmio_write(writeb, "b", unsigned char, "q", :"memory") build_mmio_write(writew, "w", unsigned short, "r", :"memory") @@ -184,7 +184,7 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) return ioremap_nocache(offset, size); } -extern void iounmap(volatile void __iomem *addr); +extern void iounmap(const volatile void __iomem *addr); extern void set_iounmap_nonlazy(void); @@ -194,6 +194,17 @@ extern void set_iounmap_nonlazy(void); #include +#define 
ARCH_HAS_VALID_PHYS_ADDR_RANGE +static inline int valid_phys_addr_range(unsigned long addr, size_t count) +{ + return ((addr + count + PAGE_SIZE - 1) >> PAGE_SHIFT) < (1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) ? 1 : 0; +} + +static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t count) +{ + return (pfn + (count >> PAGE_SHIFT)) < (1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) ? 1 : 0; +} + /* * Convert a virtual cached pointer to an uncached pointer */ diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 0a8b519..80e7d5b 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -141,6 +141,11 @@ static inline notrace unsigned long arch_local_irq_save(void) sti; \ sysexit +#define GET_CR0_INTO_RDI mov %cr0, %rdi +#define SET_RDI_INTO_CR0 mov %rdi, %cr0 +#define GET_CR3_INTO_RDI mov %cr3, %rdi +#define SET_RDI_INTO_CR3 mov %rdi, %cr3 + #else #define INTERRUPT_RETURN iret #define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 5478825..839e88c 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -37,13 +37,8 @@ typedef u8 kprobe_opcode_t; #define RELATIVEJUMP_SIZE 5 #define RELATIVECALL_OPCODE 0xe8 #define RELATIVE_ADDR_SIZE 4 -#define MAX_STACK_SIZE 64 -#define MIN_STACK_SIZE(ADDR) \ - (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ - THREAD_SIZE - (unsigned long)(ADDR))) \ - ? (MAX_STACK_SIZE) \ - : (((unsigned long)current_thread_info()) + \ - THREAD_SIZE - (unsigned long)(ADDR))) +#define MAX_STACK_SIZE 64UL +#define MIN_STACK_SIZE(ADDR) min(MAX_STACK_SIZE, current->thread.sp0 - (unsigned long)(ADDR)) #define flush_insn_slot(p) do { } while (0) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9171618..fe2b1da 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -45,6 +45,7 @@ #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ 0xFFFFFF0000000000ULL) +#define CR3_PCID_INVD (1UL << 63) #define CR4_RESERVED_BITS \ (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ @@ -460,7 +461,7 @@ struct kvm_arch { unsigned int n_requested_mmu_pages; unsigned int n_max_mmu_pages; unsigned int indirect_shadow_pages; - atomic_t invlpg_counter; + atomic_unchecked_t invlpg_counter; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; /* * Hash table of struct kvm_mmu_page. 
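The valid_phys_addr_range()/valid_mmap_phys_addr_range() helpers added in the io.h hunk above bound /dev/mem-style accesses by the number of physical address bits the CPU actually implements. Spelled out as a sketch (PAGE_SHIFT hard-coded to 12 here; the real code uses boot_cpu_data.x86_phys_bits and the kernel's PAGE_* macros):

static inline int phys_range_ok_sketch(unsigned long addr, unsigned long count,
				       unsigned int x86_phys_bits)
{
	unsigned long long end_pfn = (addr + count + 4096 - 1) >> 12;	/* rounded up */
	unsigned long long first_invalid_pfn = 1ULL << (x86_phys_bits - 12);

	return end_pfn < first_invalid_pfn;	/* whole range below the CPU's limit */
}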
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 9cdae5d..3534f04 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -11,33 +11,97 @@ typedef struct { atomic_long_t a; } local_t; +typedef struct { + atomic_long_unchecked_t a; +} local_unchecked_t; + #define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } #define local_read(l) atomic_long_read(&(l)->a) +#define local_read_unchecked(l) atomic_long_read_unchecked(&(l)->a) #define local_set(l, i) atomic_long_set(&(l)->a, (i)) +#define local_set_unchecked(l, i) atomic_long_set_unchecked(&(l)->a, (i)) static inline void local_inc(local_t *l) { - asm volatile(_ASM_INC "%0" + asm volatile(_ASM_INC "%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + _ASM_DEC "%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "+m" (l->a.counter)); +} + +static inline void local_inc_unchecked(local_unchecked_t *l) +{ + asm volatile(_ASM_INC "%0\n" : "+m" (l->a.counter)); } static inline void local_dec(local_t *l) { - asm volatile(_ASM_DEC "%0" + asm volatile(_ASM_DEC "%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + _ASM_INC "%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "+m" (l->a.counter)); +} + +static inline void local_dec_unchecked(local_unchecked_t *l) +{ + asm volatile(_ASM_DEC "%0\n" : "+m" (l->a.counter)); } static inline void local_add(long i, local_t *l) { - asm volatile(_ASM_ADD "%1,%0" + asm volatile(_ASM_ADD "%1,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + _ASM_SUB "%1,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "+m" (l->a.counter) + : "ir" (i)); +} + +static inline void local_add_unchecked(long i, local_unchecked_t *l) +{ + asm volatile(_ASM_ADD "%1,%0\n" : "+m" (l->a.counter) : "ir" (i)); } static inline void local_sub(long i, local_t *l) { - asm volatile(_ASM_SUB "%1,%0" + asm volatile(_ASM_SUB "%1,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + _ASM_ADD "%1,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + : "+m" (l->a.counter) + : "ir" (i)); +} + +static inline void local_sub_unchecked(long i, local_unchecked_t *l) +{ + asm volatile(_ASM_SUB "%1,%0\n" : "+m" (l->a.counter) : "ir" (i)); } @@ -55,7 +119,16 @@ static inline int local_sub_and_test(long i, local_t *l) { unsigned char c; - asm volatile(_ASM_SUB "%2,%0; sete %1" + asm volatile(_ASM_SUB "%2,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + _ASM_ADD "%2,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sete %1\n" : "+m" (l->a.counter), "=qm" (c) : "ir" (i) : "memory"); return c; @@ -73,7 +146,16 @@ static inline int local_dec_and_test(local_t *l) { unsigned char c; - asm volatile(_ASM_DEC "%0; sete %1" + asm volatile(_ASM_DEC "%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + _ASM_INC "%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sete %1\n" : "+m" (l->a.counter), "=qm" (c) : : "memory"); return c != 0; @@ -91,7 +173,16 @@ static inline int local_inc_and_test(local_t *l) { unsigned char c; - asm volatile(_ASM_INC "%0; sete %1" + asm volatile(_ASM_INC "%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + _ASM_DEC "%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sete %1\n" : "+m" (l->a.counter), "=qm" (c) : : "memory"); return c != 0; @@ -110,7 +201,16 @@ static inline int local_add_negative(long i, local_t *l) { unsigned char c; - asm volatile(_ASM_ADD "%2,%0; sets %1" + asm volatile(_ASM_ADD "%2,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + _ASM_SUB "%2,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + + "sets %1\n" : "+m" (l->a.counter), 
"=qm" (c) : "ir" (i) : "memory"); return c; @@ -133,7 +233,15 @@ static inline long local_add_return(long i, local_t *l) #endif /* Modern 486+ processor */ __i = i; - asm volatile(_ASM_XADD "%0, %1;" + asm volatile(_ASM_XADD "%0, %1\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + _ASM_MOV "%0,%1\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + : "+r" (i), "+m" (l->a.counter) : : "memory"); return i + __i; @@ -148,6 +256,38 @@ no_xadd: /* Legacy 386 processor */ #endif } +/** + * local_add_return_unchecked - add and return + * @i: integer value to add + * @l: pointer to type local_unchecked_t + * + * Atomically adds @i to @l and returns @i + @l + */ +static inline long local_add_return_unchecked(long i, local_unchecked_t *l) +{ + long __i; +#ifdef CONFIG_M386 + unsigned long flags; + if (unlikely(boot_cpu_data.x86 <= 3)) + goto no_xadd; +#endif + /* Modern 486+ processor */ + __i = i; + asm volatile(_ASM_XADD "%0, %1\n" + : "+r" (i), "+m" (l->a.counter) + : : "memory"); + return i + __i; + +#ifdef CONFIG_M386 +no_xadd: /* Legacy 386 processor */ + local_irq_save(flags); + __i = local_read_unchecked(l); + local_set_unchecked(l, i + __i); + local_irq_restore(flags); + return i + __i; +#endif +} + static inline long local_sub_return(long i, local_t *l) { return local_add_return(-i, l); @@ -158,6 +298,8 @@ static inline long local_sub_return(long i, local_t *l) #define local_cmpxchg(l, o, n) \ (cmpxchg_local(&((l)->a.counter), (o), (n))) +#define local_cmpxchg_unchecked(l, o, n) \ + (cmpxchg_local(&((l)->a.counter), (o), (n))) /* Always has a lock prefix */ #define local_xchg(l, n) (xchg(&((l)->a.counter), (n))) diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/asm/mman.h index 593e51d..fa69c9a 100644 --- a/arch/x86/include/asm/mman.h +++ b/arch/x86/include/asm/mman.h @@ -5,4 +5,14 @@ #include +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ +#ifdef CONFIG_X86_32 +#define arch_mmap_check i386_mmap_check +int i386_mmap_check(unsigned long addr, unsigned long len, + unsigned long flags); +#endif +#endif +#endif + #endif /* _ASM_X86_MMAN_H */ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 5f55e69..e20bfb1 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -9,7 +9,7 @@ * we put the segment information here. 
*/ typedef struct { - void *ldt; + struct desc_struct *ldt; int size; #ifdef CONFIG_X86_64 @@ -18,7 +18,19 @@ typedef struct { #endif struct mutex lock; - void *vdso; + unsigned long vdso; + +#ifdef CONFIG_X86_32 +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + unsigned long user_cs_base; + unsigned long user_cs_limit; + +#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_SMP) + cpumask_t cpu_user_cs_mask; +#endif + +#endif +#endif } mm_context_t; #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 6902152..da4283a 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -24,6 +24,18 @@ void destroy_context(struct mm_struct *mm); static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + unsigned int i; + pgd_t *pgd; + + pax_open_kernel(); + pgd = get_cpu_pgd(smp_processor_id()); + for (i = USER_PGD_PTRS; i < 2 * USER_PGD_PTRS; ++i) + set_pgd_batched(pgd+i, native_make_pgd(0)); + pax_close_kernel(); +#endif + #ifdef CONFIG_SMP if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); @@ -34,16 +46,30 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { unsigned cpu = smp_processor_id(); +#if defined(CONFIG_X86_32) && defined(CONFIG_SMP) && (defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC)) + int tlbstate = TLBSTATE_OK; +#endif if (likely(prev != next)) { #ifdef CONFIG_SMP +#if defined(CONFIG_X86_32) && (defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC)) + tlbstate = percpu_read(cpu_tlbstate.state); +#endif percpu_write(cpu_tlbstate.state, TLBSTATE_OK); percpu_write(cpu_tlbstate.active_mm, next); #endif cpumask_set_cpu(cpu, mm_cpumask(next)); /* Re-load page tables */ +#ifdef CONFIG_PAX_PER_CPU_PGD + pax_open_kernel(); + __clone_user_pgds(get_cpu_pgd(cpu), next->pgd); + __shadow_user_pgds(get_cpu_pgd(cpu) + USER_PGD_PTRS, next->pgd); + pax_close_kernel(); + load_cr3(get_cpu_pgd(cpu)); +#else load_cr3(next->pgd); +#endif /* stop flush ipis for the previous mm */ cpumask_clear_cpu(cpu, mm_cpumask(prev)); @@ -53,9 +79,38 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ if (unlikely(prev->context.ldt != next->context.ldt)) load_LDT_nolock(&next->context); - } + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_SMP) + if (!(__supported_pte_mask & _PAGE_NX)) { + smp_mb__before_clear_bit(); + cpu_clear(cpu, prev->context.cpu_user_cs_mask); + smp_mb__after_clear_bit(); + cpu_set(cpu, next->context.cpu_user_cs_mask); + } +#endif + +#if defined(CONFIG_X86_32) && (defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC)) + if (unlikely(prev->context.user_cs_base != next->context.user_cs_base || + prev->context.user_cs_limit != next->context.user_cs_limit)) + set_user_cs(next->context.user_cs_base, next->context.user_cs_limit, cpu); #ifdef CONFIG_SMP + else if (unlikely(tlbstate != TLBSTATE_OK)) + set_user_cs(next->context.user_cs_base, next->context.user_cs_limit, cpu); +#endif +#endif + + } else { + +#ifdef CONFIG_PAX_PER_CPU_PGD + pax_open_kernel(); + __clone_user_pgds(get_cpu_pgd(cpu), next->pgd); + __shadow_user_pgds(get_cpu_pgd(cpu) + USER_PGD_PTRS, next->pgd); + pax_close_kernel(); + load_cr3(get_cpu_pgd(cpu)); +#endif + +#ifdef CONFIG_SMP percpu_write(cpu_tlbstate.state, TLBSTATE_OK); 
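Under PAX_PER_CPU_PGD (and UDEREF on x86-64), switch_mm() above no longer points CR3 at the incoming mm's page-global directory. Each CPU owns a private PGD: the user half of next->pgd is copied into it (plus a shadowed copy for the user/kernel split under UDEREF), and CR3 always refers to the per-CPU table. A simplified sketch of the copy step (the real work is done by __clone_user_pgds()/__shadow_user_pgds(), declared later in the pgtable.h hunk; 256 user slots is an assumed value, see USER_PGD_PTRS below):

static void clone_user_pgds_sketch(pgd_t *cpu_pgd, const pgd_t *mm_pgd)
{
	unsigned int i;

	pax_open_kernel();		/* the per-CPU PGD is kept read-only      */
	for (i = 0; i < 256; i++)	/* assumed USER_PGD_PTRS; the kernel half */
		cpu_pgd[i] = mm_pgd[i];	/* of cpu_pgd is never touched here       */
	pax_close_kernel();
}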
BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); @@ -64,11 +119,28 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * tlb flush IPI delivery. We must reload CR3 * to make sure to use no freed page tables. */ + +#ifndef CONFIG_PAX_PER_CPU_PGD load_cr3(next->pgd); +#endif + load_LDT_nolock(&next->context); + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_PAGEEXEC) + if (!(__supported_pte_mask & _PAGE_NX)) + cpu_set(cpu, next->context.cpu_user_cs_mask); +#endif + +#if defined(CONFIG_X86_32) && (defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC)) +#ifdef CONFIG_PAX_PAGEEXEC + if (!((next->pax_flags & MF_PAX_PAGEEXEC) && (__supported_pte_mask & _PAGE_NX))) +#endif + set_user_cs(next->context.user_cs_base, next->context.user_cs_limit, cpu); +#endif + } +#endif } -#endif } #define activate_mm(prev, next) \ diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index 9eae775..c914fea 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -5,6 +5,7 @@ #ifdef CONFIG_X86_64 /* X86_64 does not define MODULE_PROC_FAMILY */ +#define MODULE_PROC_FAMILY "" #elif defined CONFIG_M386 #define MODULE_PROC_FAMILY "386 " #elif defined CONFIG_M486 @@ -59,8 +60,20 @@ #error unknown processor family #endif -#ifdef CONFIG_X86_32 -# define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY +#ifdef CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_BTS +#define MODULE_PAX_KERNEXEC "KERNEXEC_BTS " +#elif defined(CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_OR) +#define MODULE_PAX_KERNEXEC "KERNEXEC_OR " +#else +#define MODULE_PAX_KERNEXEC "" #endif +#ifdef CONFIG_PAX_MEMORY_UDEREF +#define MODULE_PAX_UDEREF "UDEREF " +#else +#define MODULE_PAX_UDEREF "" +#endif + +#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_PAX_KERNEXEC MODULE_PAX_UDEREF + #endif /* _ASM_X86_MODULE_H */ diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 8ca8283..8dc71fa 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -55,11 +55,21 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, * virt_to_page(kaddr) returns a valid pointer if and only if * virt_addr_valid(kaddr) returns true. 
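The module.h hunk above folds the active PaX options into the module vermagic, so a module built against a kernel with different KERNEXEC/UDEREF settings is rejected at load time instead of misbehaving later. For example, on a 64-bit kernel with the OR-based KERNEXEC plugin and UDEREF both enabled, the macros expand as follows:

/* with CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_OR and CONFIG_PAX_MEMORY_UDEREF set */
#define MODULE_PROC_FAMILY   ""		/* X86_64 defines no processor family string */
#define MODULE_PAX_KERNEXEC  "KERNEXEC_OR "
#define MODULE_PAX_UDEREF    "UDEREF "
#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_PAX_KERNEXEC MODULE_PAX_UDEREF
/* MODULE_ARCH_VERMAGIC therefore expands to the literal "KERNEXEC_OR UDEREF " */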
*/ -#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) extern bool __virt_addr_valid(unsigned long kaddr); #define virt_addr_valid(kaddr) __virt_addr_valid((unsigned long) (kaddr)) +#ifdef CONFIG_GRKERNSEC_KSTACKOVERFLOW +#define virt_to_page(kaddr) \ + ({ \ + const void *__kaddr = (const void *)(kaddr); \ + BUG_ON(!virt_addr_valid(__kaddr)); \ + pfn_to_page(__pa(__kaddr) >> PAGE_SHIFT); \ + }) +#else +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) +#endif + #endif /* __ASSEMBLY__ */ #include diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h index da4e762..592f614 100644 --- a/arch/x86/include/asm/page_32.h +++ b/arch/x86/include/asm/page_32.h @@ -11,7 +11,7 @@ #define __phys_addr_nodebug(x) ((x) - PAGE_OFFSET) #ifdef CONFIG_DEBUG_VIRTUAL -extern unsigned long __phys_addr(unsigned long); +extern unsigned long __intentional_overflow(-1) __phys_addr(unsigned long); #else #define __phys_addr(x) __phys_addr_nodebug(x) #endif diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index a9e9937..a398cb4 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -1,7 +1,7 @@ #ifndef _ASM_X86_PAGE_64_DEFS_H #define _ASM_X86_PAGE_64_DEFS_H -#define THREAD_ORDER 1 +#define THREAD_ORDER 2 #define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) #define CURRENT_MASK (~(THREAD_SIZE - 1)) @@ -55,9 +55,9 @@ void copy_page(void *to, void *from); /* duplicated to the one in bootmem.h */ extern unsigned long max_pfn; -extern unsigned long phys_base; +extern const unsigned long phys_base; -extern unsigned long __phys_addr(unsigned long); +extern unsigned long __intentional_overflow(-1) __phys_addr(unsigned long); #define __phys_reloc_hide(x) (x) #define vmemmap ((struct page *)VMEMMAP_START) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 91e758b..cac1cd6 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -601,7 +601,7 @@ static inline pmd_t __pmd(pmdval_t val) return (pmd_t) { ret }; } -static inline pmdval_t pmd_val(pmd_t pmd) +static inline __intentional_overflow(-1) pmdval_t pmd_val(pmd_t pmd) { pmdval_t ret; @@ -667,6 +667,18 @@ static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) val); } +static inline void set_pgd_batched(pgd_t *pgdp, pgd_t pgd) +{ + pgdval_t val = native_pgd_val(pgd); + + if (sizeof(pgdval_t) > sizeof(long)) + PVOP_VCALL3(pv_mmu_ops.set_pgd_batched, pgdp, + val, (u64)val >> 32); + else + PVOP_VCALL2(pv_mmu_ops.set_pgd_batched, pgdp, + val); +} + static inline void pgd_clear(pgd_t *pgdp) { set_pgd(pgdp, __pgd(0)); @@ -751,6 +763,21 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, pv_mmu_ops.set_fixmap(idx, phys, flags); } +#ifdef CONFIG_PAX_KERNEXEC +static inline unsigned long pax_open_kernel(void) +{ + return PVOP_CALL0(unsigned long, pv_mmu_ops.pax_open_kernel); +} + +static inline unsigned long pax_close_kernel(void) +{ + return PVOP_CALL0(unsigned long, pv_mmu_ops.pax_close_kernel); +} +#else +static inline unsigned long pax_open_kernel(void) { return 0; } +static inline unsigned long pax_close_kernel(void) { return 0; } +#endif + #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) static inline int arch_spin_is_locked(struct arch_spinlock *lock) @@ -967,7 +994,7 @@ extern void default_banner(void); #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) #define PARA_SITE(ptype, 
clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4) -#define PARA_INDIRECT(addr) *%cs:addr +#define PARA_INDIRECT(addr) *%ss:addr #endif #define INTERRUPT_RETURN \ @@ -1044,6 +1071,21 @@ extern void default_banner(void); PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \ CLBR_NONE, \ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit)) + +#define GET_CR0_INTO_RDI \ + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ + mov %rax,%rdi + +#define SET_RDI_INTO_CR0 \ + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_write_cr0) + +#define GET_CR3_INTO_RDI \ + call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr3); \ + mov %rax,%rdi + +#define SET_RDI_INTO_CR3 \ + call PARA_INDIRECT(pv_mmu_ops+PV_MMU_write_cr3) + #endif /* CONFIG_X86_32 */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index faf2c04..055c010 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -84,7 +84,7 @@ struct pv_init_ops { */ unsigned (*patch)(u8 type, u16 clobber, void *insnbuf, unsigned long addr, unsigned len); -}; +} __no_const __no_randomize_layout; struct pv_lazy_ops { @@ -92,13 +92,13 @@ struct pv_lazy_ops { void (*enter)(void); void (*leave)(void); void (*flush)(void); -}; +} __no_randomize_layout; struct pv_time_ops { unsigned long long (*sched_clock)(void); unsigned long long (*steal_clock)(int cpu); unsigned long (*get_tsc_khz)(void); -}; +} __no_const __no_randomize_layout; struct pv_cpu_ops { /* hooks for various privileged instructions */ @@ -194,7 +194,7 @@ struct pv_cpu_ops { void (*start_context_switch)(struct task_struct *prev); void (*end_context_switch)(struct task_struct *next); -}; +} __no_const __no_randomize_layout; struct pv_irq_ops { /* @@ -217,7 +217,7 @@ struct pv_irq_ops { #ifdef CONFIG_X86_64 void (*adjust_exception_frame)(void); #endif -}; +} __no_randomize_layout; struct pv_apic_ops { #ifdef CONFIG_X86_LOCAL_APIC @@ -225,7 +225,7 @@ struct pv_apic_ops { unsigned long start_eip, unsigned long start_esp); #endif -}; +} __no_const __no_randomize_layout; struct pv_mmu_ops { unsigned long (*read_cr2)(void); @@ -314,6 +314,7 @@ struct pv_mmu_ops { struct paravirt_callee_save make_pud; void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); + void (*set_pgd_batched)(pgd_t *pudp, pgd_t pgdval); #endif /* PAGETABLE_LEVELS == 4 */ #endif /* PAGETABLE_LEVELS >= 3 */ @@ -325,7 +326,13 @@ struct pv_mmu_ops { an mfn. We can tell which is which from the index. */ void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags); -}; + +#ifdef CONFIG_PAX_KERNEXEC + unsigned long (*pax_open_kernel)(void); + unsigned long (*pax_close_kernel)(void); +#endif + +} __no_randomize_layout; struct arch_spinlock; struct pv_lock_ops { @@ -335,11 +342,14 @@ struct pv_lock_ops { void (*spin_lock_flags)(struct arch_spinlock *lock, unsigned long flags); int (*spin_trylock)(struct arch_spinlock *lock); void (*spin_unlock)(struct arch_spinlock *lock); -}; +} __no_const __no_randomize_layout; /* This contains all the paravirt structures: we get a convenient * number for each function using the offset which we use to indicate - * what to patch. */ + * what to patch. 
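Two annotations recur through the paravirt_types.h hunk above: __no_const marks an ops structure that really is written after init, so the constify plugin must not move it into read-only data, and __no_randomize_layout pins the field order, which matters here because paravirt patching addresses the pv_*_ops members by byte offset. In general the pattern looks like this (illustrative struct only; both attributes come from the compiler plugins added elsewhere in this patch):

struct example_ops {
	void (*start)(void);
	void (*stop)(void);
} __no_const __no_randomize_layout;	/* runtime-patched and offset-addressed */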
+ * shouldn't be randomized due to the "NEAT TRICK" in paravirt.c + */ + struct paravirt_patch_template { struct pv_init_ops pv_init_ops; struct pv_time_ops pv_time_ops; @@ -348,7 +358,7 @@ struct paravirt_patch_template { struct pv_apic_ops pv_apic_ops; struct pv_mmu_ops pv_mmu_ops; struct pv_lock_ops pv_lock_ops; -}; +} __no_randomize_layout; extern struct pv_info pv_info; extern struct pv_init_ops pv_init_ops; diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index b4389a4..7024269 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -63,6 +63,13 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT); + set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); +} + +static inline void pmd_populate_user(struct mm_struct *mm, + pmd_t *pmd, pte_t *pte) +{ + paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT); set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); } @@ -99,12 +106,22 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, #ifdef CONFIG_X86_PAE extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd); +static inline void pud_populate_kernel(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) +{ + pud_populate(mm, pudp, pmd); +} #else /* !CONFIG_X86_PAE */ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd))); } + +static inline void pud_populate_kernel(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); + set_pud(pud, __pud(_KERNPG_TABLE | __pa(pmd))); +} #endif /* CONFIG_X86_PAE */ #if PAGETABLE_LEVELS > 3 @@ -114,6 +131,12 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud))); } +static inline void pgd_populate_kernel(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +{ + paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); + set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(pud))); +} + static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index 98391db..8f6984e 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h @@ -18,7 +18,9 @@ static inline void native_set_pte(pte_t *ptep , pte_t pte) static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { + pax_open_kernel(); *pmdp = pmd; + pax_close_kernel(); } static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index cb00ccc..17e9054 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -92,12 +92,16 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { + pax_open_kernel(); set_64bit((unsigned long long *)(pmdp), native_pmd_val(pmd)); + pax_close_kernel(); } static inline void native_set_pud(pud_t *pudp, pud_t pud) { + pax_open_kernel(); set_64bit((unsigned long long *)(pudp), native_pud_val(pud)); + pax_close_kernel(); } /* diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 6be9909..5c476fc 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -44,6 +44,7 @@ extern struct 
mm_struct *pgd_page_get_mm(struct page *page); #ifndef __PAGETABLE_PUD_FOLDED #define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) +#define set_pgd_batched(pgdp, pgd) native_set_pgd_batched(pgdp, pgd) #define pgd_clear(pgd) native_pgd_clear(pgd) #endif @@ -81,12 +82,53 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); #define arch_end_context_switch(prev) do {} while(0) +#define pax_open_kernel() native_pax_open_kernel() +#define pax_close_kernel() native_pax_close_kernel() #endif /* CONFIG_PARAVIRT */ +#define __HAVE_ARCH_PAX_OPEN_KERNEL +#define __HAVE_ARCH_PAX_CLOSE_KERNEL + +#ifdef CONFIG_PAX_KERNEXEC +static inline unsigned long native_pax_open_kernel(void) +{ + unsigned long cr0; + + preempt_disable(); + barrier(); + cr0 = read_cr0() ^ X86_CR0_WP; + BUG_ON(cr0 & X86_CR0_WP); + write_cr0(cr0); + barrier(); + return cr0 ^ X86_CR0_WP; +} + +static inline unsigned long native_pax_close_kernel(void) +{ + unsigned long cr0; + + barrier(); + cr0 = read_cr0() ^ X86_CR0_WP; + BUG_ON(!(cr0 & X86_CR0_WP)); + write_cr0(cr0); + barrier(); + preempt_enable_no_resched(); + return cr0 ^ X86_CR0_WP; +} +#else +static inline unsigned long native_pax_open_kernel(void) { return 0; } +static inline unsigned long native_pax_close_kernel(void) { return 0; } +#endif + /* * The following only work if pte_present() is true. * Undefined behaviour if not.. */ +static inline int pte_user(pte_t pte) +{ + return pte_val(pte) & _PAGE_USER; +} + static inline int pte_dirty(pte_t pte) { return pte_flags(pte) & _PAGE_DIRTY; @@ -147,6 +189,11 @@ static inline unsigned long pud_pfn(pud_t pud) return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT; } +static inline unsigned long pgd_pfn(pgd_t pgd) +{ + return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT; +} + #define pte_page(pte) pfn_to_page(pte_pfn(pte)) static inline int pmd_large(pmd_t pte) @@ -200,9 +247,29 @@ static inline pte_t pte_wrprotect(pte_t pte) return pte_clear_flags(pte, _PAGE_RW); } +static inline pte_t pte_mkread(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_USER); +} + static inline pte_t pte_mkexec(pte_t pte) { - return pte_clear_flags(pte, _PAGE_NX); +#ifdef CONFIG_X86_PAE + if (__supported_pte_mask & _PAGE_NX) + return pte_clear_flags(pte, _PAGE_NX); + else +#endif + return pte_set_flags(pte, _PAGE_USER); +} + +static inline pte_t pte_exprotect(pte_t pte) +{ +#ifdef CONFIG_X86_PAE + if (__supported_pte_mask & _PAGE_NX) + return pte_set_flags(pte, _PAGE_NX); + else +#endif + return pte_clear_flags(pte, _PAGE_USER); } static inline pte_t pte_mkdirty(pte_t pte) @@ -394,6 +461,15 @@ pte_t *populate_extra_pte(unsigned long vaddr); #endif #ifndef __ASSEMBLY__ + +#ifdef CONFIG_PAX_PER_CPU_PGD +extern pgd_t cpu_pgd[NR_CPUS][PTRS_PER_PGD]; +static inline pgd_t *get_cpu_pgd(unsigned int cpu) +{ + return cpu_pgd[cpu]; +} +#endif + #include static inline int pte_none(pte_t pte) @@ -515,7 +591,7 @@ static inline unsigned long pud_page_vaddr(pud_t pud) * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ -#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT) +#define pud_page(pud) pfn_to_page((pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT) /* Find an entry in the second-level page table.. 
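native_pax_open_kernel()/native_pax_close_kernel() above implement the write window behind every pax_open_kernel()/pax_close_kernel() call site in this patch: with preemption disabled, CR0.WP is cleared so supervisor-mode stores may hit read-only pages, the write is performed, and WP is restored. Flattened into a single helper for illustration (simplified; the real functions also BUG_ON() an unexpected WP state and return the previous value):

static inline void kernexec_write_ro_word(unsigned long *ro_dst, unsigned long val)
{
	unsigned long cr0;

	preempt_disable();
	cr0 = read_cr0();
	write_cr0(cr0 & ~X86_CR0_WP);	/* open: ignore page-level write protection */
	*ro_dst = val;			/* the protected write                      */
	write_cr0(cr0);			/* close: WP restored                       */
	preempt_enable();
}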
*/ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) @@ -555,7 +631,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ -#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT) +#define pgd_page(pgd) pfn_to_page((pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT) /* to find an entry in a page-table-directory. */ static inline unsigned long pud_index(unsigned long address) @@ -570,7 +646,7 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) static inline int pgd_bad(pgd_t pgd) { - return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE; + return (pgd_flags(pgd) & ~(_PAGE_USER | _PAGE_NX)) != _KERNPG_TABLE; } static inline int pgd_none(pgd_t pgd) @@ -593,7 +669,12 @@ static inline int pgd_none(pgd_t pgd) * pgd_offset() returns a (pgd_t *) * pgd_index() is used get the offset into the pgd page's array of pgd_t's; */ -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address))) +#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) + +#ifdef CONFIG_PAX_PER_CPU_PGD +#define pgd_offset_cpu(cpu, address) (get_cpu_pgd(cpu) + pgd_index(address)) +#endif + /* * a shortcut which implies the use of the kernel's pgd, instead * of a process's @@ -604,6 +685,22 @@ static inline int pgd_none(pgd_t pgd) #define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET) #define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY) +#ifdef CONFIG_X86_32 +#define USER_PGD_PTRS KERNEL_PGD_BOUNDARY +#else +#define TASK_SIZE_MAX_SHIFT CONFIG_TASK_SIZE_MAX_SHIFT +#define USER_PGD_PTRS (_AC(1,UL) << (TASK_SIZE_MAX_SHIFT - PGDIR_SHIFT)) + +#ifdef CONFIG_PAX_MEMORY_UDEREF +#ifdef __ASSEMBLY__ +#define pax_user_shadow_base pax_user_shadow_base(%rip) +#else +extern unsigned long pax_user_shadow_base; +#endif +#endif + +#endif + #ifndef __ASSEMBLY__ extern int direct_gbpages; @@ -768,11 +865,23 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, * dst and src can be on the same page, but the range must not overlap, * and must not cross a page boundary. 
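The pud_page()/pgd_page() fix above (masking with PTE_PFN_MASK before shifting) matters because an upper-level page-table entry is not just a frame number: the low 12 bits hold flags and, with NX enabled, bit 63 is set as well, so shifting the raw value corrupts the computed pfn. A small user-space demonstration with made-up values:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	const uint64_t NX = 1ULL << 63;
	const uint64_t PFN_MASK = 0x000ffffffffff000ULL;	/* bits 51..12          */
	uint64_t pud_val = (0x1234ULL << 12) | 0x067 | NX;	/* frame 0x1234 + flags */

	assert(((pud_val & PFN_MASK) >> 12) == 0x1234);	/* masked first: correct pfn   */
	assert((pud_val >> 12) != 0x1234);		/* raw shift: NX leaks into pfn */
	return 0;
}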
*/ -static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) +static inline void clone_pgd_range(pgd_t *dst, const pgd_t *src, int count) { - memcpy(dst, src, count * sizeof(pgd_t)); + pax_open_kernel(); + while (count--) + *dst++ = *src++; + pax_close_kernel(); } +#ifdef CONFIG_PAX_PER_CPU_PGD +extern void __clone_user_pgds(pgd_t *dst, const pgd_t *src); +#endif + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) +extern void __shadow_user_pgds(pgd_t *dst, const pgd_t *src); +#else +static inline void __shadow_user_pgds(pgd_t *dst, const pgd_t *src) {} +#endif #include #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 0c92113..34a77c6 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -25,9 +25,6 @@ struct mm_struct; struct vm_area_struct; -extern pgd_t swapper_pg_dir[1024]; -extern pgd_t initial_page_table[1024]; - static inline void pgtable_cache_init(void) { } static inline void check_pgt_cache(void) { } void paging_init(void); @@ -48,6 +45,12 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); # include #endif +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +extern pgd_t initial_page_table[PTRS_PER_PGD]; +#ifdef CONFIG_X86_PAE +extern pmd_t swapper_pm_dir[PTRS_PER_PGD][PTRS_PER_PMD]; +#endif + #if defined(CONFIG_HIGHPTE) #define pte_offset_map(dir, address) \ ((pte_t *)kmap_atomic(pmd_page(*(dir))) + \ @@ -62,7 +65,9 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); /* Clear a kernel PTE and flush it from the TLB */ #define kpte_clear_flush(ptep, vaddr) \ do { \ + pax_open_kernel(); \ pte_clear(&init_mm, (vaddr), (ptep)); \ + pax_close_kernel(); \ __flush_tlb_one((vaddr)); \ } while (0) @@ -74,6 +79,9 @@ do { \ #endif /* !__ASSEMBLY__ */ +#define HAVE_ARCH_UNMAPPED_AREA +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN + /* * kern_addr_valid() is (1) for FLATMEM and (0) for * SPARSEMEM and DISCONTIGMEM diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index ed5903b..c7fe163 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -8,7 +8,7 @@ */ #ifdef CONFIG_X86_PAE # include -# define PMD_SIZE (1UL << PMD_SHIFT) +# define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) # define PMD_MASK (~(PMD_SIZE - 1)) #else # include @@ -46,6 +46,19 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ # define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) #endif +#ifdef CONFIG_PAX_KERNEXEC +#ifndef __ASSEMBLY__ +extern unsigned char MODULES_EXEC_VADDR[]; +extern unsigned char MODULES_EXEC_END[]; +#endif +#include +#define ktla_ktva(addr) (addr + LOAD_PHYSICAL_ADDR + PAGE_OFFSET) +#define ktva_ktla(addr) (addr - LOAD_PHYSICAL_ADDR - PAGE_OFFSET) +#else +#define ktla_ktva(addr) (addr) +#define ktva_ktla(addr) (addr) +#endif + #define MODULES_VADDR VMALLOC_START #define MODULES_END VMALLOC_END #define MODULES_LEN (MODULES_VADDR - MODULES_END) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 975f709..9f779c9 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -16,10 +16,14 @@ extern pud_t level3_kernel_pgt[512]; extern pud_t level3_ident_pgt[512]; +extern pud_t level3_vmalloc_start_pgt[512]; +extern pud_t level3_vmalloc_end_pgt[512]; +extern pud_t level3_vmemmap_pgt[512]; +extern pud_t level2_vmemmap_pgt[512]; extern pmd_t level2_kernel_pgt[512]; extern pmd_t level2_fixmap_pgt[512]; 
-extern pmd_t level2_ident_pgt[512]; -extern pgd_t init_level4_pgt[]; +extern pmd_t level2_ident_pgt[512*2]; +extern pgd_t init_level4_pgt[512]; #define swapper_pg_dir init_level4_pgt @@ -61,7 +65,9 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { + pax_open_kernel(); *pmdp = pmd; + pax_close_kernel(); } static inline void native_pmd_clear(pmd_t *pmd) @@ -97,7 +103,9 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) static inline void native_set_pud(pud_t *pudp, pud_t pud) { + pax_open_kernel(); *pudp = pud; + pax_close_kernel(); } static inline void native_pud_clear(pud_t *pud) @@ -107,6 +115,13 @@ static inline void native_pud_clear(pud_t *pud) static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { + pax_open_kernel(); + *pgdp = pgd; + pax_close_kernel(); +} + +static inline void native_set_pgd_batched(pgd_t *pgdp, pgd_t pgd) +{ *pgdp = pgd; } diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 51817fa..19195fc 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -59,7 +59,12 @@ typedef struct { pteval_t pte; } pte_t; #define MODULES_VADDR _AC(0xffffffffa0000000, UL) #define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) +#define MODULES_EXEC_VADDR MODULES_VADDR +#define MODULES_EXEC_END MODULES_END #define ESPFIX_PGD_ENTRY _AC(-2, UL) #define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT) +#define ktla_ktva(addr) (addr) +#define ktva_ktla(addr) (addr) + #endif /* _ASM_X86_PGTABLE_64_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 013286a..8b42f4f 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -16,13 +16,12 @@ #define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ #define _PAGE_BIT_PAT 7 /* on 4KB pages */ #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ -#define _PAGE_BIT_UNUSED1 9 /* available for programmer */ +#define _PAGE_BIT_SPECIAL 9 /* special mappings, no associated struct page */ #define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */ #define _PAGE_BIT_HIDDEN 11 /* hidden by kmemcheck */ #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ -#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 -#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 -#define _PAGE_BIT_SPLITTING _PAGE_BIT_UNUSED1 /* only valid on a PSE pmd */ +#define _PAGE_BIT_CPA_TEST _PAGE_BIT_SPECIAL +#define _PAGE_BIT_SPLITTING _PAGE_BIT_SPECIAL /* only valid on a PSE pmd */ #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ /* If _PAGE_BIT_PRESENT is clear, we use these: */ @@ -40,7 +39,6 @@ #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) -#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) #define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) @@ -57,8 +55,10 @@ #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) -#else +#elif defined(CONFIG_KMEMCHECK) #define _PAGE_NX (_AT(pteval_t, 0)) +#else +#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) #endif #define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE) @@ -96,6 +96,9 @@ #define 
PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ _PAGE_ACCESSED) +#define PAGE_READONLY_NOEXEC PAGE_READONLY +#define PAGE_SHARED_NOEXEC PAGE_SHARED + #define __PAGE_KERNEL_EXEC \ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL) #define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) @@ -106,7 +109,7 @@ #define __PAGE_KERNEL_WC (__PAGE_KERNEL | _PAGE_CACHE_WC) #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT) #define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD) -#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) +#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RO | _PAGE_USER) #define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER) #define __PAGE_KERNEL_VVAR_NOCACHE (__PAGE_KERNEL_VVAR | _PAGE_PCD | _PAGE_PWT) #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) @@ -168,8 +171,8 @@ * bits are combined, this will alow user to access the high address mapped * VDSO in the presence of CONFIG_COMPAT_VDSO */ -#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */ -#define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ +#define PTE_IDENT_ATTR 0x063 /* PRESENT+RW+DIRTY+ACCESSED */ +#define PDE_IDENT_ATTR 0x063 /* PRESENT+RW+DIRTY+ACCESSED */ #define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ #endif @@ -207,7 +210,17 @@ static inline pgdval_t pgd_flags(pgd_t pgd) { return native_pgd_val(pgd) & PTE_FLAGS_MASK; } +#endif +#if PAGETABLE_LEVELS == 3 +#include +#endif + +#if PAGETABLE_LEVELS == 2 +#include +#endif + +#ifndef __ASSEMBLY__ #if PAGETABLE_LEVELS > 3 typedef struct { pudval_t pud; } pud_t; @@ -221,8 +234,6 @@ static inline pudval_t native_pud_val(pud_t pud) return pud.pud; } #else -#include - static inline pudval_t native_pud_val(pud_t pud) { return native_pgd_val(pud.pgd); @@ -242,8 +253,6 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) return pmd.pmd; } #else -#include - static inline pmdval_t native_pmd_val(pmd_t pmd) { return native_pgd_val(pmd.pud.pgd); @@ -283,7 +292,6 @@ typedef struct page *pgtable_t; extern pteval_t __supported_pte_mask; extern void set_nx(void); -extern int nx_enabled; #define pgprot_writecombine pgprot_writecombine extern pgprot_t pgprot_writecombine(pgprot_t prot); diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index 2dddb31..100c638 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h @@ -62,6 +62,7 @@ #define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */ #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ #define X86_CR4_SMEP 0x00100000 /* enable SMEP support */ +#define X86_CR4_SMAP 0x00200000 /* enable SMAP support */ /* * x86-64 Task Priority Register, CR8 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index f7c89e2..553040d 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -110,7 +110,7 @@ struct cpuinfo_x86 { /* Index into per_cpu list: */ u16 cpu_index; u32 microcode; -} __attribute__((__aligned__(SMP_CACHE_BYTES))); +} __attribute__((__aligned__(SMP_CACHE_BYTES))) __randomize_layout; #define X86_VENDOR_INTEL 0 #define X86_VENDOR_CYRIX 1 @@ -266,7 +266,7 @@ struct tss_struct { } ____cacheline_aligned; -DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss); +extern struct tss_struct init_tss[NR_CPUS]; /* * Save the original ist values for checking stack pointers during debugging @@ -859,11 +859,18 @@ static inline void spin_lock_prefetch(const void *x) */ #define TASK_SIZE 
PAGE_OFFSET #define TASK_SIZE_MAX TASK_SIZE + +#ifdef CONFIG_PAX_SEGMEXEC +#define SEGMEXEC_TASK_SIZE (TASK_SIZE / 2) +#define STACK_TOP ((current->mm->pax_flags & MF_PAX_SEGMEXEC)?SEGMEXEC_TASK_SIZE:TASK_SIZE) +#else #define STACK_TOP TASK_SIZE -#define STACK_TOP_MAX STACK_TOP +#endif + +#define STACK_TOP_MAX TASK_SIZE #define INIT_THREAD { \ - .sp0 = sizeof(init_stack) + (long)&init_stack, \ + .sp0 = sizeof(init_stack) + (long)&init_stack - 8, \ .vm86_info = NULL, \ .sysenter_cs = __KERNEL_CS, \ .io_bitmap_ptr = NULL, \ @@ -877,7 +884,7 @@ static inline void spin_lock_prefetch(const void *x) */ #define INIT_TSS { \ .x86_tss = { \ - .sp0 = sizeof(init_stack) + (long)&init_stack, \ + .sp0 = sizeof(init_stack) + (long)&init_stack - 8, \ .ss0 = __KERNEL_DS, \ .ss1 = __KERNEL_CS, \ .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ @@ -888,11 +895,7 @@ static inline void spin_lock_prefetch(const void *x) extern unsigned long thread_saved_pc(struct task_struct *tsk); #define THREAD_SIZE_LONGS (THREAD_SIZE/sizeof(unsigned long)) -#define KSTK_TOP(info) \ -({ \ - unsigned long *__ptr = (unsigned long *)(info); \ - (unsigned long)(&__ptr[THREAD_SIZE_LONGS]); \ -}) +#define KSTK_TOP(info) ((container_of(info, struct task_struct, tinfo))->thread.sp0) /* * The below -8 is to reserve 8 bytes on top of the ring0 stack. @@ -907,7 +910,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); #define task_pt_regs(task) \ ({ \ struct pt_regs *__regs__; \ - __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \ + __regs__ = (struct pt_regs *)((task)->thread.sp0); \ __regs__ - 1; \ }) @@ -917,13 +920,13 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); /* * User space process size. 47bits minus one guard page. */ -#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE) +#define TASK_SIZE_MAX ((1UL << TASK_SIZE_MAX_SHIFT) - PAGE_SIZE) /* This decides where the kernel will search for a free chunk of vm * space during mmap's. */ #define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \ - 0xc0000000 : 0xFFFFe000) + 0xc0000000 : 0xFFFFf000) #define TASK_SIZE (test_thread_flag(TIF_IA32) ? 
\ IA32_PAGE_OFFSET : TASK_SIZE_MAX) @@ -934,11 +937,11 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); #define STACK_TOP_MAX TASK_SIZE_MAX #define INIT_THREAD { \ - .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ + .sp0 = (unsigned long)&init_stack + sizeof(init_stack) - 16 \ } #define INIT_TSS { \ - .x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ + .x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) - 16 \ } /* @@ -960,6 +963,10 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, */ #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) +#ifdef CONFIG_PAX_SEGMEXEC +#define SEGMEXEC_TASK_UNMAPPED_BASE (PAGE_ALIGN(SEGMEXEC_TASK_SIZE / 3)) +#endif + #define KSTK_EIP(task) (task_pt_regs(task)->ip) /* Get/set a process' ability to use the timestamp counter instruction */ @@ -972,7 +979,8 @@ extern int set_tsc_mode(unsigned int val); extern int amd_get_nb_id(int cpu); struct aperfmperf { - u64 aperf, mperf; + u64 aperf __intentional_overflow(-1); + u64 mperf __intentional_overflow(-1); }; static inline void get_aperfmperf(struct aperfmperf *am) diff --git a/arch/x86/include/asm/ptrace-abi.h b/arch/x86/include/asm/ptrace-abi.h index 7b0a55a..ad115bf 100644 --- a/arch/x86/include/asm/ptrace-abi.h +++ b/arch/x86/include/asm/ptrace-abi.h @@ -49,7 +49,6 @@ #define EFLAGS 144 #define RSP 152 #define SS 160 -#define ARGOFFSET R11 #endif /* __ASSEMBLY__ */ /* top of stack page */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 0581a85..d8c7f13 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -156,28 +156,29 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) } /* - * user_mode_vm(regs) determines whether a register set came from user mode. + * user_mode(regs) determines whether a register set came from user mode. * This is true if V8086 mode was enabled OR if the register set was from * protected mode with RPL-3 CS value. This tricky test checks that with * one comparison. Many places in the kernel can bypass this full check - * if they have already ruled out V8086 mode, so user_mode(regs) can be used. + * if they have already ruled out V8086 mode, so user_mode_novm(regs) can + * be used. */ -static inline int user_mode(struct pt_regs *regs) +static inline int user_mode_novm(struct pt_regs *regs) { #ifdef CONFIG_X86_32 return (regs->cs & SEGMENT_RPL_MASK) == USER_RPL; #else - return !!(regs->cs & 3); + return !!(regs->cs & SEGMENT_RPL_MASK); #endif } -static inline int user_mode_vm(struct pt_regs *regs) +static inline int user_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_32 return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= USER_RPL; #else - return user_mode(regs); + return user_mode_novm(regs); #endif } @@ -193,15 +194,16 @@ static inline int v8086_mode(struct pt_regs *regs) #ifdef CONFIG_X86_64 static inline bool user_64bit_mode(struct pt_regs *regs) { + unsigned long cs = regs->cs & 0xffff; #ifndef CONFIG_PARAVIRT /* * On non-paravirt systems, this is the only long mode CPL 3 * selector. We do not allow long mode selectors in the LDT. */ - return regs->cs == __USER_CS; + return cs == __USER_CS; #else /* Headers are too twisted for this to go in paravirt.h. 
*/ - return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs; + return cs == __USER_CS || cs == pv_info.extra_user_64bit_cs; #endif } #endif diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index 92f29706..d0a1a53 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -6,19 +6,19 @@ struct pt_regs; struct machine_ops { - void (*restart)(char *cmd); - void (*halt)(void); - void (*power_off)(void); + void (* __noreturn restart)(char *cmd); + void (* __noreturn halt)(void); + void (* __noreturn power_off)(void); void (*shutdown)(void); void (*crash_shutdown)(struct pt_regs *); - void (*emergency_restart)(void); -}; + void (* __noreturn emergency_restart)(void); +} __no_const; extern struct machine_ops machine_ops; void native_machine_crash_shutdown(struct pt_regs *regs); void native_machine_shutdown(void); -void machine_real_restart(unsigned int type); +void __noreturn machine_real_restart(unsigned int type); /* These must match dispatch_table in reboot_32.S */ #define MRR_BIOS 0 #define MRR_APM 1 diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 2dbe4a7..ce1db00 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -64,6 +64,14 @@ static inline void __down_read(struct rw_semaphore *sem) { asm volatile("# beginning down_read\n\t" LOCK_PREFIX _ASM_INC "(%1)\n\t" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX _ASM_DEC "(%1)\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + /* adds 0x00000001 */ " jns 1f\n" " call call_rwsem_down_read_failed\n" @@ -85,6 +93,14 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) "1:\n\t" " mov %1,%2\n\t" " add %3,%2\n\t" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + "sub %3,%2\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + " jle 2f\n\t" LOCK_PREFIX " cmpxchg %2,%0\n\t" " jnz 1b\n\t" @@ -104,6 +120,14 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) long tmp; asm volatile("# beginning down_write\n\t" LOCK_PREFIX " xadd %1,(%2)\n\t" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + "mov %1,(%2)\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + /* adds 0xffff0001, returns the old value */ " test %1,%1\n\t" /* was the count 0 before? 
*/ @@ -141,6 +165,14 @@ static inline void __up_read(struct rw_semaphore *sem) long tmp; asm volatile("# beginning __up_read\n\t" LOCK_PREFIX " xadd %1,(%2)\n\t" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + "mov %1,(%2)\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + /* subtracts 1, returns the old value */ " jns 1f\n\t" " call call_rwsem_wake\n" /* expects old value in %edx */ @@ -159,6 +191,14 @@ static inline void __up_write(struct rw_semaphore *sem) long tmp; asm volatile("# beginning __up_write\n\t" LOCK_PREFIX " xadd %1,(%2)\n\t" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + "mov %1,(%2)\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + /* subtracts 0xffff0001, returns the old value */ " jns 1f\n\t" " call call_rwsem_wake\n" /* expects old value in %edx */ @@ -176,6 +216,14 @@ static inline void __downgrade_write(struct rw_semaphore *sem) { asm volatile("# beginning __downgrade_write\n\t" LOCK_PREFIX _ASM_ADD "%2,(%1)\n\t" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX _ASM_SUB "%2,(%1)\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + /* * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386) * 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64) @@ -194,7 +242,15 @@ static inline void __downgrade_write(struct rw_semaphore *sem) */ static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) { - asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0" + asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX _ASM_SUB "%1,%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + : "+m" (sem->count) : "er" (delta)); } @@ -204,7 +260,7 @@ static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) */ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) { - return delta + xadd(&sem->count, delta); + return delta + xadd_check_overflow(&sem->count, delta); } #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 5e64171..f58957e 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -64,10 +64,15 @@ * 26 - ESPFIX small SS * 27 - per-cpu [ offset to per-cpu data area ] * 28 - stack_canary-20 [ for stack protector ] - * 29 - unused - * 30 - unused + * 29 - PCI BIOS CS + * 30 - PCI BIOS DS * 31 - TSS for double fault handler */ +#define GDT_ENTRY_KERNEXEC_EFI_CS (1) +#define GDT_ENTRY_KERNEXEC_EFI_DS (2) +#define __KERNEXEC_EFI_CS (GDT_ENTRY_KERNEXEC_EFI_CS*8) +#define __KERNEXEC_EFI_DS (GDT_ENTRY_KERNEXEC_EFI_DS*8) + #define GDT_ENTRY_TLS_MIN 6 #define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) @@ -79,6 +84,8 @@ #define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE+0) +#define GDT_ENTRY_KERNEXEC_KERNEL_CS (4) + #define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE+1) #define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE+4) @@ -104,6 +111,12 @@ #define __KERNEL_STACK_CANARY 0 #endif +#define GDT_ENTRY_PCIBIOS_CS (GDT_ENTRY_KERNEL_BASE+17) +#define __PCIBIOS_CS (GDT_ENTRY_PCIBIOS_CS * 8) + +#define GDT_ENTRY_PCIBIOS_DS (GDT_ENTRY_KERNEL_BASE+18) +#define __PCIBIOS_DS (GDT_ENTRY_PCIBIOS_DS * 8) + #define GDT_ENTRY_DOUBLEFAULT_TSS 31 /* @@ -141,7 +154,7 @@ */ /* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ -#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) +#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xFFFCU) == PNP_CS32 || ((x) & 0xFFFCU) == PNP_CS16) #else @@ -165,6 +178,8 @@ #define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8+3) #define __USER32_DS __USER_DS +#define 
GDT_ENTRY_KERNEXEC_KERNEL_CS 7 + #define GDT_ENTRY_TSS 8 /* needs two entries */ #define GDT_ENTRY_LDT 10 /* needs two entries */ #define GDT_ENTRY_TLS_MIN 12 @@ -185,6 +200,7 @@ #endif #define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8) +#define __KERNEXEC_KERNEL_CS (GDT_ENTRY_KERNEXEC_KERNEL_CS*8) #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8) #define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8+3) #define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8+3) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 73b11bc..d4a3b63 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -36,7 +36,7 @@ DECLARE_PER_CPU(cpumask_var_t, cpu_core_map); /* cpus sharing the last level cache: */ DECLARE_PER_CPU(cpumask_var_t, cpu_llc_shared_map); DECLARE_PER_CPU(u16, cpu_llc_id); -DECLARE_PER_CPU(int, cpu_number); +DECLARE_PER_CPU(unsigned int, cpu_number); static inline struct cpumask *cpu_sibling_mask(int cpu) { @@ -77,7 +77,7 @@ struct smp_ops { void (*send_call_func_ipi)(const struct cpumask *mask); void (*send_call_func_single_ipi)(int cpu); -}; +} __no_const; /* Globals due to paravirt */ extern void set_cpu_sibling_map(int cpu); @@ -192,14 +192,8 @@ extern unsigned disabled_cpus __cpuinitdata; extern int safe_smp_processor_id(void); #elif defined(CONFIG_X86_64_SMP) -#define raw_smp_processor_id() (percpu_read(cpu_number)) - -#define stack_smp_processor_id() \ -({ \ - struct thread_info *ti; \ - __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \ - ti->cpu; \ -}) +#define raw_smp_processor_id() (percpu_read(cpu_number)) +#define stack_smp_processor_id() raw_smp_processor_id() #define safe_smp_processor_id() smp_processor_id() #endif diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 972c260..43ab1fd 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -188,6 +188,14 @@ static inline int arch_write_can_lock(arch_rwlock_t *lock) static inline void arch_read_lock(arch_rwlock_t *rw) { asm volatile(LOCK_PREFIX READ_LOCK_SIZE(dec) " (%0)\n\t" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX READ_LOCK_SIZE(inc) " (%0)\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + "jns 1f\n" "call __read_lock_failed\n\t" "1:\n" @@ -197,6 +205,14 @@ static inline void arch_read_lock(arch_rwlock_t *rw) static inline void arch_write_lock(arch_rwlock_t *rw) { asm volatile(LOCK_PREFIX WRITE_LOCK_SUB(%1) "(%0)\n\t" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX WRITE_LOCK_ADD(%1) "(%0)\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + "jz 1f\n" "call __write_lock_failed\n\t" "1:\n" @@ -226,13 +242,29 @@ static inline int arch_write_trylock(arch_rwlock_t *lock) static inline void arch_read_unlock(arch_rwlock_t *rw) { - asm volatile(LOCK_PREFIX READ_LOCK_SIZE(inc) " %0" + asm volatile(LOCK_PREFIX READ_LOCK_SIZE(inc) " %0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX READ_LOCK_SIZE(dec) " %0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + :"+m" (rw->lock) : : "memory"); } static inline void arch_write_unlock(arch_rwlock_t *rw) { - asm volatile(LOCK_PREFIX WRITE_LOCK_ADD(%1) "%0" + asm volatile(LOCK_PREFIX WRITE_LOCK_ADD(%1) "%0\n" + +#ifdef CONFIG_PAX_REFCOUNT + "jno 0f\n" + LOCK_PREFIX WRITE_LOCK_SUB(%1) "%0\n" + "int $4\n0:\n" + _ASM_EXTABLE(0b, 0b) +#endif + : "+m" (rw->write) : "i" (RW_LOCK_BIAS) : "memory"); } diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 1575177..cb23f52 100644 --- a/arch/x86/include/asm/stackprotector.h +++ 
b/arch/x86/include/asm/stackprotector.h @@ -48,7 +48,7 @@ * head_32 for boot CPU and setup_per_cpu_areas() for others. */ #define GDT_STACK_CANARY_INIT \ - [GDT_ENTRY_STACK_CANARY] = GDT_ENTRY_INIT(0x4090, 0, 0x18), + [GDT_ENTRY_STACK_CANARY] = GDT_ENTRY_INIT(0x4090, 0, 0x17), /* * Initialize the stackprotector canary value. @@ -113,7 +113,7 @@ static inline void setup_stack_canary_segment(int cpu) static inline void load_stack_canary_segment(void) { -#ifdef CONFIG_X86_32 +#if defined(CONFIG_X86_32) && !defined(CONFIG_PAX_MEMORY_UDEREF) asm volatile ("mov %0, %%gs" : : "r" (0)); #endif } diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 70bbe39..4ae2bd4 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -11,28 +11,20 @@ extern int kstack_depth_to_print; -struct thread_info; +struct task_struct; struct stacktrace_ops; -typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo, - unsigned long *stack, - unsigned long bp, - const struct stacktrace_ops *ops, - void *data, - unsigned long *end, - int *graph); +typedef unsigned long walk_stack_t(struct task_struct *task, + void *stack_start, + unsigned long *stack, + unsigned long bp, + const struct stacktrace_ops *ops, + void *data, + unsigned long *end, + int *graph); -extern unsigned long -print_context_stack(struct thread_info *tinfo, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data, - unsigned long *end, int *graph); - -extern unsigned long -print_context_stack_bp(struct thread_info *tinfo, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data, - unsigned long *end, int *graph); +extern walk_stack_t print_context_stack; +extern walk_stack_t print_context_stack_bp; /* Generic stack tracer with callbacks */ @@ -40,7 +32,7 @@ struct stacktrace_ops { void (*address)(void *data, unsigned long address, int reliable); /* On negative return stop dumping */ int (*stack)(void *data, char *name); - walk_stack_t walk_stack; + walk_stack_t *walk_stack; }; void dump_trace(struct task_struct *tsk, struct pt_regs *regs, diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index cb23852..2dde194 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -40,7 +40,7 @@ asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *, compat_sigset_t __user *, unsigned int); asmlinkage long sys32_alarm(unsigned int); -asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int); +asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int); asmlinkage long sys32_sysfs(int, u32, u32); asmlinkage long sys32_sched_rr_get_interval(compat_pid_t, diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index c4a348f..e2ad7ea 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -13,6 +13,7 @@ #ifndef _ASM_X86_SYSCALL_H #define _ASM_X86_SYSCALL_H +#include #include #include @@ -86,6 +87,12 @@ static inline void syscall_set_arguments(struct task_struct *task, memcpy(®s->bx + i, args, n * sizeof(args[0])); } +static inline int syscall_get_arch(struct task_struct *task, + struct pt_regs *regs) +{ + return AUDIT_ARCH_I386; +} + #else /* CONFIG_X86_64 */ static inline void syscall_get_arguments(struct task_struct *task, @@ -210,6 +217,22 @@ static inline void syscall_set_arguments(struct task_struct *task, } } +static inline int syscall_get_arch(struct task_struct *task, + struct pt_regs *regs) +{ 
+#ifdef CONFIG_IA32_EMULATION + /* + * TS_COMPAT is set for 32-bit syscall entries and then + * remains set until we return to user mode. + * + * TIF_IA32 tasks should always have TS_COMPAT set at + * system call time. + */ + if (task_thread_info(task)->status & TS_COMPAT) + return AUDIT_ARCH_I386; +#endif + return AUDIT_ARCH_X86_64; +} #endif /* CONFIG_X86_32 */ #endif /* _ASM_X86_SYSCALL_H */ diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index d75adff..c0cc78b 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -125,7 +125,7 @@ do { \ "call __switch_to\n\t" \ "movq "__percpu_arg([current_task])",%%rsi\n\t" \ __switch_canary \ - "movq %P[thread_info](%%rsi),%%r8\n\t" \ + "movq "__percpu_arg([thread_info])",%%r8\n\t" \ "movq %%rax,%%rdi\n\t" \ "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ "jnz ret_from_fork\n\t" \ @@ -136,7 +136,7 @@ do { \ [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ [_tif_fork] "i" (_TIF_FORK), \ - [thread_info] "i" (offsetof(struct task_struct, stack)), \ + [thread_info] "m" (current_tinfo), \ [current_task] "m" (current_task) \ __switch_canary_iparam \ : "memory", "cc" __EXTRA_CLOBBER) @@ -196,7 +196,7 @@ static inline unsigned long get_limit(unsigned long segment) { unsigned long __limit; asm("lsll %1,%0" : "=r" (__limit) : "r" (segment)); - return __limit + 1; + return __limit; } static inline void native_clts(void) @@ -390,13 +390,13 @@ static inline void clflush(volatile void *__p) void cpu_idle_wait(void); -extern unsigned long arch_align_stack(unsigned long sp); +#define arch_align_stack(x) ((x) & ~0xfUL) extern void free_init_pages(char *what, unsigned long begin, unsigned long end); void default_idle(void); bool set_pm_idle_to_default(void); -void stop_this_cpu(void *dummy); +void stop_this_cpu(void *dummy) __noreturn; /* * Force strict CPU ordering. 
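The jno / int $4 sequences spliced into the rwsem and rwlock fast paths above all follow the same PAX_REFCOUNT pattern: do the locked arithmetic, skip ahead if the signed result did not overflow, otherwise roll the operation back and raise the overflow exception so the kernel can report the wrap. Below is a minimal, illustrative sketch of that pattern applied to a plain counter; refcount_inc_checked is a made-up name for the example, and the _ASM_EXTABLE fixup entry the real hunks add after "int $4" is omitted here for brevity.

/* Sketch only -- mirrors the PAX_REFCOUNT jno/int $4 pattern used in the hunks above. */
static inline void refcount_inc_checked(int *counter)
{
	asm volatile("lock incl %0\n\t"
		     "jno 0f\n\t"	/* no signed overflow: fast path */
		     "lock decl %0\n\t"	/* undo the increment */
		     "int $4\n"		/* raise the overflow exception */
		     "0:\n"
		     : "+m" (*counter)
		     :
		     : "memory", "cc");
}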
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index d7ef849..b1b009a 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -10,6 +10,7 @@ #include #include #include +#include /* * low level task data that entry.S needs immediate access to @@ -24,7 +25,6 @@ struct exec_domain; #include struct thread_info { - struct task_struct *task; /* main task structure */ struct exec_domain *exec_domain; /* execution domain */ __u32 flags; /* low level flags */ __u32 status; /* thread synchronous flags */ @@ -34,18 +34,12 @@ struct thread_info { mm_segment_t addr_limit; struct restart_block restart_block; void __user *sysenter_return; -#ifdef CONFIG_X86_32 - unsigned long previous_esp; /* ESP of the previous stack in - case of nested (IRQ) stacks - */ - __u8 supervisor_stack[0]; -#endif + unsigned long lowest_stack; int uaccess_err; }; -#define INIT_THREAD_INFO(tsk) \ +#define INIT_THREAD_INFO \ { \ - .task = &tsk, \ .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ @@ -56,7 +50,7 @@ struct thread_info { }, \ } -#define init_thread_info (init_thread_union.thread_info) +#define init_thread_info (init_thread_union.stack) #define init_stack (init_thread_union.stack) #else /* !__ASSEMBLY__ */ @@ -95,6 +89,7 @@ struct thread_info { #define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ +#define TIF_GRSEC_SETXID 29 /* update credentials on syscall entry/exit */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -117,16 +112,17 @@ struct thread_info { #define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) +#define _TIF_GRSEC_SETXID (1 << TIF_GRSEC_SETXID) /* work to do in syscall_trace_enter() */ #define _TIF_WORK_SYSCALL_ENTRY \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT) + _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | _TIF_GRSEC_SETXID) /* work to do in syscall_trace_leave() */ #define _TIF_WORK_SYSCALL_EXIT \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \ - _TIF_SYSCALL_TRACEPOINT) + _TIF_SYSCALL_TRACEPOINT | _TIF_GRSEC_SETXID) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ @@ -136,7 +132,8 @@ struct thread_info { /* work to do on any return to user space */ #define _TIF_ALLWORK_MASK \ - ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT) + ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \ + _TIF_GRSEC_SETXID) /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ @@ -170,45 +167,40 @@ struct thread_info { ret; \ }) -#ifdef CONFIG_X86_32 - -#define STACK_WARN (THREAD_SIZE/8) -/* - * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. 
- */ -#ifndef __ASSEMBLY__ - - -/* how to get the current stack pointer from C */ -register unsigned long current_stack_pointer asm("esp") __used; - -/* how to get the thread information struct from C */ -static inline struct thread_info *current_thread_info(void) -{ - return (struct thread_info *) - (current_stack_pointer & ~(THREAD_SIZE - 1)); -} - -#else /* !__ASSEMBLY__ */ - +#ifdef __ASSEMBLY__ /* how to get the thread information struct from ASM */ #define GET_THREAD_INFO(reg) \ - movl $-THREAD_SIZE, reg; \ - andl %esp, reg + mov PER_CPU_VAR(current_tinfo), reg /* use this one if reg already contains %esp */ -#define GET_THREAD_INFO_WITH_ESP(reg) \ - andl $-THREAD_SIZE, reg +#define GET_THREAD_INFO_WITH_ESP(reg) GET_THREAD_INFO(reg) +#else +/* how to get the thread information struct from C */ +DECLARE_PER_CPU(struct thread_info *, current_tinfo); + +static __always_inline struct thread_info *current_thread_info(void) +{ + return percpu_read_stable(current_tinfo); +} +#endif + +#ifdef CONFIG_X86_32 + +#define STACK_WARN (THREAD_SIZE/8) +/* + * macros/functions for gaining access to the thread information structure + * + * preempt_count needs to be 1 initially, until the scheduler is functional. + */ +#ifndef __ASSEMBLY__ + +/* how to get the current stack pointer from C */ +register unsigned long current_stack_pointer asm("esp") __used; #endif #else /* X86_32 */ -#include -#define KERNEL_STACK_OFFSET (5*8) - /* * macros/functions for gaining access to the thread information structure * preempt_count needs to be 1 initially, until the scheduler is functional. @@ -216,21 +208,8 @@ static inline struct thread_info *current_thread_info(void) #ifndef __ASSEMBLY__ DECLARE_PER_CPU(unsigned long, kernel_stack); -static inline struct thread_info *current_thread_info(void) -{ - struct thread_info *ti; - ti = (void *)(percpu_read_stable(kernel_stack) + - KERNEL_STACK_OFFSET - THREAD_SIZE); - return ti; -} - -#else /* !__ASSEMBLY__ */ - -/* how to get the thread information struct from ASM */ -#define GET_THREAD_INFO(reg) \ - movq PER_CPU_VAR(kernel_stack),reg ; \ - subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg - +/* how to get the current stack pointer from C */ +register unsigned long current_stack_pointer asm("rsp") __used; #endif #endif /* !X86_32 */ @@ -264,5 +243,16 @@ extern void arch_task_cache_init(void); extern void free_thread_info(struct thread_info *ti); extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define arch_task_cache_init arch_task_cache_init + +#define __HAVE_THREAD_FUNCTIONS +#define task_thread_info(task) (&(task)->tinfo) +#define task_stack_page(task) ((task)->stack) +#define setup_thread_stack(p, org) do {} while (0) +#define end_of_stack(p) ((unsigned long *)task_stack_page(p) + 1) + +#define __HAVE_ARCH_TASK_STRUCT_ALLOCATOR +extern struct task_struct *alloc_task_struct_node(int node); +extern void free_task_struct(struct task_struct *); + #endif #endif /* _ASM_X86_THREAD_INFO_H */ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 36361bf..916c53c 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -28,7 +29,12 @@ #define get_ds() (KERNEL_DS) #define get_fs() (current_thread_info()->addr_limit) +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_MEMORY_UDEREF) +void __set_fs(mm_segment_t x); +void set_fs(mm_segment_t x); +#else #define set_fs(x) (current_thread_info()->addr_limit = (x)) +#endif #define 
segment_eq(a, b) ((a).seg == (b).seg) @@ -52,7 +58,7 @@ __chk_user_ptr(addr); \ asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0" \ : "=&r" (flag), "=r" (roksum) \ - : "1" (addr), "g" ((long)(size)), \ + : "1" (addr), "rm" ((long)(size)), \ "rm" (current_thread_info()->addr_limit.seg)); \ flag; \ }) @@ -76,7 +82,35 @@ * checks that the pointer is in the user space range - after calling * this function, memory access functions may still return -EFAULT. */ -#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0)) +#define access_ok_noprefault(type, addr, size) (likely(__range_not_ok(addr, size) == 0)) +#define access_ok(type, addr, size) \ +({ \ + unsigned long __size = size; \ + unsigned long __addr = (unsigned long)addr; \ + bool __ret_ao = __range_not_ok(__addr, __size) == 0; \ + if (__ret_ao && __size) { \ + unsigned long __addr_ao = __addr & PAGE_MASK; \ + unsigned long __end_ao = __addr + __size - 1; \ + if (unlikely((__end_ao ^ __addr_ao) & PAGE_MASK)) { \ + while (__addr_ao <= __end_ao) { \ + char __c_ao; \ + __addr_ao += PAGE_SIZE; \ + if (__size > PAGE_SIZE) \ + cond_resched(); \ + if (__get_user(__c_ao, (char __user *)__addr)) \ + break; \ + if (type != VERIFY_WRITE) { \ + __addr = __addr_ao; \ + continue; \ + } \ + if (__put_user(__c_ao, (char __user *)__addr)) \ + break; \ + __addr = __addr_ao; \ + } \ + } \ + } \ + __ret_ao; \ +}) /* * The exception table consists of pairs of addresses: the first is the @@ -126,6 +160,15 @@ extern int __get_user_bad(void); /* Careful: we have to cast the result to the type of the pointer * for sign reasons */ +/* + * This is a type: either (un)signed int, if the argument fits into + * that type, or otherwise (un)signed long long. + */ +#define __inttype(x) \ +__typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0U), \ + __builtin_choose_expr(__type_is_unsigned(__typeof__(x)), 0ULL, 0LL),\ + __builtin_choose_expr(__type_is_unsigned(__typeof__(x)), 0U, 0))) + /** * get_user: - Get a simple variable from user space. * @x: Variable to store result. 
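The __inttype() helper added just above makes the temporary that get_user() loads into match the width (and, in this patch, the signedness) of *ptr instead of always being unsigned long, which matters for instance when the target is wider than unsigned long on a 32-bit kernel. The fragment below is an illustrative user-space demonstration of the same __builtin_choose_expr trick; inttype_demo is a made-up name and, unlike the patch's macro, it does not preserve signedness.

#include <stdio.h>

/* Pick a temporary type: int-sized for small arguments, long-long-sized otherwise. */
#define inttype_demo(x) \
	__typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0U), 0ULL, 0U))

int main(void)
{
	unsigned short small;
	unsigned long long big;

	printf("temporary for 'small': %zu bytes\n", sizeof(inttype_demo(small)));
	printf("temporary for 'big'  : %zu bytes\n", sizeof(inttype_demo(big)));
	return 0;
}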
@@ -154,7 +197,7 @@ extern int __get_user_bad(void); #define get_user(x, ptr) \ ({ \ int __ret_gu; \ - unsigned long __val_gu; \ + __inttype(*(ptr)) __val_gu; \ __chk_user_ptr(ptr); \ might_fault(); \ switch (sizeof(*(ptr))) { \ @@ -182,12 +225,20 @@ extern int __get_user_bad(void); asm volatile("call __put_user_" #size : "=a" (__ret_pu) \ : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") - +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_MEMORY_UDEREF) +#define __copyuser_seg "gs;" +#define __COPYUSER_SET_ES "pushl %%gs; popl %%es\n" +#define __COPYUSER_RESTORE_ES "pushl %%ss; popl %%es\n" +#else +#define __copyuser_seg +#define __COPYUSER_SET_ES +#define __COPYUSER_RESTORE_ES +#endif #ifdef CONFIG_X86_32 #define __put_user_asm_u64(x, addr, err, errret) \ - asm volatile("1: movl %%eax,0(%2)\n" \ - "2: movl %%edx,4(%2)\n" \ + asm volatile("1: "__copyuser_seg"movl %%eax,0(%2)\n" \ + "2: "__copyuser_seg"movl %%edx,4(%2)\n" \ "3:\n" \ ".section .fixup,\"ax\"\n" \ "4: movl %3,%0\n" \ @@ -199,8 +250,8 @@ extern int __get_user_bad(void); : "A" (x), "r" (addr), "i" (errret), "0" (err)) #define __put_user_asm_ex_u64(x, addr) \ - asm volatile("1: movl %%eax,0(%1)\n" \ - "2: movl %%edx,4(%1)\n" \ + asm volatile("1: "__copyuser_seg"movl %%eax,0(%1)\n" \ + "2: "__copyuser_seg"movl %%edx,4(%1)\n" \ "3:\n" \ _ASM_EXTABLE(1b, 2b - 1b) \ _ASM_EXTABLE(2b, 3b - 2b) \ @@ -252,7 +303,7 @@ extern void __put_user_8(void); __typeof__(*(ptr)) __pu_val; \ __chk_user_ptr(ptr); \ might_fault(); \ - __pu_val = x; \ + __pu_val = (x); \ switch (sizeof(*(ptr))) { \ case 1: \ __put_user_x(1, __pu_val, ptr, __ret_pu); \ @@ -373,7 +424,7 @@ do { \ } while (0) #define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ - asm volatile("1: mov"itype" %2,%"rtype"1\n" \ + asm volatile("1: "__copyuser_seg"mov"itype" %2,%"rtype"1\n"\ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: mov %3,%0\n" \ @@ -381,7 +432,7 @@ do { \ " jmp 2b\n" \ ".previous\n" \ _ASM_EXTABLE(1b, 3b) \ - : "=r" (err), ltype(x) \ + : "=r" (err), ltype (x) \ : "m" (__m(addr)), "i" (errret), "0" (err)) #define __get_user_size_ex(x, ptr, size) \ @@ -406,7 +457,7 @@ do { \ } while (0) #define __get_user_asm_ex(x, addr, itype, rtype, ltype) \ - asm volatile("1: mov"itype" %1,%"rtype"0\n" \ + asm volatile("1: "__copyuser_seg"mov"itype" %1,%"rtype"0\n"\ "2:\n" \ _ASM_EXTABLE(1b, 2b - 1b) \ : ltype(x) : "m" (__m(addr))) @@ -423,13 +474,24 @@ do { \ int __gu_err; \ unsigned long __gu_val; \ __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ + (x) = (__typeof__(*(ptr)))__gu_val; \ __gu_err; \ }) /* FIXME: this hack is definitely wrong -AK */ struct __large_struct { unsigned long buf[100]; }; -#define __m(x) (*(struct __large_struct __user *)(x)) +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) +#define ____m(x) \ +({ \ + unsigned long ____x = (unsigned long)(x); \ + if (____x < pax_user_shadow_base) \ + ____x += pax_user_shadow_base; \ + (typeof(x))____x; \ +}) +#else +#define ____m(x) (x) +#endif +#define __m(x) (*(struct __large_struct __user *)____m(x)) /* * Tell gcc we read from memory instead of writing: this is because @@ -437,7 +499,7 @@ struct __large_struct { unsigned long buf[100]; }; * aliasing issues. 
*/ #define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \ - asm volatile("1: mov"itype" %"rtype"1,%2\n" \ + asm volatile("1: "__copyuser_seg"mov"itype" %"rtype"1,%2\n"\ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: mov %3,%0\n" \ @@ -445,10 +507,10 @@ struct __large_struct { unsigned long buf[100]; }; ".previous\n" \ _ASM_EXTABLE(1b, 3b) \ : "=r"(err) \ - : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err)) + : ltype (x), "m" (__m(addr)), "i" (errret), "0" (err)) #define __put_user_asm_ex(x, addr, itype, rtype, ltype) \ - asm volatile("1: mov"itype" %"rtype"0,%1\n" \ + asm volatile("1: "__copyuser_seg"mov"itype" %"rtype"0,%1\n"\ "2:\n" \ _ASM_EXTABLE(1b, 2b - 1b) \ : : ltype(x), "m" (__m(addr))) @@ -487,8 +549,12 @@ struct __large_struct { unsigned long buf[100]; }; * On error, the variable @x is set to zero. */ +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) +#define __get_user(x, ptr) get_user((x), (ptr)) +#else #define __get_user(x, ptr) \ __get_user_nocheck((x), (ptr), sizeof(*(ptr))) +#endif /** * __put_user: - Write a simple value into user space, with less checking. @@ -510,8 +576,12 @@ struct __large_struct { unsigned long buf[100]; }; * Returns zero on success, or -EFAULT on error. */ +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) +#define __put_user(x, ptr) put_user((x), (ptr)) +#else #define __put_user(x, ptr) \ __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) +#endif #define __get_user_unaligned __get_user #define __put_user_unaligned __put_user @@ -529,7 +599,7 @@ struct __large_struct { unsigned long buf[100]; }; #define get_user_ex(x, ptr) do { \ unsigned long __gue_val; \ __get_user_size_ex((__gue_val), (ptr), (sizeof(*(ptr)))); \ - (x) = (__force __typeof__(*(ptr)))__gue_val; \ + (x) = (__typeof__(*(ptr)))__gue_val; \ } while (0) #ifdef CONFIG_X86_WP_WORKS_OK diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 566e803..9540707 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -43,6 +43,11 @@ unsigned long __must_check __copy_from_user_ll_nocache_nozero static __always_inline unsigned long __must_check __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) { + if ((long)n < 0) + return n; + + check_object_size(from, n, true); + if (__builtin_constant_p(n)) { unsigned long ret; @@ -82,12 +87,16 @@ static __always_inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); + return __copy_to_user_inatomic(to, from, n); } static __always_inline unsigned long __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) { + if ((long)n < 0) + return n; + /* Avoid zeroing the tail if the copy fails.. 
* If 'n' is constant and 1, 2, or 4, we do still zero on a failure, * but as the zeroing behaviour is only significant when n is not @@ -137,6 +146,12 @@ static __always_inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { might_fault(); + + if ((long)n < 0) + return n; + + check_object_size(to, n, false); + if (__builtin_constant_p(n)) { unsigned long ret; @@ -159,6 +174,10 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to, const void __user *from, unsigned long n) { might_fault(); + + if ((long)n < 0) + return n; + if (__builtin_constant_p(n)) { unsigned long ret; @@ -181,15 +200,19 @@ static __always_inline unsigned long __copy_from_user_inatomic_nocache(void *to, const void __user *from, unsigned long n) { - return __copy_from_user_ll_nocache_nozero(to, from, n); + if ((long)n < 0) + return n; + + return __copy_from_user_ll_nocache_nozero(to, from, n); } -unsigned long __must_check copy_to_user(void __user *to, - const void *from, unsigned long n); -unsigned long __must_check _copy_from_user(void *to, - const void __user *from, - unsigned long n); - +extern void copy_to_user_overflow(void) +#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS + __compiletime_error("copy_to_user() buffer size is not provably correct") +#else + __compiletime_warning("copy_to_user() buffer size is not provably correct") +#endif +; extern void copy_from_user_overflow(void) #ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS @@ -199,17 +222,60 @@ extern void copy_from_user_overflow(void) #endif ; -static inline unsigned long __must_check copy_from_user(void *to, - const void __user *from, - unsigned long n) +/** + * copy_to_user: - Copy a block of data into user space. + * @to: Destination address, in user space. + * @from: Source address, in kernel space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from kernel space to user space. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + */ +static inline unsigned long __must_check +copy_to_user(void __user *to, const void *from, unsigned long n) { - int sz = __compiletime_object_size(to); + size_t sz = __compiletime_object_size(from); - if (likely(sz == -1 || sz >= n)) - n = _copy_from_user(to, from, n); - else + if (unlikely(sz != (size_t)-1 && sz < n)) + copy_to_user_overflow(); + else if (access_ok(VERIFY_WRITE, to, n)) + n = __copy_to_user(to, from, n); + return n; +} + +/** + * copy_from_user: - Copy a block of data from user space. + * @to: Destination address, in kernel space. + * @from: Source address, in user space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from user space to kernel space. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + * + * If some data could not be copied, this function will pad the copied + * data to the requested size using zero bytes. 
+ */ +static inline unsigned long __must_check +copy_from_user(void *to, const void __user *from, unsigned long n) +{ + size_t sz = __compiletime_object_size(to); + + check_object_size(to, n, false); + + if (unlikely(sz != (size_t)-1 && sz < n)) copy_from_user_overflow(); - + else if (access_ok(VERIFY_READ, from, n)) + n = __copy_from_user(to, from, n); + else if ((long)n > 0) + memset(to, 0, n); return n; } diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 1c66d30..6f1d97a 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -10,6 +10,9 @@ #include #include #include +#include + +#define set_fs(x) (current_thread_info()->addr_limit = (x)) /* * Copy To/From Userspace @@ -17,12 +20,12 @@ /* Handles exceptions in both to and from, but doesn't do access_ok */ __must_check unsigned long -copy_user_generic_string(void *to, const void *from, unsigned len); +copy_user_generic_string(void *to, const void *from, unsigned long len) __size_overflow(3); __must_check unsigned long -copy_user_generic_unrolled(void *to, const void *from, unsigned len); +copy_user_generic_unrolled(void *to, const void *from, unsigned long len) __size_overflow(3); static __always_inline __must_check unsigned long -copy_user_generic(void *to, const void *from, unsigned len) +copy_user_generic(void *to, const void *from, unsigned long len) { unsigned ret; @@ -36,138 +39,200 @@ copy_user_generic(void *to, const void *from, unsigned len) return ret; } +static __always_inline __must_check unsigned long +__copy_to_user(void __user *to, const void *from, unsigned long len); +static __always_inline __must_check unsigned long +__copy_from_user(void *to, const void __user *from, unsigned long len); __must_check unsigned long -_copy_to_user(void __user *to, const void *from, unsigned len); -__must_check unsigned long -_copy_from_user(void *to, const void __user *from, unsigned len); -__must_check unsigned long -copy_in_user(void __user *to, const void __user *from, unsigned len); +copy_in_user(void __user *to, const void __user *from, unsigned long len); + +extern void copy_to_user_overflow(void) +#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS + __compiletime_error("copy_to_user() buffer size is not provably correct") +#else + __compiletime_warning("copy_to_user() buffer size is not provably correct") +#endif +; + +extern void copy_from_user_overflow(void) +#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS + __compiletime_error("copy_from_user() buffer size is not provably correct") +#else + __compiletime_warning("copy_from_user() buffer size is not provably correct") +#endif +; static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { - int sz = __compiletime_object_size(to); - might_fault(); - if (likely(sz == -1 || sz >= n)) - n = _copy_from_user(to, from, n); -#ifdef CONFIG_DEBUG_VM - else - WARN(1, "Buffer overflow detected!\n"); -#endif + + check_object_size(to, n, false); + + if (access_ok(VERIFY_READ, from, n)) + n = __copy_from_user(to, from, n); + else if (n < INT_MAX) + memset(to, 0, n); return n; } static __always_inline __must_check -int copy_to_user(void __user *dst, const void *src, unsigned size) +int copy_to_user(void __user *dst, const void *src, unsigned long size) { might_fault(); - return _copy_to_user(dst, src, size); + if (access_ok(VERIFY_WRITE, dst, size)) + size = __copy_to_user(dst, src, size); + return size; } static __always_inline __must_check -int __copy_from_user(void *dst, const void 
__user *src, unsigned size) +unsigned long __copy_from_user(void *dst, const void __user *src, unsigned long size) { - int ret = 0; + size_t sz = __compiletime_object_size(dst); + unsigned ret = 0; might_fault(); + + if (size > INT_MAX) + return size; + + check_object_size(dst, size, false); + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if (!access_ok_noprefault(VERIFY_READ, src, size)) + return size; +#endif + + if (unlikely(sz != (size_t)-1 && sz < size)) { + copy_from_user_overflow(); + return size; + } + if (!__builtin_constant_p(size)) - return copy_user_generic(dst, (__force void *)src, size); + return copy_user_generic(dst, (__force_kernel const void *)____m(src), size); switch (size) { - case 1:__get_user_asm(*(u8 *)dst, (u8 __user *)src, + case 1:__get_user_asm(*(u8 *)dst, (const u8 __user *)src, ret, "b", "b", "=q", 1); return ret; - case 2:__get_user_asm(*(u16 *)dst, (u16 __user *)src, + case 2:__get_user_asm(*(u16 *)dst, (const u16 __user *)src, ret, "w", "w", "=r", 2); return ret; - case 4:__get_user_asm(*(u32 *)dst, (u32 __user *)src, + case 4:__get_user_asm(*(u32 *)dst, (const u32 __user *)src, ret, "l", "k", "=r", 4); return ret; - case 8:__get_user_asm(*(u64 *)dst, (u64 __user *)src, + case 8:__get_user_asm(*(u64 *)dst, (const u64 __user *)src, ret, "q", "", "=r", 8); return ret; case 10: - __get_user_asm(*(u64 *)dst, (u64 __user *)src, + __get_user_asm(*(u64 *)dst, (const u64 __user *)src, ret, "q", "", "=r", 10); if (unlikely(ret)) return ret; __get_user_asm(*(u16 *)(8 + (char *)dst), - (u16 __user *)(8 + (char __user *)src), + (const u16 __user *)(8 + (const char __user *)src), ret, "w", "w", "=r", 2); return ret; case 16: - __get_user_asm(*(u64 *)dst, (u64 __user *)src, + __get_user_asm(*(u64 *)dst, (const u64 __user *)src, ret, "q", "", "=r", 16); if (unlikely(ret)) return ret; __get_user_asm(*(u64 *)(8 + (char *)dst), - (u64 __user *)(8 + (char __user *)src), + (const u64 __user *)(8 + (const char __user *)src), ret, "q", "", "=r", 8); return ret; default: - return copy_user_generic(dst, (__force void *)src, size); + return copy_user_generic(dst, (__force_kernel const void *)____m(src), size); } } static __always_inline __must_check -int __copy_to_user(void __user *dst, const void *src, unsigned size) +unsigned long __copy_to_user(void __user *dst, const void *src, unsigned long size) { - int ret = 0; + size_t sz = __compiletime_object_size(src); + unsigned ret = 0; might_fault(); + + if (size > INT_MAX) + return size; + + check_object_size(src, size, true); + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if (!access_ok_noprefault(VERIFY_WRITE, dst, size)) + return size; +#endif + + if (unlikely(sz != (size_t)-1 && sz < size)) { + copy_to_user_overflow(); + return size; + } + if (!__builtin_constant_p(size)) - return copy_user_generic((__force void *)dst, src, size); + return copy_user_generic((__force_kernel void *)____m(dst), src, size); switch (size) { - case 1:__put_user_asm(*(u8 *)src, (u8 __user *)dst, + case 1:__put_user_asm(*(const u8 *)src, (u8 __user *)dst, ret, "b", "b", "iq", 1); return ret; - case 2:__put_user_asm(*(u16 *)src, (u16 __user *)dst, + case 2:__put_user_asm(*(const u16 *)src, (u16 __user *)dst, ret, "w", "w", "ir", 2); return ret; - case 4:__put_user_asm(*(u32 *)src, (u32 __user *)dst, + case 4:__put_user_asm(*(const u32 *)src, (u32 __user *)dst, ret, "l", "k", "ir", 4); return ret; - case 8:__put_user_asm(*(u64 *)src, (u64 __user *)dst, + case 8:__put_user_asm(*(const u64 *)src, (u64 __user *)dst, ret, "q", "", "er", 8); return ret; case 10: - 
__put_user_asm(*(u64 *)src, (u64 __user *)dst, + __put_user_asm(*(const u64 *)src, (u64 __user *)dst, ret, "q", "", "er", 10); if (unlikely(ret)) return ret; asm("":::"memory"); - __put_user_asm(4[(u16 *)src], 4 + (u16 __user *)dst, + __put_user_asm(4[(const u16 *)src], 4 + (u16 __user *)dst, ret, "w", "w", "ir", 2); return ret; case 16: - __put_user_asm(*(u64 *)src, (u64 __user *)dst, + __put_user_asm(*(const u64 *)src, (u64 __user *)dst, ret, "q", "", "er", 16); if (unlikely(ret)) return ret; asm("":::"memory"); - __put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst, + __put_user_asm(1[(const u64 *)src], 1 + (u64 __user *)dst, ret, "q", "", "er", 8); return ret; default: - return copy_user_generic((__force void *)dst, src, size); + return copy_user_generic((__force_kernel void *)____m(dst), src, size); } } static __always_inline __must_check -int __copy_in_user(void __user *dst, const void __user *src, unsigned size) +unsigned long __copy_in_user(void __user *dst, const void __user *src, unsigned long size) { - int ret = 0; + unsigned ret = 0; might_fault(); + + if (size > INT_MAX) + return size; + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if (!access_ok_noprefault(VERIFY_READ, src, size)) + return size; + if (!access_ok_noprefault(VERIFY_WRITE, dst, size)) + return size; +#endif + if (!__builtin_constant_p(size)) - return copy_user_generic((__force void *)dst, - (__force void *)src, size); + return copy_user_generic((__force_kernel void *)____m(dst), + (__force_kernel const void *)____m(src), size); switch (size) { case 1: { u8 tmp; - __get_user_asm(tmp, (u8 __user *)src, + __get_user_asm(tmp, (const u8 __user *)src, ret, "b", "b", "=q", 1); if (likely(!ret)) __put_user_asm(tmp, (u8 __user *)dst, @@ -176,7 +241,7 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) } case 2: { u16 tmp; - __get_user_asm(tmp, (u16 __user *)src, + __get_user_asm(tmp, (const u16 __user *)src, ret, "w", "w", "=r", 2); if (likely(!ret)) __put_user_asm(tmp, (u16 __user *)dst, @@ -186,7 +251,7 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) case 4: { u32 tmp; - __get_user_asm(tmp, (u32 __user *)src, + __get_user_asm(tmp, (const u32 __user *)src, ret, "l", "k", "=r", 4); if (likely(!ret)) __put_user_asm(tmp, (u32 __user *)dst, @@ -195,7 +260,7 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) } case 8: { u64 tmp; - __get_user_asm(tmp, (u64 __user *)src, + __get_user_asm(tmp, (const u64 __user *)src, ret, "q", "", "=r", 8); if (likely(!ret)) __put_user_asm(tmp, (u64 __user *)dst, @@ -203,8 +268,8 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) return ret; } default: - return copy_user_generic((__force void *)dst, - (__force void *)src, size); + return copy_user_generic((__force_kernel void *)____m(dst), + (__force_kernel const void *)____m(src), size); } } @@ -218,36 +283,57 @@ __must_check long strlen_user(const char __user *str); __must_check unsigned long clear_user(void __user *mem, unsigned long len); __must_check unsigned long __clear_user(void __user *mem, unsigned long len); -static __must_check __always_inline int -__copy_from_user_inatomic(void *dst, const void __user *src, unsigned size) +static __must_check __always_inline unsigned long +__copy_from_user_inatomic(void *dst, const void __user *src, unsigned long size) { - return copy_user_generic(dst, (__force const void *)src, size); + if (size > INT_MAX) + return size; + + return copy_user_generic(dst, (__force_kernel const void *)____m(src), 
size); } -static __must_check __always_inline int -__copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) +static __must_check __always_inline unsigned long +__copy_to_user_inatomic(void __user *dst, const void *src, unsigned long size) { - return copy_user_generic((__force void *)dst, src, size); + if (size > INT_MAX) + return size; + + return copy_user_generic((__force_kernel void *)____m(dst), src, size); } -extern long __copy_user_nocache(void *dst, const void __user *src, - unsigned size, int zerorest); +extern unsigned long __copy_user_nocache(void *dst, const void __user *src, + unsigned long size, int zerorest); -static inline int -__copy_from_user_nocache(void *dst, const void __user *src, unsigned size) +static inline unsigned long __copy_from_user_nocache(void *dst, const void __user *src, unsigned long size) { might_sleep(); + + if (size > INT_MAX) + return size; + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if (!access_ok_noprefault(VERIFY_READ, src, size)) + return size; +#endif + return __copy_user_nocache(dst, src, size, 1); } -static inline int -__copy_from_user_inatomic_nocache(void *dst, const void __user *src, - unsigned size) +static inline unsigned long __copy_from_user_inatomic_nocache(void *dst, const void __user *src, + unsigned long size) { + if (size > INT_MAX) + return size; + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if (!access_ok_noprefault(VERIFY_READ, src, size)) + return size; +#endif + return __copy_user_nocache(dst, src, size, 0); } -unsigned long -copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest); +extern unsigned long +copy_user_handle_tail(char __user *to, char __user *from, unsigned long len, unsigned zerorest) __size_overflow(3); #endif /* _ASM_X86_UACCESS_64_H */ diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 0431f19..4895926 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -535,7 +535,7 @@ __SYSCALL(__NR_tgkill, sys_tgkill) #define __NR_utimes 235 __SYSCALL(__NR_utimes, sys_utimes) #define __NR_vserver 236 -__SYSCALL(__NR_vserver, sys_ni_syscall) +__SYSCALL(__NR_vserver, sys_vserver) #define __NR_mbind 237 __SYSCALL(__NR_mbind, sys_mbind) #define __NR_set_mempolicy 238 diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index bb05228..d763d5b 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -11,7 +11,7 @@ extern const char VDSO32_PRELINK[]; #define VDSO32_SYMBOL(base, name) \ ({ \ extern const char VDSO32_##name[]; \ - (void *)(VDSO32_##name - VDSO32_PRELINK + (unsigned long)(base)); \ + (void __user *)(VDSO32_##name - VDSO32_PRELINK + (unsigned long)(base)); \ }) #endif diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 1971e65..1e07354 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -139,7 +139,7 @@ struct x86_init_ops { struct x86_init_timers timers; struct x86_init_iommu iommu; struct x86_init_pci pci; -}; +} __no_const; /** * struct x86_cpuinit_ops - platform specific cpu hotplug setups @@ -147,7 +147,7 @@ struct x86_init_ops { */ struct x86_cpuinit_ops { void (*setup_percpu_clockev)(void); -}; +} __no_const; /** * struct x86_platform_ops - platform specific runtime functions @@ -169,7 +169,7 @@ struct x86_platform_ops { void (*nmi_init)(void); unsigned char (*get_nmi_reason)(void); int (*i8042_detect)(void); -}; +} __no_const; struct pci_dev; @@ -177,7 +177,7 @@ struct x86_msi_ops { int (*setup_msi_irqs)(struct 
pci_dev *dev, int nvec, int type); void (*teardown_msi_irq)(unsigned int irq); void (*teardown_msi_irqs)(struct pci_dev *dev); -}; +} __no_const; extern struct x86_init_ops x86_init; extern struct x86_cpuinit_ops x86_cpuinit; diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index c34f96c..e26f052 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -54,7 +54,7 @@ extern int m2p_remove_override(struct page *page, bool clear_pte); extern struct page *m2p_find_override(unsigned long mfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); -static inline unsigned long pfn_to_mfn(unsigned long pfn) +static inline unsigned long __intentional_overflow(-1) pfn_to_mfn(unsigned long pfn) { unsigned long mfn; diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index c6ce245..aab6adb 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -65,6 +65,8 @@ static inline int xsave_user(struct xsave_struct __user *buf) { int err; + buf = (struct xsave_struct __user *)____m(buf); + /* * Clear the xsave header first, so that reserved fields are * initialized to zero. @@ -74,7 +76,9 @@ static inline int xsave_user(struct xsave_struct __user *buf) if (unlikely(err)) return -EFAULT; - __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" + __asm__ __volatile__("1:" + __copyuser_seg + ".byte " REX_PREFIX "0x0f,0xae,0x27\n" "2:\n" ".section .fixup,\"ax\"\n" "3: movl $-1,%[err]\n" @@ -96,11 +100,13 @@ static inline int xsave_user(struct xsave_struct __user *buf) static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) { int err; - struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); + struct xsave_struct *xstate = ((__force_kernel struct xsave_struct *)____m(buf)); u32 lmask = mask; u32 hmask = mask >> 32; - __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n" + __asm__ __volatile__("1:" + __copyuser_seg + ".byte " REX_PREFIX "0x0f,0xae,0x2f\n" "2:\n" ".section .fixup,\"ax\"\n" "3: movl $-1,%[err]\n" diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 479d03c..2450277 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1345,7 +1345,7 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) * If your system is blacklisted here, but you find that acpi=force * works for you, please contact linux-acpi@vger.kernel.org */ -static struct dmi_system_id __initdata acpi_dmi_table[] = { +static const struct dmi_system_id __initconst acpi_dmi_table[] = { /* * Boxes that need ACPI disabled */ @@ -1420,7 +1420,7 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { }; /* second table for DMI checks that should run after early-quirks */ -static struct dmi_system_id __initdata acpi_dmi_table_late[] = { +static const struct dmi_system_id __initconst acpi_dmi_table_late[] = { /* * HP laptops which use a DSDT reporting as HP/SB400/10000, * which includes some code which overrides all temperature diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile index 6a564ac..3f3a3d7 100644 --- a/arch/x86/kernel/acpi/realmode/Makefile +++ b/arch/x86/kernel/acpi/realmode/Makefile @@ -41,6 +41,9 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D_WAKEUP -D__KERNEL__ \ $(call cc-option, -fno-stack-protector) \ $(call cc-option, -mpreferred-stack-boundary=2) KBUILD_CFLAGS += $(call cc-option, -m32) +ifdef CONSTIFY_PLUGIN +KBUILD_CFLAGS 
+= -fplugin-arg-constify_plugin-no-constify +endif KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S index b4fd836..4358fe3 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.S @@ -108,6 +108,9 @@ wakeup_code: /* Do any other stuff... */ #ifndef CONFIG_64BIT + /* Recheck NX bit overrides (64bit path does this in trampoline */ + call verify_cpu + /* This could also be done in C code... */ movl pmode_cr3, %eax movl %eax, %cr3 @@ -131,6 +134,7 @@ wakeup_code: movl pmode_cr0, %eax movl %eax, %cr0 jmp pmode_return +# include "../../verify_cpu.S" #else pushw $0 pushw trampoline_segment diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 103b6ab..2004d0a 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -94,8 +94,12 @@ int acpi_suspend_lowlevel(void) header->trampoline_segment = trampoline_address() >> 4; #ifdef CONFIG_SMP stack_start = (unsigned long)temp_stack + sizeof(temp_stack); + + pax_open_kernel(); early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(smp_processor_id()); + pax_close_kernel(); + initial_gs = per_cpu_offset(smp_processor_id()); #endif initial_code = (unsigned long)wakeup_long64; diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index 13ab7205..95d5442 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -30,13 +30,11 @@ wakeup_pmode_return: # and restore the stack ... but you need gdt for this to work movl saved_context_esp, %esp - movl %cs:saved_magic, %eax - cmpl $0x12345678, %eax + cmpl $0x12345678, saved_magic jne bogus_magic # jump to place where we left off - movl saved_eip, %eax - jmp *%eax + jmp *(saved_eip) bogus_magic: jmp bogus_magic diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index bda833c..a9bdd97 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -276,6 +276,13 @@ void __init_or_module apply_alternatives(struct alt_instr *start, */ for (a = start; a < end; a++) { instr = (u8 *)&a->instr_offset + a->instr_offset; + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) + instr += ____LOAD_PHYSICAL_ADDR - LOAD_PHYSICAL_ADDR; + if (instr < (u8 *)_text || (u8 *)_einittext <= instr) + instr -= ____LOAD_PHYSICAL_ADDR - LOAD_PHYSICAL_ADDR; +#endif + replacement = (u8 *)&a->repl_offset + a->repl_offset; BUG_ON(a->replacementlen > a->instrlen); BUG_ON(a->instrlen > sizeof(insnbuf)); @@ -307,10 +314,16 @@ static void alternatives_smp_lock(const s32 *start, const s32 *end, for (poff = start; poff < end; poff++) { u8 *ptr = (u8 *)poff + *poff; +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) + ptr += ____LOAD_PHYSICAL_ADDR - LOAD_PHYSICAL_ADDR; + if (ptr < (u8 *)_text || (u8 *)_einittext <= ptr) + ptr -= ____LOAD_PHYSICAL_ADDR - LOAD_PHYSICAL_ADDR; +#endif + if (!*poff || ptr < text || ptr >= text_end) continue; /* turn DS segment override prefix into lock prefix */ - if (*ptr == 0x3e) + if (*ktla_ktva(ptr) == 0x3e) text_poke(ptr, ((unsigned char []){0xf0}), 1); }; mutex_unlock(&text_mutex); @@ -328,10 +341,16 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end, for (poff = start; poff < end; poff++) { u8 *ptr = (u8 *)poff + *poff; +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) + ptr += ____LOAD_PHYSICAL_ADDR - LOAD_PHYSICAL_ADDR; + if (ptr < (u8 *)_text || (u8 *)_einittext <= ptr) + 
ptr -= ____LOAD_PHYSICAL_ADDR - LOAD_PHYSICAL_ADDR; +#endif + if (!*poff || ptr < text || ptr >= text_end) continue; /* turn lock prefix into DS segment override prefix */ - if (*ptr == 0xf0) + if (*ktla_ktva(ptr) == 0xf0) text_poke(ptr, ((unsigned char []){0x3E}), 1); }; mutex_unlock(&text_mutex); @@ -500,7 +519,7 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, BUG_ON(p->len > MAX_PATCH_LEN); /* prep the buffer with the original instructions */ - memcpy(insnbuf, p->instr, p->len); + memcpy(insnbuf, ktla_ktva(p->instr), p->len); used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf, (unsigned long)p->instr, p->len); @@ -568,7 +587,7 @@ void __init alternative_instructions(void) if (smp_alt_once) free_init_pages("SMP alternatives", (unsigned long)__smp_locks, - (unsigned long)__smp_locks_end); + PAGE_ALIGN((unsigned long)__smp_locks_end)); restart_nmi(); } @@ -585,13 +604,17 @@ void __init alternative_instructions(void) * instructions. And on the local CPU you need to be protected again NMI or MCE * handlers seeing an inconsistent instruction while you patch. */ -void *__init_or_module text_poke_early(void *addr, const void *opcode, +void *__kprobes text_poke_early(void *addr, const void *opcode, size_t len) { unsigned long flags; local_irq_save(flags); - memcpy(addr, opcode, len); + + pax_open_kernel(); + memcpy(ktla_ktva(addr), opcode, len); sync_core(); + pax_close_kernel(); + local_irq_restore(flags); /* Could also do a CLFLUSH here to speed up CPU recovery; but that causes hangs on some VIA CPUs. */ @@ -613,36 +636,22 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode, */ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) { - unsigned long flags; - char *vaddr; + unsigned char *vaddr = ktla_ktva(addr); struct page *pages[2]; - int i; + size_t i; if (!core_kernel_text((unsigned long)addr)) { - pages[0] = vmalloc_to_page(addr); - pages[1] = vmalloc_to_page(addr + PAGE_SIZE); + pages[0] = vmalloc_to_page(vaddr); + pages[1] = vmalloc_to_page(vaddr + PAGE_SIZE); } else { - pages[0] = virt_to_page(addr); + pages[0] = virt_to_page(vaddr); WARN_ON(!PageReserved(pages[0])); - pages[1] = virt_to_page(addr + PAGE_SIZE); + pages[1] = virt_to_page(vaddr + PAGE_SIZE); } BUG_ON(!pages[0]); - local_irq_save(flags); - set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0])); - if (pages[1]) - set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1])); - vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0); - memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); - clear_fixmap(FIX_TEXT_POKE0); - if (pages[1]) - clear_fixmap(FIX_TEXT_POKE1); - local_flush_tlb(); - sync_core(); - /* Could also do a CLFLUSH here to speed up CPU recovery; but - that causes hangs on some VIA CPUs. 
*/ + text_poke_early(addr, opcode, len); for (i = 0; i < len; i++) - BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]); - local_irq_restore(flags); + BUG_ON((vaddr)[i] != ((const unsigned char *)opcode)[i]); return addr; } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 838a3b4..71de0f5 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -174,7 +174,7 @@ int first_system_vector = 0xfe; /* * Debug level, exported for io_apic.c */ -unsigned int apic_verbosity; +int apic_verbosity; int pic_mode; @@ -1859,7 +1859,7 @@ void smp_error_interrupt(struct pt_regs *regs) apic_write(APIC_ESR, 0); v1 = apic_read(APIC_ESR); ack_APIC_irq(); - atomic_inc(&irq_err_count); + atomic_inc_unchecked(&irq_err_count); apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x(%02x)", smp_processor_id(), v0 , v1); diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index f7a41e4..be25d88 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -171,7 +171,7 @@ static int flat_phys_pkg_id(int initial_apic_id, int index_msb) return initial_apic_id >> index_msb; } -static struct apic apic_flat = { +static struct apic apic_flat __read_only = { .name = "flat", .probe = NULL, .acpi_madt_oem_check = flat_acpi_madt_oem_check, @@ -327,7 +327,7 @@ static int physflat_probe(void) return 0; } -static struct apic apic_physflat = { +static struct apic apic_physflat __read_only = { .name = "physical flat", .probe = physflat_probe, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 775b82b..841f78b 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -119,7 +119,7 @@ static void noop_apic_write(u32 reg, u32 v) WARN_ON_ONCE(cpu_has_apic && !disable_apic); } -struct apic apic_noop = { +struct apic apic_noop __read_only = { .name = "noop", .probe = noop_probe, .acpi_madt_oem_check = NULL, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 521bead..a724871 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -193,7 +193,7 @@ static int probe_bigsmp(void) return dmi_bigsmp; } -static struct apic apic_bigsmp = { +static struct apic apic_bigsmp __read_only = { .name = "bigsmp", .probe = probe_bigsmp, diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 5d513bc..6a51935 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -619,8 +619,7 @@ static int es7000_mps_oem_check_cluster(struct mpc_table *mpc, char *oem, return ret && es7000_apic_is_cluster(); } -/* We've been warned by a false positive warning.Use __refdata to keep calm. 
*/ -static struct apic __refdata apic_es7000_cluster = { +static struct apic apic_es7000_cluster __read_only = { .name = "es7000", .probe = probe_es7000, @@ -685,7 +684,7 @@ static struct apic __refdata apic_es7000_cluster = { .x86_32_early_logical_apicid = es7000_early_logical_apicid, }; -static struct apic __refdata apic_es7000 = { +static struct apic apic_es7000 __read_only = { .name = "es7000", .probe = probe_es7000, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 6d939d7..a93a87e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1096,7 +1096,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, } EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); -void lock_vector_lock(void) +void lock_vector_lock(void) __acquires(vector_lock) { /* Used to the online set of cpus does not change * during assign_irq_vector. @@ -1104,7 +1104,7 @@ void lock_vector_lock(void) raw_spin_lock(&vector_lock); } -void unlock_vector_lock(void) +void unlock_vector_lock(void) __releases(vector_lock) { raw_spin_unlock(&vector_lock); } @@ -2510,7 +2510,7 @@ static void ack_apic_edge(struct irq_data *data) ack_APIC_irq(); } -atomic_t irq_mis_count; +atomic_unchecked_t irq_mis_count; static void ack_apic_level(struct irq_data *data) { @@ -2576,7 +2576,7 @@ static void ack_apic_level(struct irq_data *data) * at the cpu. */ if (!(v & (1 << (i & 0x1f)))) { - atomic_inc(&irq_mis_count); + atomic_inc_unchecked(&irq_mis_count); eoi_ioapic_irq(irq, cfg); } @@ -2634,17 +2634,20 @@ static void ir_print_prefix(struct irq_data *data, struct seq_file *p) static void irq_remap_modify_chip_defaults(struct irq_chip *chip) { - chip->irq_print_chip = ir_print_prefix; - chip->irq_ack = ir_ack_apic_edge; - chip->irq_eoi = ir_ack_apic_level; + pax_open_kernel(); + *(void **)&chip->irq_print_chip = ir_print_prefix; + *(void **)&chip->irq_ack = ir_ack_apic_edge; + *(void **)&chip->irq_eoi = ir_ack_apic_level; #ifdef CONFIG_SMP - chip->irq_set_affinity = ir_ioapic_set_affinity; + *(void **)&chip->irq_set_affinity = ir_ioapic_set_affinity; #endif + + pax_close_kernel(); } #endif /* CONFIG_IRQ_REMAP */ -static struct irq_chip ioapic_chip __read_mostly = { +static struct irq_chip ioapic_chip = { .name = "IO-APIC", .irq_startup = startup_ioapic_irq, .irq_mask = mask_ioapic_irq, @@ -2715,7 +2718,7 @@ static void ack_lapic_irq(struct irq_data *data) ack_APIC_irq(); } -static struct irq_chip lapic_chip __read_mostly = { +static struct irq_chip lapic_chip = { .name = "local-APIC", .irq_mask = mask_lapic_irq, .irq_unmask = unmask_lapic_irq, diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index c4a61ca..4c63d32 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -472,8 +472,7 @@ static void numaq_setup_portio_remap(void) (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); } -/* Use __refdata to keep false positive warning calm. 
*/ -static struct apic __refdata apic_numaq = { +static struct apic apic_numaq __read_only = { .name = "NUMAQ", .probe = probe_numaq, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 0787bb3..e222a80 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -87,7 +87,7 @@ static int probe_default(void) return 1; } -static struct apic apic_default = { +static struct apic apic_default __read_only = { .name = "default", .probe = probe_default, diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 1911442..2424a83 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -491,7 +491,7 @@ void setup_summit(void) } #endif -static struct apic apic_summit = { +static struct apic apic_summit __read_only = { .name = "summit", .probe = probe_summit, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 5007958..2eba140 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -182,7 +182,7 @@ update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu) return notifier_from_errno(err); } -static struct notifier_block __refdata x2apic_cpu_notifier = { +static struct notifier_block x2apic_cpu_notifier = { .notifier_call = update_clusterinfo, }; @@ -208,7 +208,7 @@ static int x2apic_cluster_probe(void) return 0; } -static struct apic apic_x2apic_cluster = { +static struct apic apic_x2apic_cluster __read_only = { .name = "cluster x2apic", .probe = x2apic_cluster_probe, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index db4f704..2d4f409 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -121,7 +121,7 @@ static int x2apic_phys_probe(void) return apic == &apic_x2apic_phys; } -static struct apic apic_x2apic_phys = { +static struct apic apic_x2apic_phys __read_only = { .name = "physical x2apic", .probe = x2apic_phys_probe, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 79b05b8..bc1d972 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -346,7 +346,7 @@ static int uv_probe(void) return apic == &apic_x2apic_uv_x; } -static struct apic __refdata apic_x2apic_uv_x = { +static struct apic apic_x2apic_uv_x __read_only = { .name = "UV large system", .probe = uv_probe, diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index a46bd38..92f6c9c 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -411,7 +411,7 @@ static DEFINE_MUTEX(apm_mutex); * This is for buggy BIOS's that refer to (real mode) segment 0x40 * even though they are called in protected mode. 
*/ -static struct desc_struct bad_bios_desc = GDT_ENTRY_INIT(0x4092, +static const struct desc_struct bad_bios_desc = GDT_ENTRY_INIT(0x4093, (unsigned long)__va(0x400UL), PAGE_SIZE - 0x400 - 1); static const char driver_version[] = "1.16ac"; /* no spaces */ @@ -589,7 +589,10 @@ static long __apm_bios_call(void *_call) BUG_ON(cpu != 0); gdt = get_cpu_gdt_table(cpu); save_desc_40 = gdt[0x40 / 8]; + + pax_open_kernel(); gdt[0x40 / 8] = bad_bios_desc; + pax_close_kernel(); apm_irq_save(flags); APM_DO_SAVE_SEGS; @@ -598,7 +601,11 @@ static long __apm_bios_call(void *_call) &call->esi); APM_DO_RESTORE_SEGS; apm_irq_restore(flags); + + pax_open_kernel(); gdt[0x40 / 8] = save_desc_40; + pax_close_kernel(); + put_cpu(); return call->eax & 0xff; @@ -665,7 +672,10 @@ static long __apm_bios_call_simple(void *_call) BUG_ON(cpu != 0); gdt = get_cpu_gdt_table(cpu); save_desc_40 = gdt[0x40 / 8]; + + pax_open_kernel(); gdt[0x40 / 8] = bad_bios_desc; + pax_close_kernel(); apm_irq_save(flags); APM_DO_SAVE_SEGS; @@ -673,7 +683,11 @@ static long __apm_bios_call_simple(void *_call) &call->eax); APM_DO_RESTORE_SEGS; apm_irq_restore(flags); + + pax_open_kernel(); gdt[0x40 / 8] = save_desc_40; + pax_close_kernel(); + put_cpu(); return error; } @@ -2037,7 +2051,7 @@ static int __init swab_apm_power_in_minutes(const struct dmi_system_id *d) return 0; } -static struct dmi_system_id __initdata apm_dmi_table[] = { +static const struct dmi_system_id __initconst apm_dmi_table[] = { { print_if_true, KERN_WARNING "IBM T23 - BIOS 1.03b+ and controller firmware 1.02+ may be needed for Linux APM.", @@ -2347,12 +2361,15 @@ static int __init apm_init(void) * code to that CPU. */ gdt = get_cpu_gdt_table(0); + + pax_open_kernel(); set_desc_base(&gdt[APM_CS >> 3], (unsigned long)__va((unsigned long)apm_info.bios.cseg << 4)); set_desc_base(&gdt[APM_CS_16 >> 3], (unsigned long)__va((unsigned long)apm_info.bios.cseg_16 << 4)); set_desc_base(&gdt[APM_DS >> 3], (unsigned long)__va((unsigned long)apm_info.bios.dseg << 4)); + pax_close_kernel(); proc_create("apm", 0, NULL, &apm_file_ops); diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 4f13faf..87db5d2 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -33,6 +33,8 @@ void common(void) { OFFSET(TI_status, thread_info, status); OFFSET(TI_addr_limit, thread_info, addr_limit); OFFSET(TI_preempt_count, thread_info, preempt_count); + OFFSET(TI_lowest_stack, thread_info, lowest_stack); + DEFINE(TI_task_thread_sp0, offsetof(struct task_struct, thread.sp0) - offsetof(struct task_struct, tinfo)); BLANK(); OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); @@ -53,8 +55,26 @@ void common(void) { OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); + +#ifdef CONFIG_PAX_KERNEXEC + OFFSET(PV_CPU_write_cr0, pv_cpu_ops, write_cr0); #endif +#ifdef CONFIG_PAX_MEMORY_UDEREF + OFFSET(PV_MMU_read_cr3, pv_mmu_ops, read_cr3); + OFFSET(PV_MMU_write_cr3, pv_mmu_ops, write_cr3); +#ifdef CONFIG_X86_64 + OFFSET(PV_MMU_set_pgd_batched, pv_mmu_ops, set_pgd_batched); +#endif +#endif + +#endif + + BLANK(); + DEFINE(PAGE_SIZE_asm, PAGE_SIZE); + DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT); + DEFINE(THREAD_SIZE_asm, THREAD_SIZE); + #ifdef CONFIG_XEN BLANK(); OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index e72a119..6e2955d 100644 --- 
a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -69,6 +69,7 @@ int main(void) BLANK(); #undef ENTRY + DEFINE(TSS_size, sizeof(struct tss_struct)); OFFSET(TSS_ist, tss_struct, x86_tss.ist); BLANK(); diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 25f24dc..4094a7f 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -8,10 +8,6 @@ CFLAGS_REMOVE_common.o = -pg CFLAGS_REMOVE_perf_event.o = -pg endif -# Make sure load_percpu_segment has no stackprotector -nostackp := $(call cc-option, -fno-stack-protector) -CFLAGS_common.o := $(nostackp) - obj-y := intel_cacheinfo.o scattered.o topology.o obj-y += proc.o capflags.o powerflags.o common.o obj-y += vmware.o hypervisor.o sched.o mshyperv.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 60d4c33..3f51857 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -711,7 +711,7 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* AMD errata T13 (order #21922) */ - if ((c->x86 == 6)) { + if (c->x86 == 6) { /* Duron Rev A0 */ if (c->x86_model == 3 && c->x86_mask == 0) size = 64; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6284d6d..60561cb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -84,60 +84,6 @@ static const struct cpu_dev __cpuinitconst default_cpu = { static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; -DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { -#ifdef CONFIG_X86_64 - /* - * We need valid kernel segments for data and code in long mode too - * IRET will check the segment types kkeil 2000/10/28 - * Also sysret mandates a special GDT layout - * - * TLS descriptors are currently at a different place compared to i386. - * Hopefully nobody expects them at a fixed place (Wine?) - */ - [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), - [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), - [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff), - [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff), - [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff), - [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff), -#else - [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff), - [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), - [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff), - [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff), - /* - * Segments used for calling PnP BIOS have byte granularity. - * They code segments and data segments have fixed 64k limits, - * the transfer segment sizes are set at run time. - */ - /* 32-bit code */ - [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), - /* 16-bit code */ - [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), - /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff), - /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0), - /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0), - /* - * The APM segments have byte granularity and their bases - * are set at run time. All have 64k limits. 
- */ - /* 32-bit code */ - [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), - /* 16-bit code */ - [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), - /* data */ - [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff), - - [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), - [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), - GDT_STACK_CANARY_INIT -#endif -} }; -EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); - static int __init x86_xsave_setup(char *s) { if (strlen(s)) @@ -374,7 +320,7 @@ void switch_to_new_gdt(int cpu) { struct desc_ptr gdt_descr; - gdt_descr.address = (long)get_cpu_gdt_table(cpu); + gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); /* Reload the per-cpu base */ @@ -769,6 +715,16 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) setup_smep(c); +#ifdef CONFIG_X86_32 +#ifdef CONFIG_PAX_PAGEEXEC + if (!(__supported_pte_mask & _PAGE_NX)) + clear_cpu_cap(c, X86_FEATURE_PSE); +#endif +#if defined(CONFIG_PAX_SEGMEXEC) || defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF) + clear_cpu_cap(c, X86_FEATURE_SEP); +#endif +#endif + get_model_name(c); /* Default name */ detect_nopl(c); @@ -1021,6 +977,9 @@ static __init int setup_disablecpuid(char *arg) } __setup("clearcpuid=", setup_disablecpuid); +DEFINE_PER_CPU(struct thread_info *, current_tinfo) = &init_task.tinfo; +EXPORT_PER_CPU_SYMBOL(current_tinfo); + #ifdef CONFIG_X86_64 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; @@ -1036,7 +995,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = EXPORT_PER_CPU_SYMBOL(current_task); DEFINE_PER_CPU(unsigned long, kernel_stack) = - (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; + (unsigned long)&init_thread_union - 16 + THREAD_SIZE; EXPORT_PER_CPU_SYMBOL(kernel_stack); DEFINE_PER_CPU(char *, irq_stack_ptr) = @@ -1101,7 +1060,7 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) { memset(regs, 0, sizeof(struct pt_regs)); regs->fs = __KERNEL_PERCPU; - regs->gs = __KERNEL_STACK_CANARY; + savesegment(gs, regs->gs); return regs; } @@ -1156,7 +1115,7 @@ void __cpuinit cpu_init(void) int i; cpu = stack_smp_processor_id(); - t = &per_cpu(init_tss, cpu); + t = init_tss + cpu; oist = &per_cpu(orig_ist, cpu); #ifdef CONFIG_NUMA @@ -1182,7 +1141,7 @@ void __cpuinit cpu_init(void) switch_to_new_gdt(cpu); loadsegment(fs, 0); - load_idt((const struct desc_ptr *)&idt_descr); + load_idt(&idt_descr); memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); syscall_init(); @@ -1191,7 +1150,6 @@ void __cpuinit cpu_init(void) wrmsrl(MSR_KERNEL_GS_BASE, 0); barrier(); - x86_configure_nx(); if (cpu != 0) enable_x2apic(); @@ -1245,7 +1203,7 @@ void __cpuinit cpu_init(void) { int cpu = smp_processor_id(); struct task_struct *curr = current; - struct tss_struct *t = &per_cpu(init_tss, cpu); + struct tss_struct *t = init_tss + cpu; struct thread_struct *thread = &curr->thread; if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index e7a64dd..6a192f6 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -189,7 +189,7 @@ static void __cpuinit trap_init_f00f_bug(void) * Update the IDT descriptor and reload the IDT so that * it uses the read-only mapped virtual address. 
*/ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); + idt_descr.address = (struct desc_struct *)fix_to_virt(FIX_F00F_IDT); load_idt(&idt_descr); } #endif diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 0e89635..f0a7525 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -984,6 +984,22 @@ static struct attribute *default_attrs[] = { }; #ifdef CONFIG_AMD_NB +static struct attribute *default_attrs_amd_nb[] = { + &type.attr, + &level.attr, + &coherency_line_size.attr, + &physical_line_partition.attr, + &ways_of_associativity.attr, + &number_of_sets.attr, + &size.attr, + &shared_cpu_map.attr, + &shared_cpu_list.attr, + NULL, + NULL, + NULL, + NULL +}; + static struct attribute ** __cpuinit amd_l3_attrs(void) { static struct attribute **attrs; @@ -994,18 +1010,7 @@ static struct attribute ** __cpuinit amd_l3_attrs(void) n = sizeof (default_attrs) / sizeof (struct attribute *); - if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) - n += 2; - - if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) - n += 1; - - attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL); - if (attrs == NULL) - return attrs = default_attrs; - - for (n = 0; default_attrs[n]; n++) - attrs[n] = default_attrs[n]; + attrs = default_attrs_amd_nb; if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) { attrs[n++] = &cache_disable_0.attr; @@ -1056,6 +1061,13 @@ static struct kobj_type ktype_cache = { .default_attrs = default_attrs, }; +#ifdef CONFIG_AMD_NB +static struct kobj_type ktype_cache_amd_nb = { + .sysfs_ops = &sysfs_ops, + .default_attrs = default_attrs_amd_nb, +}; +#endif + static struct kobj_type ktype_percpu_entry = { .sysfs_ops = &sysfs_ops, }; @@ -1121,20 +1133,26 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) return retval; } +#ifdef CONFIG_AMD_NB + amd_l3_attrs(); +#endif + for (i = 0; i < num_cache_leaves; i++) { + struct kobj_type *ktype; + this_object = INDEX_KOBJECT_PTR(cpu, i); this_object->cpu = cpu; this_object->index = i; this_leaf = CPUID4_INFO_IDX(cpu, i); - ktype_cache.default_attrs = default_attrs; + ktype = &ktype_cache; #ifdef CONFIG_AMD_NB if (this_leaf->base.nb) - ktype_cache.default_attrs = amd_l3_attrs(); + ktype = &ktype_cache_amd_nb; #endif retval = kobject_init_and_add(&(this_object->kobj), - &ktype_cache, + ktype, per_cpu(ici_cache_kobject, cpu), "index%1lu", i); if (unlikely(retval)) { @@ -1189,7 +1207,7 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = { +static struct notifier_block cacheinfo_cpu_notifier = { .notifier_call = cacheinfo_cpu_callback, }; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 3b67877..77e760c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -42,6 +42,7 @@ #include #include #include +#include #include "mce-internal.h" @@ -200,7 +201,7 @@ static void print_mce(struct mce *m) !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", m->cs, m->ip); - if (m->cs == __KERNEL_CS) + if (m->cs == __KERNEL_CS || m->cs == __KERNEXEC_KERNEL_CS) print_symbol("{%s}", m->ip); pr_cont("\n"); } @@ -233,10 +234,10 @@ static void print_mce(struct mce *m) #define PANIC_TIMEOUT 5 /* 5 seconds */ -static atomic_t mce_paniced; +static atomic_unchecked_t mce_paniced; static int fake_panic; -static atomic_t mce_fake_paniced; +static atomic_unchecked_t mce_fake_paniced; /* Panic in progress. 
Enable interrupts and wait for final IPI */ static void wait_for_panic(void) @@ -260,7 +261,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp) /* * Make sure only one CPU runs in machine check panic */ - if (atomic_inc_return(&mce_paniced) > 1) + if (atomic_inc_return_unchecked(&mce_paniced) > 1) wait_for_panic(); barrier(); @@ -268,7 +269,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp) console_verbose(); } else { /* Don't log too much for fake panic */ - if (atomic_inc_return(&mce_fake_paniced) > 1) + if (atomic_inc_return_unchecked(&mce_fake_paniced) > 1) return; } /* First print corrected ones that are still unlogged */ @@ -307,7 +308,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp) if (!fake_panic) { if (panic_timeout == 0) panic_timeout = mce_panic_timeout; - panic(msg); + panic("%s", msg); } else pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg); } @@ -616,7 +617,7 @@ static int mce_timed_out(u64 *t) * might have been modified by someone else. */ rmb(); - if (atomic_read(&mce_paniced)) + if (atomic_read_unchecked(&mce_paniced)) wait_for_panic(); if (!monarch_timeout) goto out; @@ -1404,7 +1405,7 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code) } /* Call the installed machine check handler for this CPU setup. */ -void (*machine_check_vector)(struct pt_regs *, long error_code) = +void (*machine_check_vector)(struct pt_regs *, long error_code) __read_only = unexpected_machine_check; /* @@ -1427,7 +1428,9 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) return; } + pax_open_kernel(); machine_check_vector = do_machine_check; + pax_close_kernel(); __mcheck_cpu_init_generic(); __mcheck_cpu_init_vendor(c); @@ -1441,7 +1444,7 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) */ static DEFINE_SPINLOCK(mce_chrdev_state_lock); -static int mce_chrdev_open_count; /* #times opened */ +static local_t mce_chrdev_open_count; /* #times opened */ static int mce_chrdev_open_exclu; /* already open exclusive? 
*/ static int mce_chrdev_open(struct inode *inode, struct file *file) @@ -1449,7 +1452,7 @@ static int mce_chrdev_open(struct inode *inode, struct file *file) spin_lock(&mce_chrdev_state_lock); if (mce_chrdev_open_exclu || - (mce_chrdev_open_count && (file->f_flags & O_EXCL))) { + (local_read(&mce_chrdev_open_count) && (file->f_flags & O_EXCL))) { spin_unlock(&mce_chrdev_state_lock); return -EBUSY; @@ -1457,7 +1460,7 @@ static int mce_chrdev_open(struct inode *inode, struct file *file) if (file->f_flags & O_EXCL) mce_chrdev_open_exclu = 1; - mce_chrdev_open_count++; + local_inc(&mce_chrdev_open_count); spin_unlock(&mce_chrdev_state_lock); @@ -1468,7 +1471,7 @@ static int mce_chrdev_release(struct inode *inode, struct file *file) { spin_lock(&mce_chrdev_state_lock); - mce_chrdev_open_count--; + local_dec(&mce_chrdev_open_count); mce_chrdev_open_exclu = 0; spin_unlock(&mce_chrdev_state_lock); @@ -2099,7 +2102,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) return NOTIFY_OK; } -static struct notifier_block mce_cpu_notifier __cpuinitdata = { +static struct notifier_block mce_cpu_notifier = { .notifier_call = mce_cpu_callback, }; @@ -2109,7 +2112,7 @@ static __init void mce_init_banks(void) for (i = 0; i < banks; i++) { struct mce_bank *b = &mce_banks[i]; - struct sysdev_attribute *a = &b->attr; + sysdev_attribute_no_const *a = &b->attr; sysfs_attr_init(&a->attr); a->attr.name = b->attrname; @@ -2177,7 +2180,7 @@ struct dentry *mce_get_debugfs_dir(void) static void mce_reset(void) { cpu_missing = 0; - atomic_set(&mce_fake_paniced, 0); + atomic_set_unchecked(&mce_fake_paniced, 0); atomic_set(&mce_executing, 0); atomic_set(&mce_callin, 0); atomic_set(&global_nwo, 0); diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 5c0e653..0882b0a 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -12,6 +12,7 @@ #include #include #include +#include /* By default disabled */ int mce_p5_enabled __read_mostly; @@ -50,7 +51,9 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c) if (!cpu_has(c, X86_FEATURE_MCE)) return; + pax_open_kernel(); machine_check_vector = pentium_machine_check; + pax_close_kernel(); /* Make sure the vector pointer is visible before we enable MCEs: */ wmb(); diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index ce04b58..b84acbd 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -290,7 +290,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, return notifier_from_errno(err); } -static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = +static struct notifier_block thermal_throttle_cpu_notifier = { .notifier_call = thermal_throttle_cpu_callback, }; diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 54060f5..c1a7577 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -11,6 +11,7 @@ #include #include #include +#include /* Machine check handler for WinChip C6: */ static void winchip_machine_check(struct pt_regs *regs, long error_code) @@ -24,7 +25,9 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c) { u32 lo, hi; + pax_open_kernel(); machine_check_vector = winchip_machine_check; + pax_close_kernel(); /* Make sure the vector pointer is visible before we enable MCEs: */ wmb(); diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 6b96110..0da73eb 100644 
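(Aside, not part of the patch: the mce.c, p5.c and winchip.c hunks above all follow the same idiom, also used for the GDT writes in apm_32.c earlier, in which data the patch maps read-only may only be written between pax_open_kernel() and pax_close_kernel(). Below is a minimal, hedged sketch of that idiom in kernel-style C; machine_check_vector, pax_open_kernel() and pax_close_kernel() are the names the patch itself uses, while the example_* functions are hypothetical and exist only for illustration. Under KERNEXEC the open/close pair works by toggling CR0.WP, as the entry_32.S changes later in this patch show.)

/* Illustrative sketch only.  machine_check_vector is declared __read_only
 * by the mcheck changes above; the example_* names are hypothetical. */
static void example_machine_check(struct pt_regs *regs, long error_code)
{
	/* handle the event */
}

static void example_install_handler(void)
{
	pax_open_kernel();	/* temporarily allow writes to read-only kernel data */
	machine_check_vector = example_machine_check;
	pax_close_kernel();	/* restore write protection */
}

(End of aside; the patch continues.)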
--- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -62,7 +62,7 @@ static DEFINE_MUTEX(mtrr_mutex); u64 size_or_mask, size_and_mask; static bool mtrr_aps_delayed_init; -static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; +static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM] __read_only; const struct mtrr_ops *mtrr_if; diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index df5e41f..816c719 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -25,7 +25,7 @@ struct mtrr_ops { int (*validate_add_page)(unsigned long base, unsigned long size, unsigned int type); int (*have_wrcomb)(void); -}; +} __do_const; extern int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 1c041e0..ec625d2 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1532,7 +1532,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) break; perf_callchain_store(entry, frame.return_address); - fp = frame.next_frame; + fp = (const void __force_user *)frame.next_frame; } } diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 212a6a4..322f5d9 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -172,7 +172,7 @@ static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb, return notifier_from_errno(err); } -static struct notifier_block __refdata cpuid_class_cpu_notifier = +static struct notifier_block cpuid_class_cpu_notifier = { .notifier_call = cpuid_class_cpu_callback, }; diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 69e231b..8b4e1c6 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -36,10 +36,8 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs) { #ifdef CONFIG_X86_32 struct pt_regs fixed_regs; -#endif -#ifdef CONFIG_X86_32 - if (!user_mode_vm(regs)) { + if (!user_mode(regs)) { crash_fixup_ss_esp(&fixed_regs, regs); regs = &fixed_regs; } diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c index afa64ad..dce67dd 100644 --- a/arch/x86/kernel/crash_dump_64.c +++ b/arch/x86/kernel/crash_dump_64.c @@ -36,7 +36,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, return -ENOMEM; if (userbuf) { - if (copy_to_user(buf, vaddr + offset, csize)) { + if (copy_to_user((char __force_user *)buf, vaddr + offset, csize)) { iounmap(vaddr); return -EFAULT; } diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index 37250fe..bf2ec74 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c @@ -11,7 +11,7 @@ #define DOUBLEFAULT_STACKSIZE (1024) static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; -#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) +#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE-2) #define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM) @@ -21,7 +21,7 @@ static void doublefault_fn(void) unsigned long gdt, tss; store_gdt(&gdt_desc); - gdt = gdt_desc.address; + gdt = (unsigned long)gdt_desc.address; printk(KERN_EMERG "PANIC: double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size); @@ -58,10 +58,10 @@ struct tss_struct doublefault_tss __cacheline_aligned = { /* 0x2 bit is always set */ .flags = X86_EFLAGS_SF | 0x2, .sp = STACK_START, - .es = __USER_DS, + .es = __KERNEL_DS, .cs = __KERNEL_CS, 
.ss = __KERNEL_DS, - .ds = __USER_DS, + .ds = __KERNEL_DS, .fs = __KERNEL_PERCPU, .__cr3 = __pa_nodebug(swapper_pg_dir), diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 1aae78f..138ca1b 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -2,6 +2,9 @@ * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs */ +#ifdef CONFIG_GRKERNSEC_HIDESYM +#define __INCLUDED_BY_HIDESYM 1 +#endif #include #include #include @@ -35,9 +38,8 @@ void printk_address(unsigned long address, int reliable) static void print_ftrace_graph_addr(unsigned long addr, void *data, const struct stacktrace_ops *ops, - struct thread_info *tinfo, int *graph) + struct task_struct *task, int *graph) { - struct task_struct *task = tinfo->task; unsigned long ret_addr; int index = task->curr_ret_stack; @@ -58,7 +60,7 @@ print_ftrace_graph_addr(unsigned long addr, void *data, static inline void print_ftrace_graph_addr(unsigned long addr, void *data, const struct stacktrace_ops *ops, - struct thread_info *tinfo, int *graph) + struct task_struct *task, int *graph) { } #endif @@ -69,10 +71,8 @@ print_ftrace_graph_addr(unsigned long addr, void *data, * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack */ -static inline int valid_stack_ptr(struct thread_info *tinfo, - void *p, unsigned int size, void *end) +static inline int valid_stack_ptr(void *t, void *p, unsigned int size, void *end) { - void *t = tinfo; if (end) { if (p < end && p >= (end-THREAD_SIZE)) return 1; @@ -83,14 +83,14 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, } unsigned long -print_context_stack(struct thread_info *tinfo, +print_context_stack(struct task_struct *task, void *stack_start, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph) { struct stack_frame *frame = (struct stack_frame *)bp; - while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { + while (valid_stack_ptr(stack_start, stack, sizeof(*stack), end)) { unsigned long addr; addr = *stack; @@ -102,7 +102,7 @@ print_context_stack(struct thread_info *tinfo, } else { ops->address(data, addr, 0); } - print_ftrace_graph_addr(addr, data, ops, tinfo, graph); + print_ftrace_graph_addr(addr, data, ops, task, graph); } stack++; } @@ -111,7 +111,7 @@ print_context_stack(struct thread_info *tinfo, EXPORT_SYMBOL_GPL(print_context_stack); unsigned long -print_context_stack_bp(struct thread_info *tinfo, +print_context_stack_bp(struct task_struct *task, void *stack_start, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data, unsigned long *end, int *graph) @@ -119,7 +119,7 @@ print_context_stack_bp(struct thread_info *tinfo, struct stack_frame *frame = (struct stack_frame *)bp; unsigned long *ret_addr = &frame->return_address; - while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) { + while (valid_stack_ptr(stack_start, ret_addr, sizeof(*ret_addr), end)) { unsigned long addr = *ret_addr; if (!__kernel_text_address(addr)) @@ -128,7 +128,7 @@ print_context_stack_bp(struct thread_info *tinfo, ops->address(data, addr, 1); frame = frame->next_frame; ret_addr = &frame->return_address; - print_ftrace_graph_addr(addr, data, ops, tinfo, graph); + print_ftrace_graph_addr(addr, data, ops, task, graph); } return (unsigned long)frame; @@ -147,7 +147,7 @@ static int print_trace_stack(void *data, char *name) static void print_trace_address(void *data, unsigned long addr, int reliable) 
{ touch_nmi_watchdog(); - printk(data); + printk("%s", (char *)data); printk_address(addr, reliable); } @@ -186,7 +186,7 @@ void dump_stack(void) bp = stack_frame(current, NULL); printk("Pid: %d, comm: %.20s %s %s %.*s\n", - current->pid, current->comm, print_tainted(), + task_pid_nr(current), current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); @@ -222,6 +222,8 @@ unsigned __kprobes long oops_begin(void) } EXPORT_SYMBOL_GPL(oops_begin); +extern void gr_handle_kernel_exploit(void); + void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) { if (regs && kexec_should_crash(current)) @@ -243,7 +245,10 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) panic("Fatal exception in interrupt"); if (panic_on_oops) panic("Fatal exception"); - do_exit(signr); + + gr_handle_kernel_exploit(); + + do_group_exit(signr); } int __kprobes __die(const char *str, struct pt_regs *regs, long err) @@ -269,7 +274,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) show_registers(regs); #ifdef CONFIG_X86_32 - if (user_mode_vm(regs)) { + if (user_mode(regs)) { sp = regs->sp; ss = regs->ss & 0xffff; } else { @@ -297,7 +302,7 @@ void die(const char *str, struct pt_regs *regs, long err) unsigned long flags = oops_begin(); int sig = SIGSEGV; - if (!user_mode_vm(regs)) + if (!user_mode(regs)) report_bug(regs->ip, regs); if (__die(str, regs, err)) diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index c99f9ed..76cf602 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -38,15 +38,13 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, bp = stack_frame(task, regs); for (;;) { - struct thread_info *context; + void *stack_start = (void *)((unsigned long)stack & ~(THREAD_SIZE-1)); - context = (struct thread_info *) - ((unsigned long)stack & (~(THREAD_SIZE - 1))); - bp = ops->walk_stack(context, stack, bp, ops, data, NULL, &graph); + bp = ops->walk_stack(task, stack_start, stack, bp, ops, data, NULL, &graph); - stack = (unsigned long *)context->previous_esp; - if (!stack) + if (stack_start == task_stack_page(task)) break; + stack = *(unsigned long **)stack_start; if (ops->stack(data, "IRQ") < 0) break; touch_nmi_watchdog(); @@ -96,21 +94,22 @@ void show_registers(struct pt_regs *regs) * When in-kernel, we also print out the stack and code at the * time of the fault.. 
*/ - if (!user_mode_vm(regs)) { + if (!user_mode(regs)) { unsigned int code_prologue = code_bytes * 43 / 64; unsigned int code_len = code_bytes; unsigned char c; u8 *ip; + unsigned long cs_base = get_desc_base(&get_cpu_gdt_table(0)[(0xffff & regs->cs) >> 3]); printk(KERN_EMERG "Stack:\n"); show_stack_log_lvl(NULL, regs, ®s->sp, 0, KERN_EMERG); printk(KERN_EMERG "Code: "); - ip = (u8 *)regs->ip - code_prologue; + ip = (u8 *)regs->ip - code_prologue + cs_base; if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { /* try starting at IP */ - ip = (u8 *)regs->ip; + ip = (u8 *)regs->ip + cs_base; code_len = code_len - code_prologue + 1; } for (i = 0; i < code_len; i++, ip++) { @@ -119,7 +118,7 @@ void show_registers(struct pt_regs *regs) printk(KERN_CONT " Bad EIP value."); break; } - if (ip == (u8 *)regs->ip) + if (ip == (u8 *)regs->ip + cs_base) printk(KERN_CONT "<%02x> ", c); else printk(KERN_CONT "%02x ", c); @@ -132,6 +131,7 @@ int is_valid_bugaddr(unsigned long ip) { unsigned short ud2; + ip = ktla_ktva(ip); if (ip < PAGE_OFFSET) return 0; if (probe_kernel_address((unsigned short *)ip, ud2)) @@ -139,3 +139,15 @@ int is_valid_bugaddr(unsigned long ip) return ud2 == 0x0b0f; } + +#if defined(CONFIG_PAX_MEMORY_STACKLEAK) || defined(CONFIG_PAX_USERCOPY) +void pax_check_alloca(unsigned long size) +{ + unsigned long sp = (unsigned long)&sp, stack_left; + + /* all kernel stacks are of the same size */ + stack_left = sp & (THREAD_SIZE - 1); + BUG_ON(stack_left < 256 || size >= stack_left - 256); +} +EXPORT_SYMBOL(pax_check_alloca); +#endif diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 5e890cc..66ec71b 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -118,9 +118,9 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *irq_stack_end = (unsigned long *)per_cpu(irq_stack_ptr, cpu); unsigned used = 0; - struct thread_info *tinfo; int graph = 0; unsigned long dummy; + void *stack_start; if (!task) task = current; @@ -141,10 +141,10 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, * current stack address. 
If the stacks consist of nested * exceptions */ - tinfo = task_thread_info(task); for (;;) { char *id; unsigned long *estack_end; + estack_end = in_exception_stack(cpu, (unsigned long)stack, &used, &id); @@ -152,7 +152,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (ops->stack(data, id) < 0) break; - bp = ops->walk_stack(tinfo, stack, bp, ops, + bp = ops->walk_stack(task, estack_end - EXCEPTION_STKSZ, stack, bp, ops, data, estack_end, &graph); ops->stack(data, ""); /* @@ -160,6 +160,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, * second-to-last pointer (index -2 to end) in the * exception stack: */ + if ((u16)estack_end[-1] != __KERNEL_DS) + goto out; stack = (unsigned long *) estack_end[-2]; continue; } @@ -171,7 +173,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (in_irq_stack(stack, irq_stack, irq_stack_end)) { if (ops->stack(data, "IRQ") < 0) break; - bp = ops->walk_stack(tinfo, stack, bp, + bp = ops->walk_stack(task, irq_stack, stack, bp, ops, data, irq_stack_end, &graph); /* * We link to the next stack (which would be @@ -190,7 +192,9 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, /* * This handles the process stack: */ - bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph); + stack_start = (void *)((unsigned long)stack & ~(THREAD_SIZE-1)); + bp = ops->walk_stack(task, stack_start, stack, bp, ops, data, NULL, &graph); +out: put_cpu(); } EXPORT_SYMBOL(dump_trace); @@ -248,7 +252,7 @@ void show_registers(struct pt_regs *regs) { int i; unsigned long sp; - const int cpu = smp_processor_id(); + const int cpu = raw_smp_processor_id(); struct task_struct *cur = current; sp = regs->sp; @@ -299,8 +303,55 @@ int is_valid_bugaddr(unsigned long ip) { unsigned short ud2; - if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2))) + if (probe_kernel_address((unsigned short *)ip, ud2)) return 0; return ud2 == 0x0b0f; } + +#if defined(CONFIG_PAX_MEMORY_STACKLEAK) || defined(CONFIG_PAX_USERCOPY) +void pax_check_alloca(unsigned long size) +{ + unsigned long sp = (unsigned long)&sp, stack_start, stack_end; + unsigned cpu, used; + char *id; + + /* check the process stack first */ + stack_start = (unsigned long)task_stack_page(current); + stack_end = stack_start + THREAD_SIZE; + if (likely(stack_start <= sp && sp < stack_end)) { + unsigned long stack_left = sp & (THREAD_SIZE - 1); + BUG_ON(stack_left < 256 || size >= stack_left - 256); + return; + } + + cpu = get_cpu(); + + /* check the irq stacks */ + stack_end = (unsigned long)per_cpu(irq_stack_ptr, cpu); + stack_start = stack_end - IRQ_STACK_SIZE; + if (stack_start <= sp && sp < stack_end) { + unsigned long stack_left = sp & (IRQ_STACK_SIZE - 1); + put_cpu(); + BUG_ON(stack_left < 256 || size >= stack_left - 256); + return; + } + + /* check the exception stacks */ + used = 0; + stack_end = (unsigned long)in_exception_stack(cpu, sp, &used, &id); + stack_start = stack_end - EXCEPTION_STKSZ; + if (stack_end && stack_start <= sp && sp < stack_end) { + unsigned long stack_left = sp & (EXCEPTION_STKSZ - 1); + put_cpu(); + BUG_ON(stack_left < 256 || size >= stack_left - 256); + return; + } + + put_cpu(); + + /* unknown stack */ + BUG(); +} +EXPORT_SYMBOL(pax_check_alloca); +#endif diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 303a0e4..0aad351 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -829,8 +829,8 @@ unsigned long __init e820_end_of_low_ram_pfn(void) static void early_panic(char *msg) { - 
early_printk(msg); - panic(msg); + early_printk("%s", msg); + panic("%s", msg); } static int userdef __initdata; diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index cd28a35..c72ed9a 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 0fa4f89..dbbfa58 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -180,13 +180,154 @@ /*CFI_REL_OFFSET gs, PT_GS*/ .endm .macro SET_KERNEL_GS reg + +#ifdef CONFIG_CC_STACKPROTECTOR movl $(__KERNEL_STACK_CANARY), \reg +#elif defined(CONFIG_PAX_MEMORY_UDEREF) + movl $(__USER_DS), \reg +#else + xorl \reg, \reg +#endif + movl \reg, %gs .endm #endif /* CONFIG_X86_32_LAZY_GS */ -.macro SAVE_ALL +.macro pax_enter_kernel +#ifdef CONFIG_PAX_KERNEXEC + call pax_enter_kernel +#endif +.endm + +.macro pax_exit_kernel +#ifdef CONFIG_PAX_KERNEXEC + call pax_exit_kernel +#endif +.endm + +#ifdef CONFIG_PAX_KERNEXEC +ENTRY(pax_enter_kernel) +#ifdef CONFIG_PARAVIRT + pushl %eax + pushl %ecx + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0) + mov %eax, %esi +#else + mov %cr0, %esi +#endif + bts $16, %esi + jnc 1f + mov %cs, %esi + cmp $__KERNEL_CS, %esi + jz 3f + ljmp $__KERNEL_CS, $3f +1: ljmp $__KERNEXEC_KERNEL_CS, $2f +2: +#ifdef CONFIG_PARAVIRT + mov %esi, %eax + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_write_cr0) +#else + mov %esi, %cr0 +#endif +3: +#ifdef CONFIG_PARAVIRT + popl %ecx + popl %eax +#endif + ret +ENDPROC(pax_enter_kernel) + +ENTRY(pax_exit_kernel) +#ifdef CONFIG_PARAVIRT + pushl %eax + pushl %ecx +#endif + mov %cs, %esi + cmp $__KERNEXEC_KERNEL_CS, %esi + jnz 2f +#ifdef CONFIG_PARAVIRT + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); + mov %eax, %esi +#else + mov %cr0, %esi +#endif + btr $16, %esi + ljmp $__KERNEL_CS, $1f +1: +#ifdef CONFIG_PARAVIRT + mov %esi, %eax + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_write_cr0); +#else + mov %esi, %cr0 +#endif +2: +#ifdef CONFIG_PARAVIRT + popl %ecx + popl %eax +#endif + ret +ENDPROC(pax_exit_kernel) +#endif + +.macro pax_erase_kstack +#ifdef CONFIG_PAX_MEMORY_STACKLEAK + call pax_erase_kstack +#endif +.endm + +#ifdef CONFIG_PAX_MEMORY_STACKLEAK +/* + * ebp: thread_info + */ +ENTRY(pax_erase_kstack) + pushl %edi + pushl %ecx + pushl %eax + + mov TI_lowest_stack(%ebp), %edi + mov $-0xBEEF, %eax + std + +1: mov %edi, %ecx + and $THREAD_SIZE_asm - 1, %ecx + shr $2, %ecx + repne scasl + jecxz 2f + + cmp $2*16, %ecx + jc 2f + + mov $2*16, %ecx + repe scasl + jecxz 2f + jne 1b + +2: cld + or $2*4, %edi + mov %esp, %ecx + sub %edi, %ecx + + cmp $THREAD_SIZE_asm, %ecx + jb 3f + ud2 +3: + + shr $2, %ecx + rep stosl + + mov TI_task_thread_sp0(%ebp), %edi + sub $128, %edi + mov %edi, TI_lowest_stack(%ebp) + + popl %eax + popl %ecx + popl %edi + ret +ENDPROC(pax_erase_kstack) +#endif + +.macro __SAVE_ALL _DS cld PUSH_GS pushl_cfi %fs @@ -209,7 +350,7 @@ CFI_REL_OFFSET ecx, 0 pushl_cfi %ebx CFI_REL_OFFSET ebx, 0 - movl $(__USER_DS), %edx + movl $\_DS, %edx movl %edx, %ds movl %edx, %es movl $(__KERNEL_PERCPU), %edx @@ -217,6 +358,15 @@ SET_KERNEL_GS %edx .endm +.macro SAVE_ALL +#if defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF) + __SAVE_ALL __KERNEL_DS + pax_enter_kernel +#else + __SAVE_ALL __USER_DS +#endif +.endm + .macro RESTORE_INT_REGS popl_cfi %ebx CFI_RESTORE ebx @@ -302,7 +452,7 @@ ENTRY(ret_from_fork) popfl_cfi 
jmp syscall_exit CFI_ENDPROC -END(ret_from_fork) +ENDPROC(ret_from_fork) /* * Interrupt exit functions should be protected against kprobes @@ -336,7 +486,15 @@ resume_userspace_sig: andl $SEGMENT_RPL_MASK, %eax #endif cmpl $USER_RPL, %eax + +#ifdef CONFIG_PAX_KERNEXEC + jae resume_userspace + + pax_exit_kernel + jmp resume_kernel +#else jb resume_kernel # not returning to v8086 or userspace +#endif ENTRY(resume_userspace) LOCKDEP_SYS_EXIT @@ -348,8 +506,8 @@ ENTRY(resume_userspace) andl $_TIF_WORK_MASK, %ecx # is there any work to be done on # int/exception return? jne work_pending - jmp restore_all -END(ret_from_exception) + jmp restore_all_pax +ENDPROC(ret_from_exception) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) @@ -364,7 +522,7 @@ need_resched: jz restore_all call preempt_schedule_irq jmp need_resched -END(resume_kernel) +ENDPROC(resume_kernel) #endif CFI_ENDPROC /* @@ -398,23 +556,34 @@ sysenter_past_esp: /*CFI_REL_OFFSET cs, 0*/ /* * Push current_thread_info()->sysenter_return to the stack. - * A tiny bit of offset fixup is necessary - 4*4 means the 4 words - * pushed above; +8 corresponds to copy_thread's esp0 setting. */ - pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp) + pushl_cfi $0 CFI_REL_OFFSET eip, 0 pushl_cfi %eax SAVE_ALL + GET_THREAD_INFO(%ebp) + movl TI_sysenter_return(%ebp),%ebp + movl %ebp,PT_EIP(%esp) ENABLE_INTERRUPTS(CLBR_NONE) /* * Load the potential sixth argument from user stack. * Careful about security. */ + movl PT_OLDESP(%esp),%ebp + +#ifdef CONFIG_PAX_MEMORY_UDEREF + mov PT_OLDSS(%esp),%ds +1: movl %ds:(%ebp),%ebp + push %ss + pop %ds +#else cmpl $__PAGE_OFFSET-3,%ebp jae syscall_fault 1: movl (%ebp),%ebp +#endif + movl %ebp,PT_EBP(%esp) .section __ex_table,"a" .align 4 @@ -423,6 +592,10 @@ sysenter_past_esp: GET_THREAD_INFO(%ebp) +#ifdef CONFIG_PAX_RANDKSTACK + pax_erase_kstack +#endif + testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) jnz sysenter_audit sysenter_do_call: @@ -438,12 +611,24 @@ sysenter_after_call: testl $_TIF_ALLWORK_MASK, %ecx jne sysexit_audit sysenter_exit: + +#ifdef CONFIG_PAX_RANDKSTACK + pushl_cfi %eax + movl %esp, %eax + call pax_randomize_kstack + popl_cfi %eax +#endif + + pax_erase_kstack + /* if something modifies registers it must also disable sysexit */ movl PT_EIP(%esp), %edx movl PT_OLDESP(%esp), %ecx xorl %ebp,%ebp TRACE_IRQS_ON 1: mov PT_FS(%esp), %fs +2: mov PT_DS(%esp), %ds +3: mov PT_ES(%esp), %es PTGS_TO_GS ENABLE_INTERRUPTS_SYSEXIT @@ -460,6 +645,9 @@ sysenter_audit: movl %eax,%edx /* 2nd arg: syscall number */ movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ call audit_syscall_entry + + pax_erase_kstack + pushl_cfi %ebx movl PT_EAX(%esp),%eax /* reload syscall number */ jmp sysenter_do_call @@ -486,11 +674,17 @@ sysexit_audit: CFI_ENDPROC .pushsection .fixup,"ax" -2: movl $0,PT_FS(%esp) +4: movl $0,PT_FS(%esp) + jmp 1b +5: movl $0,PT_DS(%esp) + jmp 1b +6: movl $0,PT_ES(%esp) jmp 1b .section __ex_table,"a" .align 4 - .long 1b,2b + .long 1b,4b + .long 2b,5b + .long 3b,6b .popsection PTGS_TO_GS_EX ENDPROC(ia32_sysenter_target) @@ -505,6 +699,11 @@ ENTRY(system_call) pushl_cfi %eax # save orig_eax SAVE_ALL GET_THREAD_INFO(%ebp) + +#ifdef CONFIG_PAX_RANDKSTACK + pax_erase_kstack +#endif + # system call tracing in operation / emulation testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) jnz syscall_trace_entry @@ -524,6 +723,15 @@ syscall_exit: testl $_TIF_ALLWORK_MASK, %ecx # current->work jne syscall_exit_work +restore_all_pax: + +#ifdef CONFIG_PAX_RANDKSTACK + movl %esp, %eax + call pax_randomize_kstack +#endif + + 
pax_erase_kstack + restore_all: TRACE_IRQS_IRET restore_all_notrace: @@ -581,14 +789,34 @@ ldt_ss: * compensating for the offset by changing to the ESPFIX segment with * a base address that matches for the difference. */ -#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8) +#define GDT_ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)(%ebx) mov %esp, %edx /* load kernel esp */ mov PT_OLDESP(%esp), %eax /* load userspace esp */ mov %dx, %ax /* eax: new kernel esp */ sub %eax, %edx /* offset (low word is 0) */ +#ifdef CONFIG_SMP + movl PER_CPU_VAR(cpu_number), %ebx + shll $PAGE_SHIFT_asm, %ebx + addl $cpu_gdt_table, %ebx +#else + movl $cpu_gdt_table, %ebx +#endif shr $16, %edx - mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ - mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */ + +#ifdef CONFIG_PAX_KERNEXEC + mov %cr0, %esi + btr $16, %esi + mov %esi, %cr0 +#endif + + mov %dl, 4 + GDT_ESPFIX_SS /* bits 16..23 */ + mov %dh, 7 + GDT_ESPFIX_SS /* bits 24..31 */ + +#ifdef CONFIG_PAX_KERNEXEC + bts $16, %esi + mov %esi, %cr0 +#endif + pushl_cfi $__ESPFIX_SS pushl_cfi %eax /* new kernel esp */ /* Disable interrupts, but do not irqtrace this section: we @@ -618,34 +846,28 @@ work_resched: movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done other # than syscall tracing? - jz restore_all + jz restore_all_pax testb $_TIF_NEED_RESCHED, %cl jnz work_resched work_notifysig: # deal with pending signals and # notify-resume requests + movl %esp, %eax #ifdef CONFIG_VM86 testl $X86_EFLAGS_VM, PT_EFLAGS(%esp) - movl %esp, %eax - jne work_notifysig_v86 # returning to kernel-space or + jz 1f # returning to kernel-space or # vm86-space - xorl %edx, %edx - call do_notify_resume - jmp resume_userspace_sig - ALIGN -work_notifysig_v86: pushl_cfi %ecx # save ti_flags for do_notify_resume call save_v86_state # %eax contains pt_regs pointer popl_cfi %ecx movl %eax, %esp -#else - movl %esp, %eax +1: #endif xorl %edx, %edx call do_notify_resume jmp resume_userspace_sig -END(work_pending) +ENDPROC(work_pending) # perform syscall exit tracing ALIGN @@ -653,11 +875,14 @@ syscall_trace_entry: movl $-ENOSYS,PT_EAX(%esp) movl %esp, %eax call syscall_trace_enter + + pax_erase_kstack + /* What it returned is what we'll actually use. 
*/ cmpl $(nr_syscalls), %eax jnae syscall_call jmp syscall_exit -END(syscall_trace_entry) +ENDPROC(syscall_trace_entry) # perform syscall exit tracing ALIGN @@ -670,25 +895,29 @@ syscall_exit_work: movl %esp, %eax call syscall_trace_leave jmp resume_userspace -END(syscall_exit_work) +ENDPROC(syscall_exit_work) CFI_ENDPROC RING0_INT_FRAME # can't unwind into user space anyway syscall_fault: +#ifdef CONFIG_PAX_MEMORY_UDEREF + push %ss + pop %ds +#endif GET_THREAD_INFO(%ebp) movl $-EFAULT,PT_EAX(%esp) jmp resume_userspace -END(syscall_fault) +ENDPROC(syscall_fault) syscall_badsys: movl $-ENOSYS,%eax jmp syscall_after_call -END(syscall_badsys) +ENDPROC(syscall_badsys) sysenter_badsys: movl $-ENOSYS,%eax jmp sysenter_after_call -END(syscall_badsys) +ENDPROC(sysenter_badsys) CFI_ENDPROC /* * End of kprobes section @@ -762,6 +991,36 @@ ptregs_clone: CFI_ENDPROC ENDPROC(ptregs_clone) + ALIGN; +ENTRY(kernel_execve) + CFI_STARTPROC + pushl_cfi %ebp + sub $PT_OLDSS+4,%esp + pushl_cfi %edi + pushl_cfi %ecx + pushl_cfi %eax + lea 3*4(%esp),%edi + mov $PT_OLDSS/4+1,%ecx + xorl %eax,%eax + rep stosl + popl_cfi %eax + popl_cfi %ecx + popl_cfi %edi + movl $X86_EFLAGS_IF,PT_EFLAGS(%esp) + pushl_cfi %esp + call sys_execve + add $4,%esp + CFI_ADJUST_CFA_OFFSET -4 + GET_THREAD_INFO(%ebp) + test %eax,%eax + jz syscall_exit + add $PT_OLDSS+4,%esp + CFI_ADJUST_CFA_OFFSET -PT_OLDSS-4 + popl_cfi %ebp + ret + CFI_ENDPROC +ENDPROC(kernel_execve) + .macro FIXUP_ESPFIX_STACK /* * Switch back for ESPFIX stack to the normal zerobased stack @@ -772,8 +1031,15 @@ ENDPROC(ptregs_clone) */ #ifdef CONFIG_X86_ESPFIX32 /* fixup the stack */ - mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */ - mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ +#ifdef CONFIG_SMP + movl PER_CPU_VAR(cpu_number), %ebx + shll $PAGE_SHIFT_asm, %ebx + addl $cpu_gdt_table, %ebx +#else + movl $cpu_gdt_table, %ebx +#endif + mov 4 + GDT_ESPFIX_SS, %al /* bits 16..23 */ + mov 7 + GDT_ESPFIX_SS, %ah /* bits 24..31 */ shl $16, %eax addl %esp, %eax /* the adjusted stack pointer */ pushl_cfi $__KERNEL_DS @@ -829,7 +1095,7 @@ vector=vector+1 .endr 2: jmp common_interrupt .endr -END(irq_entries_start) +ENDPROC(irq_entries_start) .previous END(interrupt) @@ -877,7 +1143,7 @@ ENTRY(coprocessor_error) pushl_cfi $do_coprocessor_error jmp error_code CFI_ENDPROC -END(coprocessor_error) +ENDPROC(coprocessor_error) ENTRY(simd_coprocessor_error) RING0_INT_FRAME @@ -898,7 +1164,7 @@ ENTRY(simd_coprocessor_error) #endif jmp error_code CFI_ENDPROC -END(simd_coprocessor_error) +ENDPROC(simd_coprocessor_error) ENTRY(device_not_available) RING0_INT_FRAME @@ -906,7 +1172,7 @@ ENTRY(device_not_available) pushl_cfi $do_device_not_available jmp error_code CFI_ENDPROC -END(device_not_available) +ENDPROC(device_not_available) #ifdef CONFIG_PARAVIRT ENTRY(native_iret) @@ -915,12 +1181,12 @@ ENTRY(native_iret) .align 4 .long native_iret, iret_exc .previous -END(native_iret) +ENDPROC(native_iret) ENTRY(native_irq_enable_sysexit) sti sysexit -END(native_irq_enable_sysexit) +ENDPROC(native_irq_enable_sysexit) #endif ENTRY(overflow) @@ -929,7 +1195,7 @@ ENTRY(overflow) pushl_cfi $do_overflow jmp error_code CFI_ENDPROC -END(overflow) +ENDPROC(overflow) ENTRY(bounds) RING0_INT_FRAME @@ -937,7 +1203,7 @@ ENTRY(bounds) pushl_cfi $do_bounds jmp error_code CFI_ENDPROC -END(bounds) +ENDPROC(bounds) ENTRY(invalid_op) RING0_INT_FRAME @@ -945,7 +1211,7 @@ ENTRY(invalid_op) pushl_cfi $do_invalid_op jmp error_code CFI_ENDPROC -END(invalid_op) +ENDPROC(invalid_op) ENTRY(coprocessor_segment_overrun) 
 	RING0_INT_FRAME
@@ -953,35 +1219,35 @@ ENTRY(coprocessor_segment_overrun)
 	pushl_cfi $do_coprocessor_segment_overrun
 	jmp error_code
 	CFI_ENDPROC
-END(coprocessor_segment_overrun)
+ENDPROC(coprocessor_segment_overrun)
 
 ENTRY(invalid_TSS)
 	RING0_EC_FRAME
 	pushl_cfi $do_invalid_TSS
 	jmp error_code
 	CFI_ENDPROC
-END(invalid_TSS)
+ENDPROC(invalid_TSS)
 
 ENTRY(segment_not_present)
 	RING0_EC_FRAME
 	pushl_cfi $do_segment_not_present
 	jmp error_code
 	CFI_ENDPROC
-END(segment_not_present)
+ENDPROC(segment_not_present)
 
 ENTRY(stack_segment)
 	RING0_EC_FRAME
 	pushl_cfi $do_stack_segment
 	jmp error_code
 	CFI_ENDPROC
-END(stack_segment)
+ENDPROC(stack_segment)
 
 ENTRY(alignment_check)
 	RING0_EC_FRAME
 	pushl_cfi $do_alignment_check
 	jmp error_code
 	CFI_ENDPROC
-END(alignment_check)
+ENDPROC(alignment_check)
 
 ENTRY(divide_error)
 	RING0_INT_FRAME
@@ -989,7 +1255,7 @@ ENTRY(divide_error)
 	pushl_cfi $do_divide_error
 	jmp error_code
 	CFI_ENDPROC
-END(divide_error)
+ENDPROC(divide_error)
 
 #ifdef CONFIG_X86_MCE
 ENTRY(machine_check)
@@ -998,7 +1264,7 @@ ENTRY(machine_check)
 	pushl_cfi machine_check_vector
 	jmp error_code
 	CFI_ENDPROC
-END(machine_check)
+ENDPROC(machine_check)
 #endif
 
 ENTRY(spurious_interrupt_bug)
@@ -1007,7 +1273,7 @@ ENTRY(spurious_interrupt_bug)
 	pushl_cfi $do_spurious_interrupt_bug
 	jmp error_code
 	CFI_ENDPROC
-END(spurious_interrupt_bug)
+ENDPROC(spurious_interrupt_bug)
 /*
  * End of kprobes section
  */
@@ -1123,7 +1389,7 @@ BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK,
 
 ENTRY(mcount)
 	ret
-END(mcount)
+ENDPROC(mcount)
 
 ENTRY(ftrace_caller)
 	cmpl $0, function_trace_stop
@@ -1152,7 +1418,7 @@ ftrace_graph_call:
 .globl ftrace_stub
 ftrace_stub:
 	ret
-END(ftrace_caller)
+ENDPROC(ftrace_caller)
 
 #else /* ! CONFIG_DYNAMIC_FTRACE */
 
@@ -1188,7 +1454,7 @@ trace:
 	popl %ecx
 	popl %eax
 	jmp ftrace_stub
-END(mcount)
+ENDPROC(mcount)
 #endif /* CONFIG_DYNAMIC_FTRACE */
 #endif /* CONFIG_FUNCTION_TRACER */
 
@@ -1209,7 +1475,7 @@ ENTRY(ftrace_graph_caller)
 	popl %ecx
 	popl %eax
 	ret
-END(ftrace_graph_caller)
+ENDPROC(ftrace_graph_caller)
 
 .globl return_to_handler
 return_to_handler:
@@ -1223,7 +1489,6 @@ return_to_handler:
 	jmp *%ecx
 #endif
 
-.section .rodata,"a"
 #include "syscall_table_32.S"
 
 syscall_table_size=(.-sys_call_table)
@@ -1269,15 +1534,18 @@ error_code:
 	movl $-1, PT_ORIG_EAX(%esp)	# no syscall to restart
 	REG_TO_PTGS %ecx
 	SET_KERNEL_GS %ecx
-	movl $(__USER_DS), %ecx
+	movl $(__KERNEL_DS), %ecx
 	movl %ecx, %ds
 	movl %ecx, %es
+
+	pax_enter_kernel
+
 	TRACE_IRQS_OFF
 	movl %esp,%eax			# pt_regs pointer
 	call *%edi
 	jmp ret_from_exception
 	CFI_ENDPROC
-END(page_fault)
+ENDPROC(page_fault)
 
 /*
  * Debug traps and NMI can happen at the one SYSENTER instruction
@@ -1319,7 +1587,7 @@ debug_stack_correct:
 	call do_debug
 	jmp ret_from_exception
 	CFI_ENDPROC
-END(debug)
+ENDPROC(debug)
 
 /*
  * NMI is doubly nasty.
It can happen _while_ we're handling @@ -1358,6 +1626,9 @@ nmi_stack_correct: xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi + + pax_exit_kernel + jmp restore_all_notrace CFI_ENDPROC @@ -1395,13 +1666,16 @@ nmi_espfix_stack: FIXUP_ESPFIX_STACK # %eax == %esp xorl %edx,%edx # zero error code call do_nmi + + pax_exit_kernel + RESTORE_REGS lss 12+4(%esp), %esp # back to espfix stack CFI_ADJUST_CFA_OFFSET -24 jmp irq_return #endif CFI_ENDPROC -END(nmi) +ENDPROC(nmi) ENTRY(int3) RING0_INT_FRAME @@ -1413,14 +1687,14 @@ ENTRY(int3) call do_int3 jmp ret_from_exception CFI_ENDPROC -END(int3) +ENDPROC(int3) ENTRY(general_protection) RING0_EC_FRAME pushl_cfi $do_general_protection jmp error_code CFI_ENDPROC -END(general_protection) +ENDPROC(general_protection) #ifdef CONFIG_KVM_GUEST ENTRY(async_page_fault) @@ -1428,7 +1702,7 @@ ENTRY(async_page_fault) pushl_cfi $do_async_page_fault jmp error_code CFI_ENDPROC -END(async_page_fault) +ENDPROC(async_page_fault) #endif /* diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 8d15c69..30e8c80 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -56,6 +56,8 @@ #include #include #include +#include +#include /* Avoid __ASSEMBLER__'ifying just for this. */ #include @@ -69,8 +71,9 @@ #ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(mcount) + pax_force_retaddr retq -END(mcount) +ENDPROC(mcount) ENTRY(ftrace_caller) cmpl $0, function_trace_stop @@ -93,8 +96,9 @@ GLOBAL(ftrace_graph_call) #endif GLOBAL(ftrace_stub) + pax_force_retaddr retq -END(ftrace_caller) +ENDPROC(ftrace_caller) #else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) @@ -113,6 +117,7 @@ ENTRY(mcount) #endif GLOBAL(ftrace_stub) + pax_force_retaddr retq trace: @@ -122,12 +127,13 @@ trace: movq 8(%rbp), %rsi subq $MCOUNT_INSN_SIZE, %rdi + pax_force_fptr ftrace_trace_function call *ftrace_trace_function MCOUNT_RESTORE_FRAME jmp ftrace_stub -END(mcount) +ENDPROC(mcount) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ @@ -147,8 +153,9 @@ ENTRY(ftrace_graph_caller) MCOUNT_RESTORE_FRAME + pax_force_retaddr retq -END(ftrace_graph_caller) +ENDPROC(ftrace_graph_caller) GLOBAL(return_to_handler) subq $24, %rsp @@ -164,6 +171,7 @@ GLOBAL(return_to_handler) movq 8(%rsp), %rdx movq (%rsp), %rax addq $24, %rsp + pax_force_fptr %rdi jmp *%rdi #endif @@ -179,6 +187,286 @@ ENTRY(native_usergs_sysret64) ENDPROC(native_usergs_sysret64) #endif /* CONFIG_PARAVIRT */ + .macro ljmpq sel, off +#if defined(CONFIG_MPSC) || defined(CONFIG_MCORE2) || defined (CONFIG_MATOM) + .byte 0x48; ljmp *1234f(%rip) + .pushsection .rodata + .align 16 + 1234: .quad \off; .word \sel + .popsection +#else + pushq $\sel + pushq $\off + lretq +#endif + .endm + + .macro pax_enter_kernel + pax_set_fptr_mask +#ifdef CONFIG_PAX_KERNEXEC + call pax_enter_kernel +#endif + .endm + + .macro pax_exit_kernel +#ifdef CONFIG_PAX_KERNEXEC + call pax_exit_kernel +#endif + .endm + +#ifdef CONFIG_PAX_KERNEXEC +ENTRY(pax_enter_kernel) + pushq %rdi + +#ifdef CONFIG_PARAVIRT + PV_SAVE_REGS(CLBR_RDI) +#endif + + GET_CR0_INTO_RDI + bts $16,%rdi + jnc 3f + mov %cs,%edi + cmp $__KERNEL_CS,%edi + jnz 2f +1: + +#ifdef CONFIG_PARAVIRT + PV_RESTORE_REGS(CLBR_RDI) +#endif + + popq %rdi + pax_force_retaddr + retq + +2: ljmpq __KERNEL_CS,1b +3: ljmpq __KERNEXEC_KERNEL_CS,4f +4: SET_RDI_INTO_CR0 + jmp 1b +ENDPROC(pax_enter_kernel) + +ENTRY(pax_exit_kernel) + pushq %rdi + +#ifdef CONFIG_PARAVIRT + PV_SAVE_REGS(CLBR_RDI) +#endif + + mov %cs,%rdi + cmp 
$__KERNEXEC_KERNEL_CS,%edi + jz 2f + GET_CR0_INTO_RDI + bts $16,%rdi + jnc 4f +1: + +#ifdef CONFIG_PARAVIRT + PV_RESTORE_REGS(CLBR_RDI); +#endif + + popq %rdi + pax_force_retaddr + retq + +2: GET_CR0_INTO_RDI + btr $16,%rdi + jnc 4f + ljmpq __KERNEL_CS,3f +3: SET_RDI_INTO_CR0 + jmp 1b +4: ud2 + jmp 4b +ENDPROC(pax_exit_kernel) +#endif + + .macro pax_enter_kernel_user + pax_set_fptr_mask +#ifdef CONFIG_PAX_MEMORY_UDEREF + call pax_enter_kernel_user +#endif + .endm + + .macro pax_exit_kernel_user +#ifdef CONFIG_PAX_MEMORY_UDEREF + call pax_exit_kernel_user +#endif +#ifdef CONFIG_PAX_RANDKSTACK + pushq %rax + pushq %r11 + call pax_randomize_kstack + popq %r11 + popq %rax +#endif + .endm + +#ifdef CONFIG_PAX_MEMORY_UDEREF +ENTRY(pax_enter_kernel_user) + pushq %rdi + pushq %rbx + +#ifdef CONFIG_PARAVIRT + PV_SAVE_REGS(CLBR_RDI) +#endif + + GET_CR3_INTO_RDI + mov %rdi,%rbx + add $__START_KERNEL_map,%rbx + sub phys_base(%rip),%rbx + +#ifdef CONFIG_PARAVIRT + cmpl $0, pv_info+PARAVIRT_enabled + jz 1f + pushq %rdi + i = 0 + .rept USER_PGD_PTRS + mov i*8(%rbx),%rsi + mov $0,%sil + lea i*8(%rbx),%rdi + call PARA_INDIRECT(pv_mmu_ops+PV_MMU_set_pgd_batched) + i = i + 1 + .endr + popq %rdi + jmp 2f +1: +#endif + + i = 0 + .rept USER_PGD_PTRS + movb $0,i*8(%rbx) + i = i + 1 + .endr + +#ifdef CONFIG_PARAVIRT +2: +#endif + SET_RDI_INTO_CR3 + +#ifdef CONFIG_PAX_KERNEXEC + GET_CR0_INTO_RDI + bts $16,%rdi + SET_RDI_INTO_CR0 +#endif + +#ifdef CONFIG_PARAVIRT + PV_RESTORE_REGS(CLBR_RDI) +#endif + + popq %rbx + popq %rdi + pax_force_retaddr + retq +ENDPROC(pax_enter_kernel_user) + +ENTRY(pax_exit_kernel_user) + pushq %rdi + pushq %rbx + +#ifdef CONFIG_PARAVIRT + PV_SAVE_REGS(CLBR_RDI) +#endif + +#ifdef CONFIG_PAX_KERNEXEC + GET_CR0_INTO_RDI + btr $16,%rdi + jnc 3f + SET_RDI_INTO_CR0 +#endif + + GET_CR3_INTO_RDI + mov %rdi,%rbx + add $__START_KERNEL_map,%rbx + sub phys_base(%rip),%rbx + +#ifdef CONFIG_PARAVIRT + cmpl $0, pv_info+PARAVIRT_enabled + jz 1f + i = 0 + .rept USER_PGD_PTRS + mov i*8(%rbx),%rsi + mov $0x67,%sil + lea i*8(%rbx),%rdi + call PARA_INDIRECT(pv_mmu_ops+PV_MMU_set_pgd_batched) + i = i + 1 + .endr + jmp 2f +1: +#endif + + i = 0 + .rept USER_PGD_PTRS + movb $0x67,i*8(%rbx) + i = i + 1 + .endr + +#ifdef CONFIG_PARAVIRT +2: PV_RESTORE_REGS(CLBR_RDI) +#endif + + popq %rbx + popq %rdi + pax_force_retaddr + retq +3: ud2 + jmp 3b +ENDPROC(pax_exit_kernel_user) +#endif + +.macro pax_erase_kstack +#ifdef CONFIG_PAX_MEMORY_STACKLEAK + call pax_erase_kstack +#endif +.endm + +#ifdef CONFIG_PAX_MEMORY_STACKLEAK +ENTRY(pax_erase_kstack) + pushq %rdi + pushq %rcx + pushq %rax + pushq %r11 + + GET_THREAD_INFO(%r11) + mov TI_lowest_stack(%r11), %rdi + mov $-0xBEEF, %rax + std + +1: mov %edi, %ecx + and $THREAD_SIZE_asm - 1, %ecx + shr $3, %ecx + repne scasq + jecxz 2f + + cmp $2*8, %ecx + jc 2f + + mov $2*8, %ecx + repe scasq + jecxz 2f + jne 1b + +2: cld + or $2*8, %rdi + mov %esp, %ecx + sub %edi, %ecx + + cmp $THREAD_SIZE_asm, %rcx + jb 3f + ud2 +3: + + shr $3, %ecx + rep stosq + + mov TI_task_thread_sp0(%r11), %rdi + sub $256, %rdi + mov %rdi, TI_lowest_stack(%r11) + + popq %r11 + popq %rax + popq %rcx + popq %rdi + pax_force_retaddr + ret +ENDPROC(pax_erase_kstack) +#endif .macro TRACE_IRQS_IRETQ offset=ARGOFFSET #ifdef CONFIG_TRACE_IRQFLAGS @@ -232,8 +520,8 @@ ENDPROC(native_usergs_sysret64) .endm .macro UNFAKE_STACK_FRAME - addq $8*6, %rsp - CFI_ADJUST_CFA_OFFSET -(6*8) + addq $8*6 + ARG_SKIP, %rsp + CFI_ADJUST_CFA_OFFSET -(6*8 + ARG_SKIP) .endm /* @@ -302,25 +590,26 @@ ENDPROC(native_usergs_sysret64) /* 
save partial stack frame */ .macro SAVE_ARGS_IRQ cld - /* start from rbp in pt_regs and jump over */ - movq_cfi rdi, RDI-RBP - movq_cfi rsi, RSI-RBP - movq_cfi rdx, RDX-RBP - movq_cfi rcx, RCX-RBP - movq_cfi rax, RAX-RBP - movq_cfi r8, R8-RBP - movq_cfi r9, R9-RBP - movq_cfi r10, R10-RBP - movq_cfi r11, R11-RBP + /* start from r15 in pt_regs and jump over */ + movq_cfi rdi, RDI + movq_cfi rsi, RSI + movq_cfi rdx, RDX + movq_cfi rcx, RCX + movq_cfi rax, RAX + movq_cfi r8, R8 + movq_cfi r9, R9 + movq_cfi r10, R10 + movq_cfi r11, R11 + movq_cfi r12, R12 /* Save rbp so that we can unwind from get_irq_regs() */ - movq_cfi rbp, 0 + movq_cfi rbp, RBP /* Save previous stack value */ movq %rsp, %rsi - leaq -RBP(%rsp),%rdi /* arg1 for handler */ - testl $3, CS(%rdi) + movq %rsp,%rdi /* arg1 for handler */ + testb $3, CS(%rsi) je 1f SWAPGS /* @@ -341,24 +630,39 @@ ENDPROC(native_usergs_sysret64) 0x06 /* DW_OP_deref */, \ 0x08 /* DW_OP_const1u */, SS+8-RBP, \ 0x22 /* DW_OP_plus */ + +#ifdef CONFIG_PAX_MEMORY_UDEREF + testb $3, CS(%rdi) + jnz 1f + pax_enter_kernel + jmp 2f +1: pax_enter_kernel_user +2: +#else + pax_enter_kernel +#endif + /* We entered an interrupt context - irqs are off: */ TRACE_IRQS_OFF .endm ENTRY(save_rest) - PARTIAL_FRAME 1 REST_SKIP+8 - movq 5*8+16(%rsp), %r11 /* save return address */ + PARTIAL_FRAME 1 8 movq_cfi rbx, RBX+16 movq_cfi rbp, RBP+16 + +#ifndef CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_OR movq_cfi r12, R12+16 +#endif + movq_cfi r13, R13+16 movq_cfi r14, R14+16 movq_cfi r15, R15+16 - movq %r11, 8(%rsp) /* return address */ FIXUP_TOP_OF_STACK %r11, 16 + pax_force_retaddr ret CFI_ENDPROC -END(save_rest) +ENDPROC(save_rest) /* save complete stack frame */ .pushsection .kprobes.text, "ax" @@ -387,10 +691,21 @@ ENTRY(save_paranoid) js 1f /* negative -> in kernel */ SWAPGS xorl %ebx,%ebx -1: ret +1: +#ifdef CONFIG_PAX_MEMORY_UDEREF + testb $3, CS+8(%rsp) + jnz 1f + pax_enter_kernel + jmp 2f +1: pax_enter_kernel_user +2: +#else + pax_enter_kernel +#endif + pax_force_retaddr + ret CFI_ENDPROC -END(save_paranoid) - .popsection +ENDPROC(save_paranoid) /* * A newly forked process directly context switches into this address. @@ -411,7 +726,7 @@ ENTRY(ret_from_fork) RESTORE_REST - testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? + testb $3, CS-ARGOFFSET(%rsp) # from kernel_thread? je int_ret_from_sys_call /* @@ -424,7 +739,7 @@ ENTRY(ret_from_fork) jmp int_ret_from_sys_call CFI_ENDPROC -END(ret_from_fork) +ENDPROC(ret_from_fork) /* * System call entry. Up to 6 arguments in registers are supported. 
@@ -460,7 +775,7 @@ END(ret_from_fork) ENTRY(system_call) CFI_STARTPROC simple CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET + CFI_DEF_CFA rsp,0 CFI_REGISTER rip,rcx /*CFI_REGISTER rflags,r11*/ SWAPGS_UNSAFE_STACK @@ -473,12 +788,18 @@ ENTRY(system_call_after_swapgs) movq %rsp,PER_CPU_VAR(old_rsp) movq PER_CPU_VAR(kernel_stack),%rsp + SAVE_ARGS 8*6,0 + pax_enter_kernel_user + +#ifdef CONFIG_PAX_RANDKSTACK + pax_erase_kstack +#endif + /* * No need to follow this irqs off/on section - it's straight * and short: */ ENABLE_INTERRUPTS(CLBR_NONE) - SAVE_ARGS 8,0 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) movq %rcx,RIP-ARGOFFSET(%rsp) CFI_REL_OFFSET rip,RIP-ARGOFFSET @@ -507,6 +828,8 @@ sysret_check: andl %edi,%edx jnz sysret_careful CFI_REMEMBER_STATE + pax_exit_kernel_user + pax_erase_kstack /* * sysretq will re-enable interrupts: */ @@ -565,6 +888,9 @@ auditsys: movq %rax,%rsi /* 2nd arg: syscall number */ movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ call audit_syscall_entry + + pax_erase_kstack + LOAD_ARGS 0 /* reload call-clobbered registers */ jmp system_call_fastpath @@ -595,12 +921,15 @@ tracesys: FIXUP_TOP_OF_STACK %rdi movq %rsp,%rdi call syscall_trace_enter + + pax_erase_kstack + /* * Reload arg registers from stack in case ptrace changed them. * We don't reload %rax because syscall_trace_enter() returned * the value it wants us to use in the table lookup. */ - LOAD_ARGS ARGOFFSET, 1 + LOAD_ARGS 1 RESTORE_REST cmpq $__NR_syscall_max,%rax ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ @@ -616,7 +945,7 @@ tracesys: GLOBAL(int_ret_from_sys_call) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl $3,CS-ARGOFFSET(%rsp) + testb $3,CS-ARGOFFSET(%rsp) je retint_restore_args movl $_TIF_ALLWORK_MASK,%edi /* edi: mask to check */ @@ -627,7 +956,9 @@ GLOBAL(int_with_check) andl %edi,%edx jnz int_careful andl $~TS_COMPAT,TI_status(%rcx) - jmp retint_swapgs + pax_exit_kernel_user + pax_erase_kstack + jmp retint_swapgs_pax /* Either reschedule or signal or syscall exit tracking needed. */ /* First do a reschedule test. */ @@ -673,7 +1004,7 @@ int_restore_rest: TRACE_IRQS_OFF jmp int_with_check CFI_ENDPROC -END(system_call) +ENDPROC(system_call) /* * Certain special system calls that need to save a complete full stack frame. @@ -681,15 +1012,13 @@ END(system_call) .macro PTREGSCALL label,func,arg ENTRY(\label) PARTIAL_FRAME 1 8 /* offset 8: return address */ - subq $REST_SKIP, %rsp - CFI_ADJUST_CFA_OFFSET REST_SKIP call save_rest DEFAULT_FRAME 0 8 /* offset 8: return address */ leaq 8(%rsp), \arg /* pt_regs pointer */ call \func jmp ptregscall_common CFI_ENDPROC -END(\label) +ENDPROC(\label) .endm PTREGSCALL stub_clone, sys_clone, %r8 @@ -704,12 +1033,17 @@ ENTRY(ptregscall_common) movq_cfi_restore R15+8, r15 movq_cfi_restore R14+8, r14 movq_cfi_restore R13+8, r13 + +#ifndef CONFIG_PAX_KERNEXEC_PLUGIN_METHOD_OR movq_cfi_restore R12+8, r12 +#endif + movq_cfi_restore RBP+8, rbp movq_cfi_restore RBX+8, rbx - ret $REST_SKIP /* pop extended registers */ + pax_force_retaddr + ret CFI_ENDPROC -END(ptregscall_common) +ENDPROC(ptregscall_common) ENTRY(stub_execve) CFI_STARTPROC @@ -724,7 +1058,7 @@ ENTRY(stub_execve) RESTORE_REST jmp int_ret_from_sys_call CFI_ENDPROC -END(stub_execve) +ENDPROC(stub_execve) /* * sigreturn is special because it needs to restore all registers on return. 
@@ -742,7 +1076,7 @@ ENTRY(stub_rt_sigreturn) RESTORE_REST jmp int_ret_from_sys_call CFI_ENDPROC -END(stub_rt_sigreturn) +ENDPROC(stub_rt_sigreturn) /* * Build the entry stubs and pointer table with some assembler magic. @@ -777,7 +1111,7 @@ vector=vector+1 2: jmp common_interrupt .endr CFI_ENDPROC -END(irq_entries_start) +ENDPROC(irq_entries_start) .previous END(interrupt) @@ -794,8 +1128,8 @@ END(interrupt) /* 0(%rsp): ~(interrupt number) */ .macro interrupt func /* reserve pt_regs for scratch regs and rbp */ - subq $ORIG_RAX-RBP, %rsp - CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP + subq $ORIG_RAX, %rsp + CFI_ADJUST_CFA_OFFSET ORIG_RAX SAVE_ARGS_IRQ call \func .endm @@ -822,13 +1156,13 @@ ret_from_intr: /* Restore saved previous stack */ popq %rsi CFI_DEF_CFA_REGISTER rsi - leaq ARGOFFSET-RBP(%rsi), %rsp + movq %rsi, %rsp CFI_DEF_CFA_REGISTER rsp - CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET + CFI_ADJUST_CFA_OFFSET -ARGOFFSET exit_intr: GET_THREAD_INFO(%rcx) - testl $3,CS-ARGOFFSET(%rsp) + testb $3,CS-ARGOFFSET(%rsp) je retint_kernel /* Interrupt came from user space */ @@ -850,12 +1184,16 @@ retint_swapgs: /* return to user-space */ * The iretq could re-enable interrupts: */ DISABLE_INTERRUPTS(CLBR_ANY) + pax_exit_kernel_user +retint_swapgs_pax: TRACE_IRQS_IRETQ SWAPGS jmp restore_args retint_restore_args: /* return to kernel space */ DISABLE_INTERRUPTS(CLBR_ANY) + pax_exit_kernel + pax_force_retaddr (RIP-ARGOFFSET) /* * The iretq could re-enable interrupts: */ @@ -893,15 +1231,15 @@ native_irq_return_ldt: SWAPGS movq PER_CPU_VAR(espfix_waddr),%rdi movq %rax,(0*8)(%rdi) /* RAX */ - movq (2*8)(%rsp),%rax /* RIP */ + movq (2*8 + RIP-RIP)(%rsp),%rax /* RIP */ movq %rax,(1*8)(%rdi) - movq (3*8)(%rsp),%rax /* CS */ + movq (2*8 + CS-RIP)(%rsp),%rax /* CS */ movq %rax,(2*8)(%rdi) - movq (4*8)(%rsp),%rax /* RFLAGS */ + movq (2*8 + EFLAGS-RIP)(%rsp),%rax /* RFLAGS */ movq %rax,(3*8)(%rdi) - movq (6*8)(%rsp),%rax /* SS */ + movq (2*8 + SS-RIP)(%rsp),%rax /* SS */ movq %rax,(5*8)(%rdi) - movq (5*8)(%rsp),%rax /* RSP */ + movq (2*8 + RSP-RIP)(%rsp),%rax /* RSP */ movq %rax,(4*8)(%rdi) andl $0xffff0000,%eax popq_cfi %rdi @@ -957,7 +1295,7 @@ ENTRY(retint_kernel) jmp exit_intr #endif CFI_ENDPROC -END(common_interrupt) +ENDPROC(common_interrupt) /* * End of kprobes section @@ -974,7 +1312,7 @@ ENTRY(\sym) interrupt \do_sym jmp ret_from_intr CFI_ENDPROC -END(\sym) +ENDPROC(\sym) .endm #ifdef CONFIG_SMP @@ -1044,7 +1382,7 @@ ENTRY(\sym) call \do_sym jmp error_exit /* %ebx: no swapgs flag */ CFI_ENDPROC -END(\sym) +ENDPROC(\sym) .endm .macro paranoidzeroentry sym do_sym @@ -1061,10 +1399,10 @@ ENTRY(\sym) call \do_sym jmp paranoid_exit /* %ebx: no swapgs flag */ CFI_ENDPROC -END(\sym) +ENDPROC(\sym) .endm -#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) +#define INIT_TSS_IST(x) (TSS_ist + ((x) - 1) * 8)(%r13) .macro paranoidzeroentry_ist sym do_sym ist ENTRY(\sym) INTR_FRAME @@ -1076,12 +1414,18 @@ ENTRY(\sym) TRACE_IRQS_OFF movq %rsp,%rdi /* pt_regs pointer */ xorl %esi,%esi /* no error code */ +#ifdef CONFIG_SMP + imul $TSS_size, PER_CPU_VAR(cpu_number), %r13d + lea init_tss(%r13), %r13 +#else + lea init_tss(%rip), %r13 +#endif subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) call \do_sym addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) jmp paranoid_exit /* %ebx: no swapgs flag */ CFI_ENDPROC -END(\sym) +ENDPROC(\sym) .endm .macro errorentry sym do_sym @@ -1098,7 +1442,7 @@ ENTRY(\sym) call \do_sym jmp error_exit /* %ebx: no swapgs flag */ CFI_ENDPROC -END(\sym) +ENDPROC(\sym) .endm /* error code is on the 
stack already */ @@ -1117,7 +1461,7 @@ ENTRY(\sym) call \do_sym jmp paranoid_exit /* %ebx: no swapgs flag */ CFI_ENDPROC -END(\sym) +ENDPROC(\sym) .endm zeroentry divide_error do_divide_error @@ -1147,9 +1491,10 @@ gs_change: 2: mfence /* workaround */ SWAPGS popfq_cfi + pax_force_retaddr ret CFI_ENDPROC -END(native_load_gs_index) +ENDPROC(native_load_gs_index) .section __ex_table,"a" .align 8 @@ -1171,13 +1516,14 @@ ENTRY(kernel_thread_helper) * Here we are in the child and the registers are set as they were * at kernel_thread() invocation in the parent. */ + pax_force_fptr %rsi call *%rsi # exit mov %eax, %edi call do_exit ud2 # padding for call trace CFI_ENDPROC -END(kernel_thread_helper) +ENDPROC(kernel_thread_helper) /* * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. @@ -1204,11 +1550,11 @@ ENTRY(kernel_execve) RESTORE_REST testq %rax,%rax je int_ret_from_sys_call - RESTORE_ARGS UNFAKE_STACK_FRAME + pax_force_retaddr ret CFI_ENDPROC -END(kernel_execve) +ENDPROC(kernel_execve) /* Call softirq on interrupt stack. Interrupts are off. */ ENTRY(call_softirq) @@ -1226,9 +1572,10 @@ ENTRY(call_softirq) CFI_DEF_CFA_REGISTER rsp CFI_ADJUST_CFA_OFFSET -8 decl PER_CPU_VAR(irq_count) + pax_force_retaddr ret CFI_ENDPROC -END(call_softirq) +ENDPROC(call_softirq) #ifdef CONFIG_XEN zeroentry xen_hypervisor_callback xen_do_hypervisor_callback @@ -1266,7 +1613,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) decl PER_CPU_VAR(irq_count) jmp error_exit CFI_ENDPROC -END(xen_do_hypervisor_callback) +ENDPROC(xen_do_hypervisor_callback) /* * Hypervisor uses this for application faults while it executes. @@ -1325,7 +1672,7 @@ ENTRY(xen_failsafe_callback) SAVE_ALL jmp error_exit CFI_ENDPROC -END(xen_failsafe_callback) +ENDPROC(xen_failsafe_callback) apicinterrupt XEN_HVM_EVTCHN_CALLBACK \ xen_hvm_callback_vector xen_evtchn_do_upcall @@ -1374,16 +1721,31 @@ ENTRY(paranoid_exit) TRACE_IRQS_OFF testl %ebx,%ebx /* swapgs needed? */ jnz paranoid_restore - testl $3,CS(%rsp) + testb $3,CS(%rsp) jnz paranoid_userspace +#ifdef CONFIG_PAX_MEMORY_UDEREF + pax_exit_kernel + TRACE_IRQS_IRETQ 0 + SWAPGS_UNSAFE_STACK + RESTORE_ALL 8 + pax_force_retaddr_bts + jmp irq_return +#endif paranoid_swapgs: +#ifdef CONFIG_PAX_MEMORY_UDEREF + pax_exit_kernel_user +#else + pax_exit_kernel +#endif TRACE_IRQS_IRETQ 0 SWAPGS_UNSAFE_STACK RESTORE_ALL 8 jmp irq_return paranoid_restore: + pax_exit_kernel TRACE_IRQS_IRETQ 0 RESTORE_ALL 8 + pax_force_retaddr_bts jmp irq_return paranoid_userspace: GET_THREAD_INFO(%rcx) @@ -1412,7 +1774,7 @@ paranoid_schedule: TRACE_IRQS_OFF jmp paranoid_userspace CFI_ENDPROC -END(paranoid_exit) +ENDPROC(paranoid_exit) /* * Exception entry point. This expects an error code/orig_rax on the stack. 
@@ -1439,12 +1801,23 @@ ENTRY(error_entry) movq_cfi r14, R14+8 movq_cfi r15, R15+8 xorl %ebx,%ebx - testl $3,CS+8(%rsp) + testb $3,CS+8(%rsp) je error_kernelspace error_swapgs: SWAPGS error_sti: +#ifdef CONFIG_PAX_MEMORY_UDEREF + testb $3, CS+8(%rsp) + jnz 1f + pax_enter_kernel + jmp 2f +1: pax_enter_kernel_user +2: +#else + pax_enter_kernel +#endif TRACE_IRQS_OFF + pax_force_retaddr ret /* @@ -1478,7 +1851,7 @@ error_bad_iret: decl %ebx /* Return to usergs */ jmp error_sti CFI_ENDPROC -END(error_entry) +ENDPROC(error_entry) /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ @@ -1498,7 +1871,7 @@ ENTRY(error_exit) jnz retint_careful jmp retint_swapgs CFI_ENDPROC -END(error_exit) +ENDPROC(error_exit) /* runs on exception stack */ @@ -1510,6 +1883,7 @@ ENTRY(nmi) CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 call save_paranoid DEFAULT_FRAME 0 + /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ movq %rsp,%rdi movq $-1,%rsi @@ -1520,12 +1894,28 @@ ENTRY(nmi) DISABLE_INTERRUPTS(CLBR_NONE) testl %ebx,%ebx /* swapgs needed? */ jnz nmi_restore - testl $3,CS(%rsp) + testb $3,CS(%rsp) jnz nmi_userspace +#ifdef CONFIG_PAX_MEMORY_UDEREF + pax_exit_kernel + SWAPGS_UNSAFE_STACK + RESTORE_ALL 8 + pax_force_retaddr_bts + jmp irq_return +#endif nmi_swapgs: +#ifdef CONFIG_PAX_MEMORY_UDEREF + pax_exit_kernel_user +#else + pax_exit_kernel +#endif SWAPGS_UNSAFE_STACK + RESTORE_ALL 8 + jmp irq_return nmi_restore: + pax_exit_kernel RESTORE_ALL 8 + pax_force_retaddr_bts jmp irq_return nmi_userspace: GET_THREAD_INFO(%rcx) @@ -1554,14 +1944,14 @@ nmi_schedule: jmp paranoid_exit CFI_ENDPROC #endif -END(nmi) +ENDPROC(nmi) ENTRY(ignore_sysret) CFI_STARTPROC mov $-ENOSYS,%eax sysret CFI_ENDPROC -END(ignore_sysret) +ENDPROC(ignore_sysret) /* * End of kprobes section diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 94d857f..6042d8a 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -70,8 +70,7 @@ static DEFINE_MUTEX(espfix_init_mutex); #define ESPFIX_MAX_PAGES DIV_ROUND_UP(CONFIG_NR_CPUS, ESPFIX_STACKS_PER_PAGE) static void *espfix_pages[ESPFIX_MAX_PAGES]; -static __page_aligned_bss pud_t espfix_pud_page[PTRS_PER_PUD] - __aligned(PAGE_SIZE); +static pud_t espfix_pud_page[PTRS_PER_PUD] __page_aligned_rodata; static unsigned int page_random, slot_random; @@ -122,14 +121,16 @@ static void init_espfix_random(void) void __init init_espfix_bsp(void) { pgd_t *pgd_p; - pteval_t ptemask; - - ptemask = __supported_pte_mask; + unsigned long index = pgd_index(ESPFIX_BASE_ADDR); /* Install the espfix pud into the kernel page directory */ - pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)]; + pgd_p = &init_level4_pgt[index]; pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page); +#ifdef CONFIG_PAX_PER_CPU_PGD + clone_pgd_range(get_cpu_pgd(0) + index, swapper_pg_dir + index, 1); +#endif + /* Randomize the locations */ init_espfix_random(); @@ -197,7 +198,7 @@ void init_espfix_ap(void) set_pte(&pte_p[n*PTE_STRIDE], pte); /* Job is done for this CPU and any CPU which shares this page */ - ACCESS_ONCE(espfix_pages[page]) = stack_page; + ACCESS_ONCE_RW(espfix_pages[page]) = stack_page; unlock_done: mutex_unlock(&espfix_init_mutex); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index c9a281f..3645760 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -126,7 +126,7 @@ static void *mod_code_ip; /* holds the IP to write to */ static const void *mod_code_newcode; /* holds the text to write to the IP */ static unsigned nmi_wait_count; -static 
atomic_t nmi_update_count = ATOMIC_INIT(0); +static atomic_unchecked_t nmi_update_count = ATOMIC_INIT(0); int ftrace_arch_read_dyn_info(char *buf, int size) { @@ -134,7 +134,7 @@ int ftrace_arch_read_dyn_info(char *buf, int size) r = snprintf(buf, size, "%u %u", nmi_wait_count, - atomic_read(&nmi_update_count)); + atomic_read_unchecked(&nmi_update_count)); return r; } @@ -178,7 +178,7 @@ void ftrace_nmi_enter(void) if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { smp_rmb(); ftrace_mod_code(); - atomic_inc(&nmi_update_count); + atomic_inc_unchecked(&nmi_update_count); } /* Must have previous changes seen before executions */ smp_mb(); @@ -271,6 +271,8 @@ ftrace_modify_code(unsigned long ip, unsigned const char *old_code, { unsigned char replaced[MCOUNT_INSN_SIZE]; + ip = ktla_ktva(ip); + /* * Note: Due to modules and __init, code can * disappear and change, we need to protect against faulting @@ -327,7 +329,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) unsigned char old[MCOUNT_INSN_SIZE], *new; int ret; - memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); + memcpy(old, ktla_ktva((void *)ftrace_call), MCOUNT_INSN_SIZE); new = ftrace_call_replace(ip, (unsigned long)func); ret = ftrace_modify_code(ip, old, new); @@ -353,6 +355,8 @@ static int ftrace_mod_jmp(unsigned long ip, { unsigned char code[MCOUNT_INSN_SIZE]; + ip = ktla_ktva(ip); + if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 3bb0850..55a56f4 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -19,6 +19,7 @@ #include #include #include +#include static void __init i386_default_early_setup(void) { @@ -33,7 +34,7 @@ void __init i386_start_kernel(void) { memblock_init(); - memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); + memblock_x86_reserve_range(LOAD_PHYSICAL_ADDR, __pa_symbol(&__bss_stop), "TEXT DATA BSS"); #ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index ce0be7c..a61dc21 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -25,6 +25,12 @@ /* Physical address */ #define pa(X) ((X) - __PAGE_OFFSET) +#ifdef CONFIG_PAX_KERNEXEC +#define ta(X) (X) +#else +#define ta(X) ((X) - __PAGE_OFFSET) +#endif + /* * References to members of the new_cpu_data structure. */ @@ -54,11 +60,7 @@ * and small than max_low_pfn, otherwise will waste some page table entries */ -#if PTRS_PER_PMD > 1 -#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD) -#else -#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) -#endif +#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PTE) /* Number of possible pages in the lowmem region */ LOWMEM_PAGES = (((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) @@ -77,6 +79,12 @@ INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE RESERVE_BRK(pagetables, INIT_MAP_SIZE) /* + * Real beginning of normal "text" segment + */ +ENTRY(stext) +ENTRY(_stext) + +/* * 32-bit kernel entrypoint; only used by the boot CPU. On entry, * %esi points to the real-mode code as a 32-bit pointer. * CS and DS must be 4 GB flat segments, but we don't depend on @@ -84,6 +92,13 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE) * can. 
*/ __HEAD + +#ifdef CONFIG_PAX_KERNEXEC + jmp startup_32 +/* PaX: fill first page in .text with int3 to catch NULL derefs in kernel mode */ +.fill PAGE_SIZE-5,1,0xcc +#endif + ENTRY(startup_32) movl pa(stack_start),%ecx @@ -105,6 +120,59 @@ ENTRY(startup_32) 2: leal -__PAGE_OFFSET(%ecx),%esp +#ifdef CONFIG_SMP + movl $pa(cpu_gdt_table),%edi + movl $__per_cpu_load,%eax + movw %ax,GDT_ENTRY_PERCPU * 8 + 2(%edi) + rorl $16,%eax + movb %al,GDT_ENTRY_PERCPU * 8 + 4(%edi) + movb %ah,GDT_ENTRY_PERCPU * 8 + 7(%edi) + movl $__per_cpu_end - 1,%eax + subl $__per_cpu_start,%eax + movw %ax,GDT_ENTRY_PERCPU * 8 + 0(%edi) +#endif + +#ifdef CONFIG_PAX_MEMORY_UDEREF + movl $NR_CPUS,%ecx + movl $pa(cpu_gdt_table),%edi +1: + movl $((((__PAGE_OFFSET-1) & 0xf0000000) >> 12) | 0x00c09700),GDT_ENTRY_KERNEL_DS * 8 + 4(%edi) + movl $((((__PAGE_OFFSET-1) & 0xf0000000) >> 12) | 0x00c0fb00),GDT_ENTRY_DEFAULT_USER_CS * 8 + 4(%edi) + movl $((((__PAGE_OFFSET-1) & 0xf0000000) >> 12) | 0x00c0f300),GDT_ENTRY_DEFAULT_USER_DS * 8 + 4(%edi) + addl $PAGE_SIZE_asm,%edi + loop 1b +#endif + +#ifdef CONFIG_PAX_KERNEXEC + movl $pa(boot_gdt),%edi + movl $__LOAD_PHYSICAL_ADDR,%eax + movw %ax,GDT_ENTRY_BOOT_CS * 8 + 2(%edi) + rorl $16,%eax + movb %al,GDT_ENTRY_BOOT_CS * 8 + 4(%edi) + movb %ah,GDT_ENTRY_BOOT_CS * 8 + 7(%edi) + rorl $16,%eax + + ljmp $(__BOOT_CS),$1f +1: + + movl $NR_CPUS,%ecx + movl $pa(cpu_gdt_table),%edi + addl $__PAGE_OFFSET,%eax +1: + movb $0xc0,GDT_ENTRY_KERNEL_CS * 8 + 6(%edi) + movb $0xc0,GDT_ENTRY_KERNEXEC_KERNEL_CS * 8 + 6(%edi) + movw %ax,GDT_ENTRY_KERNEL_CS * 8 + 2(%edi) + movw %ax,GDT_ENTRY_KERNEXEC_KERNEL_CS * 8 + 2(%edi) + rorl $16,%eax + movb %al,GDT_ENTRY_KERNEL_CS * 8 + 4(%edi) + movb %al,GDT_ENTRY_KERNEXEC_KERNEL_CS * 8 + 4(%edi) + movb %ah,GDT_ENTRY_KERNEL_CS * 8 + 7(%edi) + movb %ah,GDT_ENTRY_KERNEXEC_KERNEL_CS * 8 + 7(%edi) + rorl $16,%eax + addl $PAGE_SIZE_asm,%edi + loop 1b +#endif + /* * Clear BSS first so that there are no surprises... 
*/ @@ -195,8 +263,11 @@ ENTRY(startup_32) movl %eax, pa(max_pfn_mapped) /* Do early initialization of the fixmap area */ - movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax - movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8) +#ifdef CONFIG_COMPAT_VDSO + movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR+_PAGE_USER,pa(initial_pg_pmd+0x1000*KPMDS-8) +#else + movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,pa(initial_pg_pmd+0x1000*KPMDS-8) +#endif #else /* Not PAE */ page_pde_offset = (__PAGE_OFFSET >> 20); @@ -226,8 +297,11 @@ page_pde_offset = (__PAGE_OFFSET >> 20); movl %eax, pa(max_pfn_mapped) /* Do early initialization of the fixmap area */ - movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax - movl %eax,pa(initial_page_table+0xffc) +#ifdef CONFIG_COMPAT_VDSO + movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR+_PAGE_USER,pa(initial_page_table+0xffc) +#else + movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,pa(initial_page_table+0xffc) +#endif #endif #ifdef CONFIG_PARAVIRT @@ -241,9 +315,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); cmpl $num_subarch_entries, %eax jae bad_subarch - movl pa(subarch_entries)(,%eax,4), %eax - subl $__PAGE_OFFSET, %eax - jmp *%eax + jmp *pa(subarch_entries)(,%eax,4) bad_subarch: WEAK(lguest_entry) @@ -255,10 +327,10 @@ WEAK(xen_entry) __INITDATA subarch_entries: - .long default_entry /* normal x86/PC */ - .long lguest_entry /* lguest hypervisor */ - .long xen_entry /* Xen hypervisor */ - .long default_entry /* Moorestown MID */ + .long ta(default_entry) /* normal x86/PC */ + .long ta(lguest_entry) /* lguest hypervisor */ + .long ta(xen_entry) /* Xen hypervisor */ + .long ta(default_entry) /* Moorestown MID */ num_subarch_entries = (. - subarch_entries) / 4 .previous #else @@ -312,6 +384,7 @@ default_entry: orl %edx,%eax movl %eax,%cr4 +#ifdef CONFIG_X86_PAE testb $X86_CR4_PAE, %al # check if PAE is enabled jz 6f @@ -340,6 +413,9 @@ default_entry: /* Make changes effective */ wrmsr + btsl $_PAGE_BIT_NX-32,pa(__supported_pte_mask+4) +#endif + 6: /* @@ -443,7 +519,7 @@ is386: movl $2,%ecx # set MP 1: movl $(__KERNEL_DS),%eax # reload all the segment registers movl %eax,%ss # after changing gdt. 
- movl $(__USER_DS),%eax # DS/ES contains default USER segment +# movl $(__KERNEL_DS),%eax # DS/ES contains default KERNEL segment movl %eax,%ds movl %eax,%es @@ -457,15 +533,22 @@ is386: movl $2,%ecx # set MP */ cmpb $0,ready jne 1f - movl $gdt_page,%eax + movl $cpu_gdt_table,%eax movl $stack_canary,%ecx +#ifdef CONFIG_SMP + addl $__per_cpu_load,%ecx +#endif movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) shrl $16, %ecx movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax) 1: -#endif movl $(__KERNEL_STACK_CANARY),%eax +#elif defined(CONFIG_PAX_MEMORY_UDEREF) + movl $(__USER_DS),%eax +#else + xorl %eax,%eax +#endif movl %eax,%gs xorl %eax,%eax # Clear LDT @@ -558,22 +641,22 @@ early_page_fault: jmp early_fault early_fault: - cld #ifdef CONFIG_PRINTK + cmpl $1,%ss:early_recursion_flag + je hlt_loop + incl %ss:early_recursion_flag + cld pusha movl $(__KERNEL_DS),%eax movl %eax,%ds movl %eax,%es - cmpl $2,early_recursion_flag - je hlt_loop - incl early_recursion_flag movl %cr2,%eax pushl %eax pushl %edx /* trapno */ pushl $fault_msg call printk +; call dump_stack #endif - call dump_stack hlt_loop: hlt jmp hlt_loop @@ -581,8 +664,11 @@ hlt_loop: /* This is the default interrupt "handler" :-) */ ALIGN ignore_int: - cld #ifdef CONFIG_PRINTK + cmpl $2,%ss:early_recursion_flag + je hlt_loop + incl %ss:early_recursion_flag + cld pushl %eax pushl %ecx pushl %edx @@ -591,9 +677,6 @@ ignore_int: movl $(__KERNEL_DS),%eax movl %eax,%ds movl %eax,%es - cmpl $2,early_recursion_flag - je hlt_loop - incl early_recursion_flag pushl 16(%esp) pushl 24(%esp) pushl 32(%esp) @@ -622,29 +705,43 @@ ENTRY(initial_code) /* * BSS section */ -__PAGE_ALIGNED_BSS - .align PAGE_SIZE #ifdef CONFIG_X86_PAE +.section .initial_pg_pmd,"a",@progbits initial_pg_pmd: .fill 1024*KPMDS,4,0 #else +.section .initial_page_table,"a",@progbits ENTRY(initial_page_table) .fill 1024,4,0 #endif +.section .initial_pg_fixmap,"a",@progbits initial_pg_fixmap: .fill 1024,4,0 +.section .empty_zero_page,"a",@progbits ENTRY(empty_zero_page) .fill 4096,1,0 +.section .swapper_pg_dir,"a",@progbits ENTRY(swapper_pg_dir) +#ifdef CONFIG_X86_PAE + .fill 4,8,0 +#else .fill 1024,4,0 +#endif + +/* + * The IDT has to be page-aligned to simplify the Pentium + * F0 0F bug workaround.. We have a special link segment + * for this. + */ +.section .idt,"a",@progbits +ENTRY(idt_table) + .fill 256,8,0 /* * This starts the data section. */ #ifdef CONFIG_X86_PAE -__PAGE_ALIGNED_DATA - /* Page-aligned for the benefit of paravirt? 
*/ - .align PAGE_SIZE +.section .initial_page_table,"a",@progbits ENTRY(initial_page_table) .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ # if KPMDS == 3 @@ -663,18 +760,27 @@ ENTRY(initial_page_table) # error "Kernel PMDs should be 1, 2 or 3" # endif .align PAGE_SIZE /* needs to be page-sized too */ + +#ifdef CONFIG_PAX_PER_CPU_PGD +ENTRY(cpu_pgd) + .rept NR_CPUS + .fill 4,8,0 + .endr +#endif + #endif .data .balign 4 ENTRY(stack_start) - .long init_thread_union+THREAD_SIZE + .long init_thread_union+THREAD_SIZE-8 +ready: .byte 0 + +.section .rodata,"a",@progbits early_recursion_flag: .long 0 -ready: .byte 0 - int_msg: .asciz "Unknown interrupt or fault at: %p %p %p\n" @@ -707,7 +813,7 @@ fault_msg: .word 0 # 32 bit align gdt_desc.address boot_gdt_descr: .word __BOOT_DS+7 - .long boot_gdt - __PAGE_OFFSET + .long pa(boot_gdt) .word 0 # 32-bit align idt_desc.address idt_descr: @@ -718,7 +824,7 @@ idt_descr: .word 0 # 32 bit align gdt_desc.address ENTRY(early_gdt_descr) .word GDT_ENTRIES*8-1 - .long gdt_page /* Overwritten for secondary CPUs */ + .long cpu_gdt_table /* Overwritten for secondary CPUs */ /* * The boot_gdt must mirror the equivalent in setup.S and is @@ -727,5 +833,65 @@ ENTRY(early_gdt_descr) .align L1_CACHE_BYTES ENTRY(boot_gdt) .fill GDT_ENTRY_BOOT_CS,8,0 - .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ + .quad 0x00cf9b000000ffff /* kernel 4GB code at 0x00000000 */ + .quad 0x00cf93000000ffff /* kernel 4GB data at 0x00000000 */ + + .align PAGE_SIZE_asm +ENTRY(cpu_gdt_table) + .rept NR_CPUS + .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0x0000000000000000 /* 0x0b reserved */ + .quad 0x0000000000000000 /* 0x13 reserved */ + .quad 0x0000000000000000 /* 0x1b reserved */ + +#ifdef CONFIG_PAX_KERNEXEC + .quad 0x00cf9b000000ffff /* 0x20 alternate kernel 4GB code at 0x00000000 */ +#else + .quad 0x0000000000000000 /* 0x20 unused */ +#endif + + .quad 0x0000000000000000 /* 0x28 unused */ + .quad 0x0000000000000000 /* 0x33 TLS entry 1 */ + .quad 0x0000000000000000 /* 0x3b TLS entry 2 */ + .quad 0x0000000000000000 /* 0x43 TLS entry 3 */ + .quad 0x0000000000000000 /* 0x4b reserved */ + .quad 0x0000000000000000 /* 0x53 reserved */ + .quad 0x0000000000000000 /* 0x5b reserved */ + + .quad 0x00cf9b000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ + .quad 0x00cf93000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ + .quad 0x00cffb000000ffff /* 0x73 user 4GB code at 0x00000000 */ + .quad 0x00cff3000000ffff /* 0x7b user 4GB data at 0x00000000 */ + + .quad 0x0000000000000000 /* 0x80 TSS descriptor */ + .quad 0x0000000000000000 /* 0x88 LDT descriptor */ + + /* + * Segments used for calling PnP BIOS have byte granularity. + * The code segments and data segments have fixed 64k limits, + * the transfer segment sizes are set at run time. + */ + .quad 0x00409b000000ffff /* 0x90 32-bit code */ + .quad 0x00009b000000ffff /* 0x98 16-bit code */ + .quad 0x000093000000ffff /* 0xa0 16-bit data */ + .quad 0x0000930000000000 /* 0xa8 16-bit data */ + .quad 0x0000930000000000 /* 0xb0 16-bit data */ + + /* + * The APM segments have byte granularity and their bases + * are set at run time. All have 64k limits. 
+ */ + .quad 0x00409b000000ffff /* 0xb8 APM CS code */ + .quad 0x00009b000000ffff /* 0xc0 APM CS 16 code (16 bit) */ + .quad 0x004093000000ffff /* 0xc8 APM DS data */ + + .quad 0x00c093000000ffff /* 0xd0 - ESPFIX SS */ + .quad 0x0040930000000000 /* 0xd8 - PERCPU */ + .quad 0x0040910000000017 /* 0xe0 - STACK_CANARY */ + .quad 0x0000000000000000 /* 0xe8 - PCIBIOS_CS */ + .quad 0x0000000000000000 /* 0xf0 - PCIBIOS_DS */ + .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ + + /* Be sure this is zeroed to avoid false validations in Xen */ + .fill PAGE_SIZE_asm - GDT_SIZE,1,0 + .endr diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index e11e394..3306b50 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -19,6 +19,8 @@ #include #include #include +#include +#include #ifdef CONFIG_PARAVIRT #include @@ -38,6 +40,12 @@ L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET) L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET) L4_START_KERNEL = pgd_index(__START_KERNEL_map) L3_START_KERNEL = pud_index(__START_KERNEL_map) +L4_VMALLOC_START = pgd_index(VMALLOC_START) +L3_VMALLOC_START = pud_index(VMALLOC_START) +L4_VMALLOC_END = pgd_index(VMALLOC_END) +L3_VMALLOC_END = pud_index(VMALLOC_END) +L4_VMEMMAP_START = pgd_index(VMEMMAP_START) +L3_VMEMMAP_START = pud_index(VMEMMAP_START) .text __HEAD @@ -85,35 +93,25 @@ startup_64: */ addq %rbp, init_level4_pgt + 0(%rip) addq %rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip) + addq %rbp, init_level4_pgt + (L4_VMALLOC_START*8)(%rip) + addq %rbp, init_level4_pgt + (L4_VMALLOC_END*8)(%rip) + addq %rbp, init_level4_pgt + (L4_VMEMMAP_START*8)(%rip) addq %rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip) addq %rbp, level3_ident_pgt + 0(%rip) +#ifndef CONFIG_XEN + addq %rbp, level3_ident_pgt + 8(%rip) +#endif - addq %rbp, level3_kernel_pgt + (510*8)(%rip) - addq %rbp, level3_kernel_pgt + (511*8)(%rip) + addq %rbp, level3_vmemmap_pgt + (L3_VMEMMAP_START*8)(%rip) + addq %rbp, level3_kernel_pgt + (L3_START_KERNEL*8)(%rip) + addq %rbp, level3_kernel_pgt + (L3_START_KERNEL*8+8)(%rip) + + addq %rbp, level2_fixmap_pgt + (504*8)(%rip) + addq %rbp, level2_fixmap_pgt + (505*8)(%rip) addq %rbp, level2_fixmap_pgt + (506*8)(%rip) - - /* Add an Identity mapping if I am above 1G */ - leaq _text(%rip), %rdi - andq $PMD_PAGE_MASK, %rdi - - movq %rdi, %rax - shrq $PUD_SHIFT, %rax - andq $(PTRS_PER_PUD - 1), %rax - jz ident_complete - - leaq (level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx - leaq level3_ident_pgt(%rip), %rbx - movq %rdx, 0(%rbx, %rax, 8) - - movq %rdi, %rax - shrq $PMD_SHIFT, %rax - andq $(PTRS_PER_PMD - 1), %rax - leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx - leaq level2_spare_pgt(%rip), %rbx - movq %rdx, 0(%rbx, %rax, 8) -ident_complete: + addq %rbp, level2_fixmap_pgt + (507*8)(%rip) /* * Fixup the kernel text+data virtual addresses. Note that @@ -160,8 +158,8 @@ ENTRY(secondary_startup_64) * after the boot processor executes this code. */ - /* Enable PAE mode and PGE */ - movl $(X86_CR4_PAE | X86_CR4_PGE), %eax + /* Enable PAE mode and PSE/PGE */ + movl $(X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE), %eax movq %rax, %cr4 /* Setup early boot stage 4 level pagetables. */ @@ -183,9 +181,21 @@ ENTRY(secondary_startup_64) movl $MSR_EFER, %ecx rdmsr btsl $_EFER_SCE, %eax /* Enable System Call */ - btl $20,%edi /* No Execute supported? */ + btl $(X86_FEATURE_NX & 31),%edi /* No Execute supported? 
*/ jnc 1f btsl $_EFER_NX, %eax + cmpb $0, init_level4_pgt_initialized(%rip) + jne 1f + btsq $_PAGE_BIT_NX, init_level4_pgt + 8*L4_PAGE_OFFSET(%rip) + btsq $_PAGE_BIT_NX, init_level4_pgt + 8*L4_VMALLOC_START(%rip) + btsq $_PAGE_BIT_NX, init_level4_pgt + 8*L4_VMALLOC_END(%rip) + btsq $_PAGE_BIT_NX, init_level4_pgt + 8*L4_VMEMMAP_START(%rip) + btsq $_PAGE_BIT_NX, level2_fixmap_pgt + 8*504(%rip) + btsq $_PAGE_BIT_NX, level2_fixmap_pgt + 8*505(%rip) + btsq $_PAGE_BIT_NX, level2_fixmap_pgt + 8*506(%rip) + btsq $_PAGE_BIT_NX, level2_fixmap_pgt + 8*507(%rip) + btsq $_PAGE_BIT_NX, __supported_pte_mask(%rip) + movb $1, init_level4_pgt_initialized(%rip) 1: wrmsr /* Make changes effective */ /* Setup cr0 */ @@ -247,6 +257,7 @@ ENTRY(secondary_startup_64) * jump. In addition we need to ensure %cs is set so we make this * a far return. */ + pax_set_fptr_mask movq initial_code(%rip),%rax pushq $0 # fake return address to stop unwinder pushq $__KERNEL_CS # set correct cs @@ -262,14 +273,14 @@ ENTRY(secondary_startup_64) .quad INIT_PER_CPU_VAR(irq_stack_union) ENTRY(stack_start) - .quad init_thread_union+THREAD_SIZE-8 + .quad init_thread_union+THREAD_SIZE-16 .word 0 __FINITDATA bad_address: jmp bad_address - .section ".init.text","ax" + __INIT #ifdef CONFIG_EARLY_PRINTK .globl early_idt_handlers early_idt_handlers: @@ -314,18 +325,23 @@ ENTRY(early_idt_handler) #endif /* EARLY_PRINTK */ 1: hlt jmp 1b + .previous #ifdef CONFIG_EARLY_PRINTK + __INITDATA early_recursion_flag: .long 0 + .previous + .section .rodata,"a",@progbits early_idt_msg: .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n" early_idt_ripmsg: .asciz "RIP %s\n" + .previous #endif /* CONFIG_EARLY_PRINTK */ - .previous + .section .rodata,"a",@progbits #define NEXT_PAGE(name) \ .balign PAGE_SIZE; \ ENTRY(name) @@ -338,7 +354,6 @@ ENTRY(name) i = i + 1 ; \ .endr - .data /* * This default setting generates an ident mapping at address 0x100000 * and a mapping for the kernel that precisely maps virtual address @@ -349,13 +364,41 @@ NEXT_PAGE(init_level4_pgt) .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE .org init_level4_pgt + L4_PAGE_OFFSET*8, 0 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE + .org init_level4_pgt + L4_VMALLOC_START*8, 0 + .quad level3_vmalloc_start_pgt - __START_KERNEL_map + _KERNPG_TABLE + .org init_level4_pgt + L4_VMALLOC_END*8, 0 + .quad level3_vmalloc_end_pgt - __START_KERNEL_map + _KERNPG_TABLE + .org init_level4_pgt + L4_VMEMMAP_START*8, 0 + .quad level3_vmemmap_pgt - __START_KERNEL_map + _KERNPG_TABLE .org init_level4_pgt + L4_START_KERNEL*8, 0 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE +#ifdef CONFIG_PAX_PER_CPU_PGD +NEXT_PAGE(cpu_pgd) + .rept NR_CPUS + .fill 512,8,0 + .endr +#endif + NEXT_PAGE(level3_ident_pgt) .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE +#ifdef CONFIG_XEN .fill 511,8,0 +#else + .quad level2_ident_pgt + PAGE_SIZE - __START_KERNEL_map + _KERNPG_TABLE + .fill 510,8,0 +#endif + +NEXT_PAGE(level3_vmalloc_start_pgt) + .fill 512,8,0 + +NEXT_PAGE(level3_vmalloc_end_pgt) + .fill 512,8,0 + +NEXT_PAGE(level3_vmemmap_pgt) + .fill L3_VMEMMAP_START,8,0 + .quad level2_vmemmap_pgt - __START_KERNEL_map + _KERNPG_TABLE NEXT_PAGE(level3_kernel_pgt) .fill L3_START_KERNEL,8,0 @@ -363,20 +406,29 @@ NEXT_PAGE(level3_kernel_pgt) .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE +NEXT_PAGE(level2_vmemmap_pgt) + .fill 512,8,0 + 
NEXT_PAGE(level2_fixmap_pgt) - .fill 506,8,0 - .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE - /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ - .fill 5,8,0 + .fill 504,8,0 + .quad level1_fixmap_pgt - __START_KERNEL_map + 0 * PAGE_SIZE + _PAGE_TABLE + .quad level1_fixmap_pgt - __START_KERNEL_map + 1 * PAGE_SIZE + _PAGE_TABLE + .quad level1_fixmap_pgt - __START_KERNEL_map + 2 * PAGE_SIZE + _PAGE_TABLE + .quad level1_vsyscall_pgt - __START_KERNEL_map + _PAGE_TABLE + /* 6MB reserved for vsyscalls + a 2MB hole = 3 + 1 entries */ + .fill 4,8,0 NEXT_PAGE(level1_fixmap_pgt) + .fill 3*512,8,0 + +NEXT_PAGE(level1_vsyscall_pgt) .fill 512,8,0 -NEXT_PAGE(level2_ident_pgt) - /* Since I easily can, map the first 1G. + /* Since I easily can, map the first 2G. * Don't set NX because code runs from these pages. */ - PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) +NEXT_PAGE(level2_ident_pgt) + PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, 2*PTRS_PER_PMD) NEXT_PAGE(level2_kernel_pgt) /* @@ -389,35 +441,59 @@ NEXT_PAGE(level2_kernel_pgt) * If you want to increase this then increase MODULES_VADDR * too.) */ - PMDS(0, __PAGE_KERNEL_LARGE_EXEC, - KERNEL_IMAGE_SIZE/PMD_SIZE) - -NEXT_PAGE(level2_spare_pgt) - .fill 512, 8, 0 + PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE/PMD_SIZE) #undef PMDS #undef NEXT_PAGE - .data + .align PAGE_SIZE +ENTRY(cpu_gdt_table) + .rept NR_CPUS + .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0x00cf9b000000ffff /* __KERNEL32_CS */ + .quad 0x00af9b000000ffff /* __KERNEL_CS */ + .quad 0x00cf93000000ffff /* __KERNEL_DS */ + .quad 0x00cffb000000ffff /* __USER32_CS */ + .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */ + .quad 0x00affb000000ffff /* __USER_CS */ + +#ifdef CONFIG_PAX_KERNEXEC + .quad 0x00af9b000000ffff /* __KERNEXEC_KERNEL_CS */ +#else + .quad 0x0 /* unused */ +#endif + + .quad 0,0 /* TSS */ + .quad 0,0 /* LDT */ + .quad 0,0,0 /* three TLS descriptors */ + .quad 0x0000f40000000000 /* node/CPU stored in limit */ + /* asm/segment.h:GDT_ENTRIES must match this */ + + /* zero the remaining page */ + .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0 + .endr + .align 16 .globl early_gdt_descr early_gdt_descr: .word GDT_ENTRIES*8-1 early_gdt_descr_base: - .quad INIT_PER_CPU_VAR(gdt_page) + .quad cpu_gdt_table ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ .quad 0x0000000000000000 +init_level4_pgt_initialized: + .byte 0x00 + #include "../../x86/xen/xen-head.S" - - .section .bss, "aw", @nobits + + .section .rodata,"a",@progbits .align L1_CACHE_BYTES ENTRY(idt_table) - .skip IDT_ENTRIES * 16 + .fill 512,8,0 - __PAGE_ALIGNED_BSS .align PAGE_SIZE ENTRY(empty_zero_page) .skip PAGE_SIZE diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 9c3bd4a..e1d9b35 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -20,8 +20,12 @@ extern void cmpxchg8b_emu(void); EXPORT_SYMBOL(cmpxchg8b_emu); #endif +EXPORT_SYMBOL_GPL(cpu_gdt_table); + /* Networking helper routines. 
*/ EXPORT_SYMBOL(csum_partial_copy_generic); +EXPORT_SYMBOL(csum_partial_copy_generic_to_user); +EXPORT_SYMBOL(csum_partial_copy_generic_from_user); EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); @@ -36,3 +40,7 @@ EXPORT_SYMBOL(strstr); EXPORT_SYMBOL(csum_partial); EXPORT_SYMBOL(empty_zero_page); + +#ifdef CONFIG_PAX_KERNEXEC +EXPORT_SYMBOL(__LOAD_PHYSICAL_ADDR); +#endif diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 6104852..47826ae 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -111,7 +111,7 @@ static int i8259A_irq_pending(unsigned int irq) static void make_8259A_irq(unsigned int irq) { disable_irq_nosync(irq); - io_apic_irqs &= ~(1< #include #include +#include #include #include #include @@ -30,6 +31,12 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) return -EINVAL; if (turn_on && !capable(CAP_SYS_RAWIO)) return -EPERM; +#ifdef CONFIG_GRKERNSEC_IO + if (turn_on && grsec_disable_privio) { + gr_handle_ioperm(); + return -ENODEV; + } +#endif /* * If it's the first ioperm() call in this thread's lifetime, set the @@ -54,7 +61,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) * because the ->io_bitmap_max value must match the bitmap * contents: */ - tss = &per_cpu(init_tss, get_cpu()); + tss = init_tss + get_cpu(); if (turn_on) bitmap_clear(t->io_bitmap_ptr, from, num); @@ -104,6 +111,12 @@ long sys_iopl(unsigned int level, struct pt_regs *regs) if (level > old) { if (!capable(CAP_SYS_RAWIO)) return -EPERM; +#ifdef CONFIG_GRKERNSEC_IO + if (grsec_disable_privio) { + gr_handle_iopl(); + return -ENODEV; + } +#endif } regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12); t->iopl = level << 12; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 687637b..3e626d9 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -18,7 +18,7 @@ #include #include -atomic_t irq_err_count; +atomic_unchecked_t irq_err_count; /* Function pointer for generic interrupt vector handling */ void (*x86_platform_ipi_callback)(void) = NULL; @@ -117,9 +117,9 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", per_cpu(mce_poll_count, j)); seq_printf(p, " Machine check polls\n"); #endif - seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); + seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read_unchecked(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) - seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); + seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read_unchecked(&irq_mis_count)); #endif return 0; } @@ -159,7 +159,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) u64 arch_irq_stat(void) { - u64 sum = atomic_read(&irq_err_count); + u64 sum = atomic_read_unchecked(&irq_err_count); return sum; } diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 7209070..ada4d63 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -28,6 +28,9 @@ DEFINE_PER_CPU(struct pt_regs *, irq_regs); EXPORT_PER_CPU_SYMBOL(irq_regs); #ifdef CONFIG_DEBUG_STACKOVERFLOW + +extern void gr_handle_kernel_exploit(void); + /* Debugging check for stack overflow: is there less than 1KB free? 
*/ static int check_stack_overflow(void) { @@ -36,13 +39,14 @@ static int check_stack_overflow(void) __asm__ __volatile__("andl %%esp,%0" : "=r" (sp) : "0" (THREAD_SIZE - 1)); - return sp < (sizeof(struct thread_info) + STACK_WARN); + return sp < STACK_WARN; } static void print_stack_overflow(void) { printk(KERN_WARNING "low stack detected by irq handler\n"); dump_stack(); + gr_handle_kernel_exploit(); } #else @@ -54,8 +58,8 @@ static inline void print_stack_overflow(void) { } * per-CPU IRQ handling contexts (thread information and stack) */ union irq_ctx { - struct thread_info tinfo; - u32 stack[THREAD_SIZE/sizeof(u32)]; + unsigned long previous_esp; + u32 stack[THREAD_SIZE/sizeof(u32)]; } __attribute__((aligned(THREAD_SIZE))); static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); @@ -75,10 +79,9 @@ static void call_on_stack(void *func, void *stack) static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { - union irq_ctx *curctx, *irqctx; + union irq_ctx *irqctx; u32 *isp, arg1, arg2; - curctx = (union irq_ctx *) current_thread_info(); irqctx = __this_cpu_read(hardirq_ctx); /* @@ -87,21 +90,16 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) * handler) we can't do that and just have to keep using the * current stack (which is the irq stack already after all) */ - if (unlikely(curctx == irqctx)) + if (unlikely((void *)current_stack_pointer - (void *)irqctx < THREAD_SIZE)) return 0; /* build the stack frame on the IRQ stack */ - isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); - irqctx->tinfo.task = curctx->tinfo.task; - irqctx->tinfo.previous_esp = current_stack_pointer; + isp = (u32 *) ((char *)irqctx + sizeof(*irqctx) - 8); + irqctx->previous_esp = current_stack_pointer; - /* - * Copy the softirq bits in preempt_count so that the - * softirq checks work in the hardirq context. 
- */ - irqctx->tinfo.preempt_count = - (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) | - (curctx->tinfo.preempt_count & SOFTIRQ_MASK); +#ifdef CONFIG_PAX_MEMORY_UDEREF + __set_fs(MAKE_MM_SEG(0)); +#endif if (unlikely(overflow)) call_on_stack(print_stack_overflow, isp); @@ -113,6 +111,11 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) : "0" (irq), "1" (desc), "2" (isp), "D" (desc->handle_irq) : "memory", "cc", "ecx"); + +#ifdef CONFIG_PAX_MEMORY_UDEREF + __set_fs(current_thread_info()->addr_limit); +#endif + return 1; } @@ -121,29 +124,11 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) */ void __cpuinit irq_ctx_init(int cpu) { - union irq_ctx *irqctx; - if (per_cpu(hardirq_ctx, cpu)) return; - irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), - THREAD_FLAGS, - THREAD_ORDER)); - memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); - irqctx->tinfo.cpu = cpu; - irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); - - per_cpu(hardirq_ctx, cpu) = irqctx; - - irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), - THREAD_FLAGS, - THREAD_ORDER)); - memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); - irqctx->tinfo.cpu = cpu; - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); - - per_cpu(softirq_ctx, cpu) = irqctx; + per_cpu(hardirq_ctx, cpu) = page_address(alloc_pages_node(cpu_to_node(cpu), THREAD_FLAGS, THREAD_ORDER)); + per_cpu(softirq_ctx, cpu) = page_address(alloc_pages_node(cpu_to_node(cpu), THREAD_FLAGS, THREAD_ORDER)); printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); @@ -152,7 +137,6 @@ void __cpuinit irq_ctx_init(int cpu) asmlinkage void do_softirq(void) { unsigned long flags; - struct thread_info *curctx; union irq_ctx *irqctx; u32 *isp; @@ -162,15 +146,22 @@ asmlinkage void do_softirq(void) local_irq_save(flags); if (local_softirq_pending()) { - curctx = current_thread_info(); irqctx = __this_cpu_read(softirq_ctx); - irqctx->tinfo.task = curctx->task; - irqctx->tinfo.previous_esp = current_stack_pointer; + irqctx->previous_esp = current_stack_pointer; /* build the stack frame on the softirq stack */ - isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); + isp = (u32 *) ((char *)irqctx + sizeof(*irqctx) - 8); + +#ifdef CONFIG_PAX_MEMORY_UDEREF + __set_fs(MAKE_MM_SEG(0)); +#endif call_on_stack(__do_softirq, isp); + +#ifdef CONFIG_PAX_MEMORY_UDEREF + __set_fs(current_thread_info()->addr_limit); +#endif + /* * Shouldn't happen, we returned above if in_interrupt(): */ diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 69bca46..e38f147 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -26,6 +26,8 @@ EXPORT_PER_CPU_SYMBOL(irq_stat); DEFINE_PER_CPU(struct pt_regs *, irq_regs); EXPORT_PER_CPU_SYMBOL(irq_regs); +extern void gr_handle_kernel_exploit(void); + /* * Probabilistic stack overflow check: * @@ -38,16 +40,17 @@ static inline void stack_overflow_check(struct pt_regs *regs) #ifdef CONFIG_DEBUG_STACKOVERFLOW u64 curbase = (u64)task_stack_page(current); - if (user_mode_vm(regs)) + if (user_mode(regs)) return; - WARN_ONCE(regs->sp >= curbase && - regs->sp <= curbase + THREAD_SIZE && - regs->sp < curbase + sizeof(struct thread_info) + - sizeof(struct pt_regs) + 128, - - "do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n", + if (regs->sp >= curbase && + regs->sp <= curbase + THREAD_SIZE && + regs->sp < curbase + sizeof(struct thread_info) + + sizeof(struct pt_regs) + 128) { + WARN_ONCE(1, "do_IRQ: %s 
near stack overflow (cur:%Lx,sp:%lx)\n", current->comm, curbase, regs->sp); + gr_handle_kernel_exploit(); + } #endif } diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 2f45c4c..3f51a0c 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -126,11 +126,11 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) #ifdef CONFIG_X86_32 switch (regno) { case GDB_SS: - if (!user_mode_vm(regs)) + if (!user_mode(regs)) *(unsigned long *)mem = __KERNEL_DS; break; case GDB_SP: - if (!user_mode_vm(regs)) + if (!user_mode(regs)) *(unsigned long *)mem = kernel_stack_pointer(regs); break; case GDB_GS: @@ -228,7 +228,10 @@ static void kgdb_correct_hw_break(void) bp->attr.bp_addr = breakinfo[breakno].addr; bp->attr.bp_len = breakinfo[breakno].len; bp->attr.bp_type = breakinfo[breakno].type; - info->address = breakinfo[breakno].addr; + if (breakinfo[breakno].type == X86_BREAKPOINT_EXECUTE) + info->address = ktla_ktva(breakinfo[breakno].addr); + else + info->address = breakinfo[breakno].addr; info->len = breakinfo[breakno].len; info->type = breakinfo[breakno].type; val = arch_install_hw_breakpoint(bp); @@ -475,12 +478,12 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, case 'k': /* clear the trace bit */ linux_regs->flags &= ~X86_EFLAGS_TF; - atomic_set(&kgdb_cpu_doing_single_step, -1); + atomic_set_unchecked(&kgdb_cpu_doing_single_step, -1); /* set the trace bit if we're stepping */ if (remcomInBuffer[0] == 's') { linux_regs->flags |= X86_EFLAGS_TF; - atomic_set(&kgdb_cpu_doing_single_step, + atomic_set_unchecked(&kgdb_cpu_doing_single_step, raw_smp_processor_id()); } @@ -545,7 +548,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) switch (cmd) { case DIE_DEBUG: - if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { + if (atomic_read_unchecked(&kgdb_cpu_doing_single_step) != -1) { if (user_mode(regs)) return single_step_cont(regs, args); break; @@ -748,11 +751,11 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) char opc[BREAK_INSTR_SIZE]; bpt->type = BP_BREAKPOINT; - err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + err = probe_kernel_read(bpt->saved_instr, ktla_ktva((char *)bpt->bpt_addr), BREAK_INSTR_SIZE); if (err) return err; - err = probe_kernel_write((char *)bpt->bpt_addr, + err = probe_kernel_write(ktla_ktva((char *)bpt->bpt_addr), arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); #ifdef CONFIG_DEBUG_RODATA if (!err) @@ -765,7 +768,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) return -EBUSY; text_poke((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); - err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); + err = probe_kernel_read(opc, ktla_ktva((char *)bpt->bpt_addr), BREAK_INSTR_SIZE); if (err) return err; if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE)) @@ -790,13 +793,13 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) if (mutex_is_locked(&text_mutex)) goto knl_write; text_poke((void *)bpt->bpt_addr, bpt->saved_instr, BREAK_INSTR_SIZE); - err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); + err = probe_kernel_read(opc, ktla_ktva((char *)bpt->bpt_addr), BREAK_INSTR_SIZE); if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE)) goto knl_write; return err; knl_write: #endif /* CONFIG_DEBUG_RODATA */ - return probe_kernel_write((char *)bpt->bpt_addr, + return probe_kernel_write(ktla_ktva((char *)bpt->bpt_addr), (char *)bpt->saved_instr, BREAK_INSTR_SIZE); } diff --git a/arch/x86/kernel/kprobes.c 
b/arch/x86/kernel/kprobes.c index 083848f..69321f0 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -117,9 +117,12 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) s32 raddr; } __attribute__((packed)) *insn; - insn = (struct __arch_relative_insn *)from; + insn = (struct __arch_relative_insn *)ktla_ktva(from); + + pax_open_kernel(); insn->raddr = (s32)((long)(to) - ((long)(from) + 5)); insn->op = op; + pax_close_kernel(); } /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ @@ -156,7 +159,7 @@ static int __kprobes can_boost(kprobe_opcode_t *opcodes) kprobe_opcode_t opcode; kprobe_opcode_t *orig_opcodes = opcodes; - if (search_exception_tables((unsigned long)opcodes)) + if (search_exception_tables(ktva_ktla((unsigned long)opcodes))) return 0; /* Page fault may occur on this address. */ retry: @@ -228,7 +231,7 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) * for the first byte, we can recover the original instruction * from it and kp->opcode. */ - memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + memcpy(buf, ktla_ktva(kp->addr), MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); buf[0] = kp->opcode; return 0; } @@ -264,7 +267,7 @@ static int __kprobes can_probe(unsigned long paddr) * recover it. */ return 0; - kernel_insn_init(&insn, buf); + kernel_insn_init(&insn, ktva_ktla(buf)); } insn_get_length(&insn); addr += insn.length; @@ -313,11 +316,13 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) (unsigned long)src); if (ret) return 0; - kernel_insn_init(&insn, buf); + kernel_insn_init(&insn, ktva_ktla(buf)); } } insn_get_length(&insn); + pax_open_kernel(); memcpy(dest, insn.kaddr, insn.length); + pax_close_kernel(); #ifdef CONFIG_X86_64 if (insn_rip_relative(&insn)) { @@ -341,7 +346,9 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) (u8 *) dest; BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ disp = (u8 *) dest + insn_offset_displacement(&insn); + pax_open_kernel(); *(s32 *) disp = (s32) newdisp; + pax_close_kernel(); } #endif return insn.length; @@ -355,12 +362,12 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p) */ __copy_instruction(p->ainsn.insn, p->addr, 0); - if (can_boost(p->addr)) + if (can_boost(ktla_ktva(p->addr))) p->ainsn.boostable = 0; else p->ainsn.boostable = -1; - p->opcode = *p->addr; + p->opcode = *(ktla_ktva(p->addr)); } int __kprobes arch_prepare_kprobe(struct kprobe *p) @@ -477,7 +484,7 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, * nor set current_kprobe, because it doesn't use single * stepping. 
*/ - regs->ip = (unsigned long)p->ainsn.insn; + regs->ip = ktva_ktla((unsigned long)p->ainsn.insn); preempt_enable_no_resched(); return; } @@ -494,9 +501,9 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, regs->flags &= ~X86_EFLAGS_IF; /* single step inline if the instruction is an int3 */ if (p->opcode == BREAKPOINT_INSTRUCTION) - regs->ip = (unsigned long)p->addr; + regs->ip = ktla_ktva((unsigned long)p->addr); else - regs->ip = (unsigned long)p->ainsn.insn; + regs->ip = ktva_ktla((unsigned long)p->ainsn.insn); } /* @@ -575,7 +582,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) setup_singlestep(p, regs, kcb, 0); return 1; } - } else if (*addr != BREAKPOINT_INSTRUCTION) { + } else if (*(kprobe_opcode_t *)ktla_ktva((unsigned long)addr) != BREAKPOINT_INSTRUCTION) { /* * The breakpoint instruction was removed right * after we hit it. Another cpu has removed @@ -683,6 +690,9 @@ static void __used __kprobes kretprobe_trampoline_holder(void) " movq %rax, 152(%rsp)\n" RESTORE_REGS_STRING " popfq\n" +#ifdef KERNEXEC_PLUGIN + " btsq $63,(%rsp)\n" +#endif #else " pushf\n" SAVE_REGS_STRING @@ -820,7 +830,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { unsigned long *tos = stack_addr(regs); - unsigned long copy_ip = (unsigned long)p->ainsn.insn; + unsigned long copy_ip = ktva_ktla((unsigned long)p->ainsn.insn); unsigned long orig_ip = (unsigned long)p->addr; kprobe_opcode_t *insn = p->ainsn.insn; @@ -1002,7 +1012,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = data; int ret = NOTIFY_DONE; - if (args->regs && user_mode_vm(args->regs)) + if (args->regs && user_mode(args->regs)) return ret; switch (val) { @@ -1130,6 +1140,7 @@ static void __kprobes synthesize_relcall(void *from, void *to) static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) { + pax_open_kernel(); #ifdef CONFIG_X86_64 *addr++ = 0x48; *addr++ = 0xbf; @@ -1137,6 +1148,7 @@ static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, *addr++ = 0xb8; #endif *(unsigned long *)addr = val; + pax_close_kernel(); } static void __used __kprobes kprobes_optinsn_template_holder(void) @@ -1317,7 +1329,7 @@ static int __kprobes can_optimize(unsigned long paddr) ret = recover_probed_instruction(buf, addr); if (ret) return 0; - kernel_insn_init(&insn, buf); + kernel_insn_init(&insn, ktva_ktla(buf)); } insn_get_length(&insn); /* Recover address */ @@ -1394,7 +1406,7 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) * Verify if the address gap is in 2GB range, because this uses * a relative jump. 
*/ - rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; + rel = (long)op->optinsn.insn - ktla_ktva((long)op->kp.addr) + RELATIVEJUMP_SIZE; if (abs(rel) > 0x7fffffff) return -ERANGE; @@ -1409,16 +1421,18 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) op->optinsn.size = ret; /* Copy arch-dep-instance from template */ - memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); + pax_open_kernel(); + memcpy(buf, ktla_ktva(&optprobe_template_entry), TMPL_END_IDX); + pax_close_kernel(); /* Set probe information */ synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); /* Set probe function call */ - synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); + synthesize_relcall(ktva_ktla(buf) + TMPL_CALL_IDX, optimized_callback); /* Set returning jmp instruction at the tail of out-of-line buffer */ - synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, + synthesize_reljump(ktva_ktla(buf) + TMPL_END_IDX + op->optinsn.size, (u8 *)op->kp.addr + op->optinsn.size); flush_icache_range((unsigned long) buf, @@ -1441,7 +1455,7 @@ static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, ((long)op->kp.addr + RELATIVEJUMP_SIZE)); /* Backup instructions which will be replaced by jump address */ - memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, + memcpy(op->optinsn.copied_insn, ktla_ktva(op->kp.addr) + INT3_SIZE, RELATIVE_ADDR_SIZE); insn_buf[0] = RELATIVEJUMP_OPCODE; @@ -1540,7 +1554,7 @@ static int __kprobes setup_detour_execution(struct kprobe *p, /* This kprobe is really able to run optimized path. */ op = container_of(p, struct optimized_kprobe, kp); /* Detour through copied instructions */ - regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; + regs->ip = ktva_ktla((unsigned long)op->optinsn.insn) + TMPL_END_IDX; if (!reenter) reset_current_kprobe(); preempt_enable_no_resched(); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 4b6701e..1a3dcdb 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -444,6 +444,7 @@ static void __init paravirt_ops_setup(void) pv_mmu_ops.set_pud = kvm_set_pud; #if PAGETABLE_LEVELS == 4 pv_mmu_ops.set_pgd = kvm_set_pgd; + pv_mmu_ops.set_pgd_batched = kvm_set_pgd; #endif #endif pv_mmu_ops.flush_tlb_user = kvm_flush_tlb; @@ -586,7 +587,7 @@ static int __cpuinit kvm_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata kvm_cpu_notifier = { +static struct notifier_block kvm_cpu_notifier = { .notifier_call = kvm_cpu_notify, }; #endif diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 0a8e65e..288a4b0 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -67,13 +67,13 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) if (reload) { #ifdef CONFIG_SMP preempt_disable(); - load_LDT(pc); + load_LDT_nolock(pc); if (!cpumask_equal(mm_cpumask(current->mm), cpumask_of(smp_processor_id()))) smp_call_function(flush_ldt, current->mm, 1); preempt_enable(); #else - load_LDT(pc); + load_LDT_nolock(pc); #endif } if (oldsize) { @@ -95,7 +95,7 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old) return err; for (i = 0; i < old->size; i++) - write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); + write_ldt_entry(new->ldt, i, old->ldt + i); return 0; } @@ -116,6 +116,24 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) retval = copy_ldt(&mm->context, &old_mm->context); mutex_unlock(&old_mm->context.lock); } + + if (tsk == current) { + mm->context.vdso 
= 0; + +#ifdef CONFIG_X86_32 +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + mm->context.user_cs_base = 0UL; + mm->context.user_cs_limit = ~0UL; + +#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_SMP) + cpus_clear(mm->context.cpu_user_cs_mask); +#endif + +#endif +#endif + + } + return retval; } @@ -230,6 +248,13 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) } } +#ifdef CONFIG_PAX_SEGMEXEC + if ((mm->pax_flags & MF_PAX_SEGMEXEC) && (ldt_info.contents & MODIFY_LDT_CONTENTS_CODE)) { + error = -EINVAL; + goto out_unlock; + } +#endif + if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) { error = -EINVAL; goto out_unlock; diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index a3fa43b..8966f4c 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -27,7 +27,7 @@ #include #include -static void set_idt(void *newidt, __u16 limit) +static void set_idt(struct desc_struct *newidt, __u16 limit) { struct desc_ptr curidt; @@ -39,7 +39,7 @@ static void set_idt(void *newidt, __u16 limit) } -static void set_gdt(void *newgdt, __u16 limit) +static void set_gdt(struct desc_struct *newgdt, __u16 limit) { struct desc_ptr curgdt; @@ -217,7 +217,7 @@ void machine_kexec(struct kimage *image) } control_page = page_address(image->control_code_page); - memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); + memcpy(control_page, (void *)ktla_ktva((unsigned long)relocate_kernel), KEXEC_CONTROL_CODE_MAX_SIZE); relocate_kernel_ptr = control_page; page_list[PA_CONTROL_PAGE] = __pa(control_page); diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 29c95d7..97b7b1b 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -507,7 +507,7 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) return NOTIFY_OK; } -static struct notifier_block __refdata mc_cpu_notifier = { +static struct notifier_block mc_cpu_notifier = { .notifier_call = mc_cpu_callback, }; diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 3ca42d0..7cff8cc 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -436,13 +436,13 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) static int get_ucode_user(void *to, const void *from, size_t n) { - return copy_from_user(to, from, n); + return copy_from_user(to, (const void __force_user *)from, n); } static enum ucode_state request_microcode_user(int cpu, const void __user *buf, size_t size) { - return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); + return generic_load_microcode(cpu, (__force_kernel void *)buf, size, &get_ucode_user); } static void microcode_fini_cpu(int cpu) diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 925179f..b151b74 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -36,15 +36,62 @@ #define DEBUGP(fmt...) 
#endif -void *module_alloc(unsigned long size) +static inline void *__module_alloc(unsigned long size, pgprot_t prot) __size_overflow(1); +static inline void *__module_alloc(unsigned long size, pgprot_t prot) { - if (PAGE_ALIGN(size) > MODULES_LEN) + if (!size || PAGE_ALIGN(size) > MODULES_LEN) return NULL; return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC, + GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, prot, -1, __builtin_return_address(0)); } +void *module_alloc(unsigned long size) +{ + +#ifdef CONFIG_PAX_KERNEXEC + return __module_alloc(size, PAGE_KERNEL); +#else + return __module_alloc(size, PAGE_KERNEL_EXEC); +#endif + +} + +#ifdef CONFIG_PAX_KERNEXEC +#ifdef CONFIG_X86_32 +void *module_alloc_exec(unsigned long size) __size_overflow(1); +void *module_alloc_exec(unsigned long size) +{ + struct vm_struct *area; + + if (size == 0) + return NULL; + + area = __get_vm_area(size, VM_ALLOC, (unsigned long)&MODULES_EXEC_VADDR, (unsigned long)&MODULES_EXEC_END); + return area ? area->addr : NULL; +} +EXPORT_SYMBOL(module_alloc_exec); + +void module_free_exec(struct module *mod, void *module_region) +{ + vunmap(module_region); +} +EXPORT_SYMBOL(module_free_exec); +#else +void module_free_exec(struct module *mod, void *module_region) +{ + module_free(mod, module_region); +} +EXPORT_SYMBOL(module_free_exec); + +void *module_alloc_exec(unsigned long size) +{ + return __module_alloc(size, PAGE_KERNEL_RX); +} +EXPORT_SYMBOL(module_alloc_exec); +#endif +#endif + #ifdef CONFIG_X86_32 int apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, @@ -55,14 +102,16 @@ int apply_relocate(Elf32_Shdr *sechdrs, unsigned int i; Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr; Elf32_Sym *sym; - uint32_t *location; + uint32_t *plocation, location; DEBUGP("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { /* This is where to make the change */ - location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr - + rel[i].r_offset; + plocation = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[i].r_offset; + location = (uint32_t)plocation; + if (sechdrs[sechdrs[relsec].sh_info].sh_flags & SHF_EXECINSTR) + plocation = ktla_ktva((void *)plocation); /* This is the symbol it is referring to. Note that all undefined symbols have been resolved. 
*/ sym = (Elf32_Sym *)sechdrs[symindex].sh_addr @@ -71,11 +120,15 @@ int apply_relocate(Elf32_Shdr *sechdrs, switch (ELF32_R_TYPE(rel[i].r_info)) { case R_386_32: /* We add the value into the location given */ - *location += sym->st_value; + pax_open_kernel(); + *plocation += sym->st_value; + pax_close_kernel(); break; case R_386_PC32: /* Add the value, subtract its postition */ - *location += sym->st_value - (uint32_t)location; + pax_open_kernel(); + *plocation += sym->st_value - location; + pax_close_kernel(); break; default: printk(KERN_ERR "module %s: Unknown relocation: %u\n", @@ -120,21 +173,30 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_X86_64_NONE: break; case R_X86_64_64: + pax_open_kernel(); *(u64 *)loc = val; + pax_close_kernel(); break; case R_X86_64_32: + pax_open_kernel(); *(u32 *)loc = val; + pax_close_kernel(); if (val != *(u32 *)loc) goto overflow; break; case R_X86_64_32S: + pax_open_kernel(); *(s32 *)loc = val; + pax_close_kernel(); if ((s64)val != *(s32 *)loc) goto overflow; break; case R_X86_64_PC32: val -= (u64)loc; + pax_open_kernel(); *(u32 *)loc = val; + pax_close_kernel(); + #if 0 if ((s64)val != *(s32 *)loc) goto overflow; diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index f7d1a64..28afc4a 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -104,6 +105,11 @@ static ssize_t msr_write(struct file *file, const char __user *buf, int err = 0; ssize_t bytes = 0; +#ifdef CONFIG_GRKERNSEC_KMEM + gr_handle_msr_write(); + return -EPERM; +#endif + if (count % 8) return -EINVAL; /* Invalid chunk size */ @@ -151,6 +157,10 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) err = -EBADF; break; } +#ifdef CONFIG_GRKERNSEC_KMEM + gr_handle_msr_write(); + return -EPERM; +#endif if (copy_from_user(®s, uregs, sizeof regs)) { err = -EFAULT; break; @@ -235,7 +245,7 @@ static int __cpuinit msr_class_cpu_callback(struct notifier_block *nfb, return notifier_from_errno(err); } -static struct notifier_block __refdata msr_class_cpu_notifier = { +static struct notifier_block msr_class_cpu_notifier = { .notifier_call = msr_class_cpu_callback, }; diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index e88f37b..45bb4ff 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -126,9 +126,9 @@ static int __setup_nmi(unsigned int type, struct nmiaction *action) * event confuses some handlers (kdump uses this flag) */ if (action->flags & NMI_FLAG_FIRST) - list_add_rcu(&action->list, &desc->head); + pax_list_add_rcu((struct list_head *)&action->list, &desc->head); else - list_add_tail_rcu(&action->list, &desc->head); + pax_list_add_tail_rcu((struct list_head *)&action->list, &desc->head); spin_unlock_irqrestore(&desc->lock, flags); return 0; @@ -150,7 +150,7 @@ static struct nmiaction *__free_nmi(unsigned int type, const char *name) if (!strcmp(n->name, name)) { WARN(in_nmi(), "Trying to free NMI (%s) from NMI context!\n", n->name); - list_del_rcu(&n->list); + pax_list_del_rcu((struct list_head *)&n->list); break; } } @@ -408,6 +408,17 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) dotraplinkage notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) { + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) + if (!user_mode(regs)) { + unsigned long cs = regs->cs & 0xFFFF; + unsigned long ip = ktva_ktla(regs->ip); + + if ((cs == __KERNEL_CS || cs == __KERNEXEC_KERNEL_CS) && ip <= (unsigned long)_etext) 
+ regs->ip = ip; + } +#endif + nmi_enter(); inc_irq_stat(__nmi_count); diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 676b8c7..870ba04 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -13,7 +13,7 @@ default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) arch_spin_lock(lock); } -struct pv_lock_ops pv_lock_ops = { +struct pv_lock_ops pv_lock_ops __read_only = { #ifdef CONFIG_SMP .spin_is_locked = __ticket_spin_is_locked, .spin_is_contended = __ticket_spin_is_contended, diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 84c938f..fa25421 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -53,6 +53,9 @@ u64 _paravirt_ident_64(u64 x) { return x; } +#if defined(CONFIG_X86_32) && defined(CONFIG_X86_PAE) +PV_CALLEE_SAVE_REGS_THUNK(_paravirt_ident_64); +#endif void __init default_banner(void) { @@ -144,16 +147,20 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, if (opfunc == NULL) /* If there's no function, patch it with a ud2a (BUG) */ - ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); - else if (opfunc == _paravirt_nop) + ret = paravirt_patch_insns(insnbuf, len, ktva_ktla(ud2a), ud2a+sizeof(ud2a)); + else if (opfunc == (void *)_paravirt_nop) /* If the operation is a nop, then nop the callsite */ ret = paravirt_patch_nop(); /* identity functions just return their single argument */ - else if (opfunc == _paravirt_ident_32) + else if (opfunc == (void *)_paravirt_ident_32) ret = paravirt_patch_ident_32(insnbuf, len); - else if (opfunc == _paravirt_ident_64) + else if (opfunc == (void *)_paravirt_ident_64) ret = paravirt_patch_ident_64(insnbuf, len); +#if defined(CONFIG_X86_32) && defined(CONFIG_X86_PAE) + else if (opfunc == (void *)__raw_callee_save__paravirt_ident_64) + ret = paravirt_patch_ident_64(insnbuf, len); +#endif else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || @@ -178,7 +185,7 @@ unsigned paravirt_patch_insns(void *insnbuf, unsigned len, if (insn_len > len || start == NULL) insn_len = len; else - memcpy(insnbuf, start, insn_len); + memcpy(insnbuf, ktla_ktva(start), insn_len); return insn_len; } @@ -302,7 +309,7 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) return percpu_read(paravirt_lazy_mode); } -struct pv_info pv_info = { +struct pv_info pv_info __read_only = { .name = "bare hardware", .paravirt_enabled = 0, .kernel_rpl = 0, @@ -313,16 +320,16 @@ struct pv_info pv_info = { #endif }; -struct pv_init_ops pv_init_ops = { +struct pv_init_ops pv_init_ops __read_only = { .patch = native_patch, }; -struct pv_time_ops pv_time_ops = { +struct pv_time_ops pv_time_ops __read_only = { .sched_clock = native_sched_clock, .steal_clock = native_steal_clock, }; -struct pv_irq_ops pv_irq_ops = { +struct pv_irq_ops pv_irq_ops __read_only = { .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), @@ -334,7 +341,7 @@ struct pv_irq_ops pv_irq_ops = { #endif }; -struct pv_cpu_ops pv_cpu_ops = { +struct pv_cpu_ops pv_cpu_ops __read_only = { .cpuid = native_cpuid, .get_debugreg = native_get_debugreg, .set_debugreg = native_set_debugreg, @@ -395,21 +402,26 @@ struct pv_cpu_ops pv_cpu_ops = { .end_context_switch = paravirt_nop, }; -struct pv_apic_ops pv_apic_ops = { +struct pv_apic_ops pv_apic_ops __read_only= { #ifdef CONFIG_X86_LOCAL_APIC 
.startup_ipi_hook = paravirt_nop, #endif }; -#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) +#ifdef CONFIG_X86_32 +#ifdef CONFIG_X86_PAE +/* 64-bit pagetable entries */ +#define PTE_IDENT PV_CALLEE_SAVE(_paravirt_ident_64) +#else /* 32-bit pagetable entries */ #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32) +#endif #else /* 64-bit pagetable entries */ #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) #endif -struct pv_mmu_ops pv_mmu_ops = { +struct pv_mmu_ops pv_mmu_ops __read_only = { .read_cr2 = native_read_cr2, .write_cr2 = native_write_cr2, @@ -459,6 +471,7 @@ struct pv_mmu_ops pv_mmu_ops = { .make_pud = PTE_IDENT, .set_pgd = native_set_pgd, + .set_pgd_batched = native_set_pgd_batched, #endif #endif /* PAGETABLE_LEVELS >= 3 */ @@ -479,6 +492,12 @@ struct pv_mmu_ops pv_mmu_ops = { }, .set_fixmap = native_set_fixmap, + +#ifdef CONFIG_PAX_KERNEXEC + .pax_open_kernel = native_pax_open_kernel, + .pax_close_kernel = native_pax_close_kernel, +#endif + }; EXPORT_SYMBOL_GPL(pv_time_ops); diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index a1da673..2c72d5b 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -9,7 +9,9 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); +#ifndef CONFIG_PAX_MEMORY_UDEREF DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)"); +#endif DEF_NATIVE(pv_cpu_ops, clts, "clts"); DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); @@ -57,7 +59,9 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_mmu_ops, read_cr3); PATCH_SITE(pv_mmu_ops, write_cr3); PATCH_SITE(pv_cpu_ops, clts); +#ifndef CONFIG_PAX_MEMORY_UDEREF PATCH_SITE(pv_mmu_ops, flush_tlb_single); +#endif PATCH_SITE(pv_cpu_ops, wbinvd); patch_site: diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 726494b..5d942a3 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1341,7 +1341,7 @@ static void __init get_tce_space_from_tar(void) tce_space = be64_to_cpu(readq(target)); tce_space = tce_space & TAR_SW_BITS; - tce_space = tce_space & (~specified_table_size); + tce_space = tce_space & (~(unsigned long)specified_table_size); info->tce_space = (u64 *)__va(tce_space); } } diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c index 35ccf75..7a15747 100644 --- a/arch/x86/kernel/pci-iommu_table.c +++ b/arch/x86/kernel/pci-iommu_table.c @@ -2,7 +2,7 @@ #include #include #include - +#include #define DEBUG 1 diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 59b9b37..f02ee42 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -48,16 +48,33 @@ void free_thread_xstate(struct task_struct *tsk) void free_thread_info(struct thread_info *ti) { - free_thread_xstate(ti->task); free_pages((unsigned long)ti, THREAD_ORDER); } +static struct kmem_cache *task_struct_cachep; + void arch_task_cache_init(void) { - task_xstate_cachep = - kmem_cache_create("task_xstate", xstate_size, + /* create a slab on which task_structs can be allocated */ + task_struct_cachep = + kmem_cache_create("task_struct", sizeof(struct task_struct), + ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL); + + task_xstate_cachep = + kmem_cache_create("task_xstate", xstate_size, __alignof__(union thread_xstate), - SLAB_PANIC | SLAB_NOTRACK, 
NULL); + SLAB_PANIC | SLAB_NOTRACK | SLAB_USERCOPY, NULL); +} + +struct task_struct *alloc_task_struct_node(int node) +{ + return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node); +} + +void free_task_struct(struct task_struct *task) +{ + free_thread_xstate(task); + kmem_cache_free(task_struct_cachep, task); } /* @@ -70,7 +87,7 @@ void exit_thread(void) unsigned long *bp = t->io_bitmap_ptr; if (bp) { - struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); + struct tss_struct *tss = init_tss + get_cpu(); t->io_bitmap_ptr = NULL; clear_thread_flag(TIF_IO_BITMAP); @@ -106,7 +123,7 @@ void show_regs_common(void) printk(KERN_CONT "\n"); printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s", - current->pid, current->comm, print_tainted(), + task_pid_nr(current), current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); @@ -120,6 +137,9 @@ void flush_thread(void) { struct task_struct *tsk = current; +#if defined(CONFIG_X86_32) && !defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_PAX_MEMORY_UDEREF) + loadsegment(gs, 0); +#endif flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* @@ -282,10 +302,10 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) regs.di = (unsigned long) arg; #ifdef CONFIG_X86_32 - regs.ds = __USER_DS; - regs.es = __USER_DS; + regs.ds = __KERNEL_DS; + regs.es = __KERNEL_DS; regs.fs = __KERNEL_PERCPU; - regs.gs = __KERNEL_STACK_CANARY; + savesegment(gs, regs.gs); #else regs.ss = __KERNEL_DS; #endif @@ -387,7 +407,7 @@ bool set_pm_idle_to_default(void) return ret; } -void stop_this_cpu(void *dummy) +__noreturn void stop_this_cpu(void *dummy) { local_irq_disable(); /* @@ -629,16 +649,37 @@ static int __init idle_setup(char *str) } early_param("idle", idle_setup); -unsigned long arch_align_stack(unsigned long sp) +#ifdef CONFIG_PAX_RANDKSTACK +void pax_randomize_kstack(struct pt_regs *regs) { - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() % 8192; - return sp & ~0xf; -} + struct thread_struct *thread = ¤t->thread; + unsigned long time; -unsigned long arch_randomize_brk(struct mm_struct *mm) -{ - unsigned long range_end = mm->brk + 0x02000000; - return randomize_range(mm->brk, range_end, 0) ? 
: mm->brk; -} + if (!randomize_va_space) + return; + + if (v8086_mode(regs)) + return; + rdtscl(time); + + /* P4 seems to return a 0 LSB, ignore it */ +#ifdef CONFIG_MPENTIUM4 + time &= 0x3EUL; + time <<= 2; +#elif defined(CONFIG_X86_64) + time &= 0xFUL; + time <<= 4; +#else + time &= 0x1FUL; + time <<= 3; +#endif + + thread->sp0 ^= time; + load_sp0(init_tss + smp_processor_id(), thread); + +#ifdef CONFIG_X86_64 + percpu_write(kernel_stack, thread->sp0); +#endif +} +#endif diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 8598296..3fd3443 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -67,6 +67,7 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); unsigned long thread_saved_pc(struct task_struct *tsk) { return ((unsigned long *)tsk->thread.sp)[3]; +//XXX return tsk->thread.eip; } #ifndef CONFIG_SMP @@ -130,21 +131,20 @@ void __show_regs(struct pt_regs *regs, int all) unsigned long sp; unsigned short ss, gs; - if (user_mode_vm(regs)) { + if (user_mode(regs)) { sp = regs->sp; ss = regs->ss & 0xffff; - gs = get_user_gs(regs); } else { sp = kernel_stack_pointer(regs); savesegment(ss, ss); - savesegment(gs, gs); } + gs = get_user_gs(regs); show_regs_common(); printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", (u16)regs->cs, regs->ip, regs->flags, - smp_processor_id()); + raw_smp_processor_id()); print_symbol("EIP is at %s\n", regs->ip); printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", @@ -200,13 +200,14 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, struct task_struct *tsk; int err; - childregs = task_pt_regs(p); + childregs = task_stack_page(p) + THREAD_SIZE - sizeof(struct pt_regs) - 8; *childregs = *regs; childregs->ax = 0; childregs->sp = sp; p->thread.sp = (unsigned long) childregs; p->thread.sp0 = (unsigned long) (childregs+1); + p->tinfo.lowest_stack = (unsigned long)task_stack_page(p) + 2 * sizeof(unsigned long); p->thread.ip = (unsigned long) ret_from_fork; @@ -296,7 +297,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = init_tss + cpu; fpu_switch_t fpu; /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ @@ -320,6 +321,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ lazy_save_gs(prev->gs); +#ifdef CONFIG_PAX_MEMORY_UDEREF + __set_fs(task_thread_info(next_p)->addr_limit); +#endif + /* * Load the per-thread Thread-Local Storage descriptor. 
*/ @@ -350,6 +355,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ arch_end_context_switch(next_p); + percpu_write(current_task, next_p); + percpu_write(current_tinfo, &next_p->tinfo); + /* * Restore %gs if needed (which is common) */ @@ -358,8 +366,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) switch_fpu_finish(next_p, fpu); - percpu_write(current_task, next_p); - return prev_p; } @@ -389,4 +395,3 @@ unsigned long get_wchan(struct task_struct *p) } while (count++ < 16); return 0; } - diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e361095..4882b55 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -89,7 +89,7 @@ static void __exit_idle(void) void exit_idle(void) { /* idle loop has pid 0 */ - if (current->pid) + if (task_pid_nr(current)) return; __exit_idle(); } @@ -264,8 +264,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, struct pt_regs *childregs; struct task_struct *me = current; - childregs = ((struct pt_regs *) - (THREAD_SIZE + task_stack_page(p))) - 1; + childregs = task_stack_page(p) + THREAD_SIZE - sizeof(struct pt_regs) - 16; *childregs = *regs; childregs->ax = 0; @@ -277,6 +276,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.sp = (unsigned long) childregs; p->thread.sp0 = (unsigned long) (childregs+1); p->thread.usersp = me->thread.usersp; + p->tinfo.lowest_stack = (unsigned long)task_stack_page(p) + 2 * sizeof(unsigned long); set_tsk_thread_flag(p, TIF_FORK); @@ -379,7 +379,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct thread_struct *prev = &prev_p->thread; struct thread_struct *next = &next_p->thread; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = init_tss + cpu; unsigned fsindex, gsindex; fpu_switch_t fpu; @@ -506,10 +506,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) prev->usersp = percpu_read(old_rsp); percpu_write(old_rsp, next->usersp); percpu_write(current_task, next_p); + percpu_write(current_tinfo, &next_p->tinfo); - percpu_write(kernel_stack, - (unsigned long)task_stack_page(next_p) + - THREAD_SIZE - KERNEL_STACK_OFFSET); + percpu_write(kernel_stack, next->sp0); /* * Now maybe reload the debug registers and handle I/O bitmaps @@ -564,12 +563,11 @@ unsigned long get_wchan(struct task_struct *p) if (!p || p == current || p->state == TASK_RUNNING) return 0; stack = (unsigned long)task_stack_page(p); - if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE) + if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE-16-sizeof(u64)) return 0; fp = *(u64 *)(p->thread.sp); do { - if (fp < (unsigned long)stack || - fp >= (unsigned long)stack+THREAD_SIZE) + if (fp < stack || fp > stack+THREAD_SIZE-16-sizeof(u64)) return 0; ip = *(u64 *)(fp+8); if (!in_sched_functions(ip)) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 2dc4121..c7c8aac 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -181,14 +181,13 @@ unsigned long kernel_stack_pointer(struct pt_regs *regs) { unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1); unsigned long sp = (unsigned long)®s->sp; - struct thread_info *tinfo; - if (context == (sp & ~(THREAD_SIZE - 1))) + if (context == ((sp + 8) & ~(THREAD_SIZE - 1))) return sp; - tinfo = (struct thread_info *)context; - if (tinfo->previous_esp) - return tinfo->previous_esp; + sp = *(unsigned long *)context; + if (sp) + return 
sp; return (unsigned long)regs; } @@ -449,6 +448,20 @@ static int putreg(struct task_struct *child, if (child->thread.gs != value) return do_arch_prctl(child, ARCH_SET_GS, value); return 0; + + case offsetof(struct user_regs_struct,ip): + /* + * Protect against any attempt to set ip to an + * impossible address. There are dragons lurking if the + * address is noncanonical. (This explicitly allows + * setting ip to TASK_SIZE_MAX, because user code can do + * that all by itself by running off the end of its + * address space. + */ + if (value > TASK_SIZE_MAX) + return -EIO; + break; + #endif } @@ -585,7 +598,7 @@ static void ptrace_triggered(struct perf_event *bp, static unsigned long ptrace_get_dr7(struct perf_event *bp[]) { int i; - int dr7 = 0; + unsigned long dr7 = 0; struct arch_hw_breakpoint *info; for (i = 0; i < HBP_NUM; i++) { @@ -852,7 +865,7 @@ long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { int ret; - unsigned long __user *datap = (unsigned long __user *)data; + unsigned long __user *datap = (__force unsigned long __user *)data; switch (request) { /* read the word at location addr in the USER area. */ @@ -937,14 +950,14 @@ long arch_ptrace(struct task_struct *child, long request, if ((int) addr < 0) return -EIO; ret = do_get_thread_area(child, addr, - (struct user_desc __user *)data); + (__force struct user_desc __user *) data); break; case PTRACE_SET_THREAD_AREA: if ((int) addr < 0) return -EIO; ret = do_set_thread_area(child, addr, - (struct user_desc __user *)data, 0); + (__force struct user_desc __user *) data, 0); break; #endif @@ -1229,7 +1242,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, #ifdef CONFIG_X86_64 -static struct user_regset x86_64_regsets[] __read_mostly = { +static user_regset_no_const x86_64_regsets[] __read_only = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, .n = sizeof(struct user_regs_struct) / sizeof(long), @@ -1273,7 +1286,7 @@ static const struct user_regset_view user_x86_64_view = { #endif /* CONFIG_X86_64 */ #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION -static struct user_regset x86_32_regsets[] __read_mostly = { +static user_regset_no_const x86_32_regsets[] __read_only = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, .n = sizeof(struct user_regs_struct32) / sizeof(u32), @@ -1326,7 +1339,7 @@ static const struct user_regset_view user_x86_32_view = { */ u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; -void update_regset_xstate_info(unsigned int size, u64 xstate_mask) +void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask) { #ifdef CONFIG_X86_64 x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64); @@ -1361,7 +1374,7 @@ static void fill_sigtrap_info(struct task_struct *tsk, memset(info, 0, sizeof(*info)); info->si_signo = SIGTRAP; info->si_code = si_code; - info->si_addr = user_mode_vm(regs) ? (void __user *)regs->ip : NULL; + info->si_addr = user_mode(regs) ? (__force void __user *)regs->ip : NULL; } void user_single_step_siginfo(struct task_struct *tsk, @@ -1390,6 +1403,10 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, # define IS_IA32 0 #endif +#ifdef CONFIG_GRKERNSEC_SETXID +extern void gr_delayed_cred_worker(void); +#endif + /* * We must return the syscall number to actually look up in the table. * This can be -1L to skip running any syscall at all. 
@@ -1398,6 +1415,11 @@ long syscall_trace_enter(struct pt_regs *regs) { long ret = 0; +#ifdef CONFIG_GRKERNSEC_SETXID + if (unlikely(test_and_clear_thread_flag(TIF_GRSEC_SETXID))) + gr_delayed_cred_worker(); +#endif + /* * If we stepped into a sysenter/syscall insn, it trapped in * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. @@ -1409,7 +1431,11 @@ long syscall_trace_enter(struct pt_regs *regs) regs->flags |= X86_EFLAGS_TF; /* do the secure computing check first */ - secure_computing(regs->orig_ax); + if (secure_computing(regs->orig_ax)) { + /* seccomp failures shouldn't expose any additional code. */ + ret = -1L; + goto out; + } if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) ret = -1L; @@ -1436,6 +1462,7 @@ long syscall_trace_enter(struct pt_regs *regs) #endif } +out: return ret ?: regs->orig_ax; } @@ -1443,6 +1470,11 @@ void syscall_trace_leave(struct pt_regs *regs) { bool step; +#ifdef CONFIG_GRKERNSEC_SETXID + if (unlikely(test_and_clear_thread_flag(TIF_GRSEC_SETXID))) + gr_delayed_cred_worker(); +#endif + if (unlikely(current->audit_context)) audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 42eb330..139955c 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -81,11 +81,11 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src) return pv_tsc_khz; } -static atomic64_t last_value = ATOMIC64_INIT(0); +static atomic64_unchecked_t last_value = ATOMIC64_INIT(0); void pvclock_resume(void) { - atomic64_set(&last_value, 0); + atomic64_set_unchecked(&last_value, 0); } cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) @@ -121,11 +121,11 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) * updating at the same time, and one of them could be slightly behind, * making the assumption that last_value always go forward fail to hold. 
*/ - last = atomic64_read(&last_value); + last = atomic64_read_unchecked(&last_value); do { if (ret < last) return last; - last = atomic64_cmpxchg(&last_value, last, ret); + last = atomic64_cmpxchg_unchecked(&last_value, last, ret); } while (unlikely(last != ret)); return ret; diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 41b2f57..9dd7145 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -35,7 +35,7 @@ void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); static const struct desc_ptr no_idt = {}; -static int reboot_mode; +static unsigned short reboot_mode; enum reboot_type reboot_type = BOOT_ACPI; int reboot_force; @@ -145,7 +145,7 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d) return 0; } -static struct dmi_system_id __initdata reboot_dmi_table[] = { +static const struct dmi_system_id __initconst reboot_dmi_table[] = { { /* Handle problems with rebooting on Dell E520's */ .callback = set_bios_reboot, .ident = "Dell E520", @@ -308,13 +308,17 @@ core_initcall(reboot_init); extern const unsigned char machine_real_restart_asm[]; extern const u64 machine_real_restart_gdt[3]; -void machine_real_restart(unsigned int type) +__noreturn void machine_real_restart(unsigned int type) { void *restart_va; unsigned long restart_pa; - void (*restart_lowmem)(unsigned int); + void (* __noreturn restart_lowmem)(unsigned int); u64 *lowmem_gdt; +#if defined(CONFIG_X86_32) && (defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)) + struct desc_struct *gdt; +#endif + local_irq_disable(); /* Write zero to CMOS register number 0x0f, which the BIOS POST @@ -340,14 +344,14 @@ void machine_real_restart(unsigned int type) boot)". This seems like a fairly standard thing that gets set by REBOOT.COM programs, and the previous reset routine did this too. 
*/ - *((unsigned short *)0x472) = reboot_mode; + *(unsigned short *)(__va(0x472)) = reboot_mode; /* Patch the GDT in the low memory trampoline */ lowmem_gdt = TRAMPOLINE_SYM(machine_real_restart_gdt); restart_va = TRAMPOLINE_SYM(machine_real_restart_asm); restart_pa = virt_to_phys(restart_va); - restart_lowmem = (void (*)(unsigned int))restart_pa; + restart_lowmem = (void *)restart_pa; /* GDT[0]: GDT self-pointer */ lowmem_gdt[0] = @@ -358,7 +362,35 @@ void machine_real_restart(unsigned int type) GDT_ENTRY(0x009b, restart_pa, 0xffff); /* Jump to the identity-mapped low memory code */ + +#if defined(CONFIG_X86_32) && (defined(CONFIG_PAX_KERNEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF)) + gdt = get_cpu_gdt_table(smp_processor_id()); + pax_open_kernel(); +#ifdef CONFIG_PAX_MEMORY_UDEREF + gdt[GDT_ENTRY_KERNEL_DS].type = 3; + gdt[GDT_ENTRY_KERNEL_DS].limit = 0xf; + loadsegment(ds, __KERNEL_DS); + loadsegment(es, __KERNEL_DS); + loadsegment(ss, __KERNEL_DS); +#endif +#ifdef CONFIG_PAX_KERNEXEC + gdt[GDT_ENTRY_KERNEL_CS].base0 = 0; + gdt[GDT_ENTRY_KERNEL_CS].base1 = 0; + gdt[GDT_ENTRY_KERNEL_CS].base2 = 0; + gdt[GDT_ENTRY_KERNEL_CS].limit0 = 0xffff; + gdt[GDT_ENTRY_KERNEL_CS].limit = 0xf; + gdt[GDT_ENTRY_KERNEL_CS].g = 1; +#endif + pax_close_kernel(); +#endif + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) + asm volatile("push %0; push %1; lret\n" : : "i" (__KERNEL_CS), "rm" (restart_lowmem), "a" (type)); + unreachable(); +#else restart_lowmem(type); +#endif + } #ifdef CONFIG_APM_MODULE EXPORT_SYMBOL(machine_real_restart); @@ -590,7 +622,7 @@ void __attribute__((weak)) mach_reboot_fixups(void) * try to force a triple fault and then cycle between hitting the keyboard * controller and doing that */ -static void native_machine_emergency_restart(void) +static void __noreturn native_machine_emergency_restart(void) { int i; int attempt = 0; @@ -717,13 +749,13 @@ void native_machine_shutdown(void) #endif } -static void __machine_emergency_restart(int emergency) +static __noreturn void __machine_emergency_restart(int emergency) { reboot_emergency = emergency; machine_ops.emergency_restart(); } -static void native_machine_restart(char *__unused) +static void __noreturn native_machine_restart(char *__unused) { printk("machine restart\n"); @@ -732,7 +764,7 @@ static void native_machine_restart(char *__unused) __machine_emergency_restart(0); } -static void native_machine_halt(void) +static void __noreturn native_machine_halt(void) { /* stop other cpus and apics */ machine_shutdown(); @@ -743,7 +775,7 @@ static void native_machine_halt(void) stop_this_cpu(NULL); } -static void native_machine_power_off(void) +static void __noreturn native_machine_power_off(void) { if (pm_power_off) { if (!reboot_force) @@ -752,9 +784,10 @@ static void native_machine_power_off(void) } /* a fallback in case there is no PM info available */ tboot_shutdown(TB_SHUTDOWN_HALT); + unreachable(); } -struct machine_ops machine_ops = { +struct machine_ops machine_ops __read_only = { .power_off = native_machine_power_off, .shutdown = native_machine_shutdown, .emergency_restart = native_machine_emergency_restart, diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c index c8e41e9..64049ef 100644 --- a/arch/x86/kernel/reboot_fixups_32.c +++ b/arch/x86/kernel/reboot_fixups_32.c @@ -57,7 +57,7 @@ struct device_fixup { unsigned int vendor; unsigned int device; void (*reboot_fixup)(struct pci_dev *); -}; +} __do_const; /* * PCI ids solely used for fixups_table go here diff --git 
a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b506f41..c954434 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -176,9 +176,17 @@ static struct resource bss_resource = { #ifdef CONFIG_X86_32 /* cpu data as detected by the assembly code in head.S */ -struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1}; +struct cpuinfo_x86 new_cpu_data __cpuinitdata = { + .wp_works_ok = -1, + .hlt_works_ok = 1, + .fdiv_bug = -1, +}; /* common cpu data for all cpus */ -struct cpuinfo_x86 boot_cpu_data __read_mostly = {0, 0, 0, 0, -1, 1, 0, 0, -1}; +struct cpuinfo_x86 boot_cpu_data __read_mostly = { + .wp_works_ok = -1, + .hlt_works_ok = 1, + .fdiv_bug = -1, +}; EXPORT_SYMBOL(boot_cpu_data); static void set_mca_bus(int x) { @@ -447,7 +455,7 @@ static void __init parse_setup_data(void) switch (data->type) { case SETUP_E820_EXT: - parse_e820_ext(data); + parse_e820_ext((struct setup_data __force_kernel *)data); break; case SETUP_DTB: add_dtb(pa_data); @@ -727,7 +735,7 @@ static void __init trim_bios_range(void) * area (640->1Mb) as ram even though it is not. * take them out. */ - e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1); + e820_remove_range(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS, E820_RAM, 1); sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); } @@ -852,14 +860,14 @@ void __init setup_arch(char **cmdline_p) if (!boot_params.hdr.root_flags) root_mountflags &= ~MS_RDONLY; - init_mm.start_code = (unsigned long) _text; - init_mm.end_code = (unsigned long) _etext; + init_mm.start_code = ktla_ktva((unsigned long) _text); + init_mm.end_code = ktla_ktva((unsigned long) _etext); init_mm.end_data = (unsigned long) _edata; init_mm.brk = _brk_end; - code_resource.start = virt_to_phys(_text); - code_resource.end = virt_to_phys(_etext)-1; - data_resource.start = virt_to_phys(_etext); + code_resource.start = virt_to_phys(ktla_ktva(_text)); + code_resource.end = virt_to_phys(ktla_ktva(_etext))-1; + data_resource.start = virt_to_phys(_sdata); data_resource.end = virt_to_phys(_edata)-1; bss_resource.start = virt_to_phys(&__bss_start); bss_resource.end = virt_to_phys(&__bss_stop)-1; diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 5a98aa2..5aa4ffc 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -21,19 +21,17 @@ #include #include -DEFINE_PER_CPU(int, cpu_number); +#ifdef CONFIG_SMP +DEFINE_PER_CPU(unsigned int, cpu_number); EXPORT_PER_CPU_SYMBOL(cpu_number); +#endif -#ifdef CONFIG_X86_64 #define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load) -#else -#define BOOT_PERCPU_OFFSET 0 -#endif DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; EXPORT_PER_CPU_SYMBOL(this_cpu_off); -unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { +unsigned long __per_cpu_offset[NR_CPUS] __read_only = { [0 ... 
NR_CPUS-1] = BOOT_PERCPU_OFFSET, }; EXPORT_SYMBOL(__per_cpu_offset); @@ -66,7 +64,7 @@ static bool __init pcpu_need_numa(void) { #ifdef CONFIG_NEED_MULTIPLE_NODES pg_data_t *last = NULL; - unsigned int cpu; + int cpu; for_each_possible_cpu(cpu) { int node = early_cpu_to_node(cpu); @@ -155,10 +153,10 @@ static inline void setup_percpu_segment(int cpu) { #ifdef CONFIG_X86_32 struct desc_struct gdt; + unsigned long base = per_cpu_offset(cpu); - pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF, - 0x2 | DESCTYPE_S, 0x8); - gdt.s = 1; + pack_descriptor(&gdt, base, (VMALLOC_END - base - 1) >> PAGE_SHIFT, + 0x83 | DESCTYPE_S, 0xC); write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); #endif @@ -219,6 +217,11 @@ void __init setup_per_cpu_areas(void) /* alrighty, percpu areas up and running */ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { +#ifdef CONFIG_CC_STACKPROTECTOR +#ifdef CONFIG_X86_32 + unsigned long canary = per_cpu(stack_canary.canary, cpu); +#endif +#endif per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; @@ -259,6 +262,12 @@ void __init setup_per_cpu_areas(void) */ set_cpu_numa_node(cpu, early_cpu_to_node(cpu)); #endif +#ifdef CONFIG_CC_STACKPROTECTOR +#ifdef CONFIG_X86_32 + if (!cpu) + per_cpu(stack_canary.canary, cpu) = canary; +#endif +#endif /* * Up to this point, the boot CPU has been using .init.data * area. Reload any changed state for the boot CPU. diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 54ddaeb2..158e022 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -198,7 +198,7 @@ static unsigned long align_sigframe(unsigned long sp) * Align the stack pointer according to the i386 ABI, * i.e. so that on function entry ((sp + 4) & 15) == 0. */ - sp = ((sp + 4) & -16ul) - 4; + sp = ((sp - 12) & -16ul) - 4; #else /* !CONFIG_X86_32 */ sp = round_down(sp, 16) - 8; #endif @@ -249,11 +249,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, * Return an always-bogus address instead so we will die with SIGSEGV. */ if (onsigstack && !likely(on_sig_stack(sp))) - return (void __user *)-1L; + return (__force void __user *)-1L; /* save i387 state */ if (used_math() && save_i387_xstate(*fpstate) < 0) - return (void __user *)-1L; + return (__force void __user *)-1L; return (void __user *)sp; } @@ -308,9 +308,9 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, } if (current->mm->context.vdso) - restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn); + restorer = (__force void __user *)VDSO32_SYMBOL(current->mm->context.vdso, sigreturn); else - restorer = &frame->retcode; + restorer = (void __user *)&frame->retcode; if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; @@ -324,7 +324,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, * reasons and because gdb uses it as a signature to notice * signal handler stack frames. */ - err |= __put_user(*((u64 *)&retcode), (u64 *)frame->retcode); + err |= __put_user(*((u64 *)&retcode), (u64 __user *)frame->retcode); if (err) return -EFAULT; @@ -378,7 +378,10 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); /* Set up to return from userspace. 
*/ - restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); + if (current->mm->context.vdso) + restorer = (__force void __user *)VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); + else + restorer = (void __user *)&frame->retcode; if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; put_user_ex(restorer, &frame->pretcode); @@ -390,7 +393,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, * reasons and because gdb uses it as a signature to notice * signal handler stack frames. */ - put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); + put_user_ex(*((u64 *)&rt_retcode), (u64 __user *)frame->retcode); } put_user_catch(err); if (err) @@ -655,19 +658,22 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, { int usig = signr_convert(sig); sigset_t *set = &current->blocked; + sigset_t sigcopy; int ret; if (current_thread_info()->status & TS_RESTORE_SIGMASK) set = &current->saved_sigmask; + + sigcopy = *set; + /* Set up the stack frame */ if (is_ia32) { if (ka->sa.sa_flags & SA_SIGINFO) - ret = ia32_setup_rt_frame(usig, ka, info, set, regs); + ret = ia32_setup_rt_frame(usig, ka, info, &sigcopy, regs); else - ret = ia32_setup_frame(usig, ka, set, regs); + ret = ia32_setup_frame(usig, ka, &sigcopy, regs); } else - ret = __setup_rt_frame(sig, ka, info, set, regs); + ret = __setup_rt_frame(sig, ka, info, &sigcopy, regs); if (ret) { force_sigsegv(sig, current); @@ -769,7 +775,7 @@ static void do_signal(struct pt_regs *regs) * X86_32: vm86 regs switched out by assembly code before reaching * here, so testing against kernel CS suffices. */ - if (!user_mode(regs)) + if (!user_mode_novm(regs)) return; signr = get_signal_to_deliver(&info, &ka, regs, NULL); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 16204dc..0e7d4b7 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -225,7 +225,7 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) irq_exit(); } -struct smp_ops smp_ops = { +struct smp_ops smp_ops __read_only = { .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, .smp_prepare_cpus = native_smp_prepare_cpus, .smp_cpus_done = native_smp_cpus_done, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index bb28f2ca..e377b54 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -252,11 +252,13 @@ notrace static void __cpuinit start_secondary(void *unused) preempt_disable(); smp_callin(); -#ifdef CONFIG_X86_32 /* switch away from the initial page table */ +#ifdef CONFIG_PAX_PER_CPU_PGD + load_cr3(get_cpu_pgd(smp_processor_id())); +#else load_cr3(swapper_pg_dir); +#endif __flush_tlb_all(); -#endif /* otherwise gcc will move up smp_processor_id before the cpu_init */ barrier(); @@ -699,7 +701,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) */ if (c_idle.idle) { c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *) - (THREAD_SIZE + task_stack_page(c_idle.idle))) - 1); + (THREAD_SIZE - 16 + task_stack_page(c_idle.idle))) - 1); init_idle(c_idle.idle, cpu); goto do_rest; } @@ -716,17 +718,20 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) set_idle_for_cpu(cpu, c_idle.idle); do_rest: per_cpu(current_task, cpu) = c_idle.idle; + per_cpu(current_tinfo, cpu) = &c_idle.idle->tinfo; #ifdef CONFIG_X86_32 /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); #else clear_tsk_thread_flag(c_idle.idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); - per_cpu(kernel_stack, cpu) = - (unsigned 
long)task_stack_page(c_idle.idle) - - KERNEL_STACK_OFFSET + THREAD_SIZE; + per_cpu(kernel_stack, cpu) = (unsigned long)task_stack_page(c_idle.idle) - 16 + THREAD_SIZE; #endif + + pax_open_kernel(); early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); + pax_close_kernel(); + initial_code = (unsigned long)start_secondary; stack_start = c_idle.idle->thread.sp; @@ -868,6 +873,12 @@ int __cpuinit native_cpu_up(unsigned int cpu) per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; +#ifdef CONFIG_PAX_PER_CPU_PGD + clone_pgd_range(get_cpu_pgd(cpu) + KERNEL_PGD_BOUNDARY, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + KERNEL_PGD_PTRS); +#endif + err = do_boot_cpu(apicid, cpu); if (err) { pr_debug("do_boot_cpu failed %d\n", err); diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index d4f278e..86c58c0 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -27,10 +27,10 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re struct desc_struct *desc; unsigned long base; - seg &= ~7UL; + seg >>= 3; mutex_lock(&child->mm->context.lock); - if (unlikely((seg >> 3) >= child->mm->context.size)) + if (unlikely(seg >= child->mm->context.size)) addr = -1L; /* bogus selector, access would fault */ else { desc = child->mm->context.ldt + seg; @@ -42,7 +42,8 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re addr += base; } mutex_unlock(&child->mm->context.lock); - } + } else if (seg == __KERNEL_CS || seg == __KERNEXEC_KERNEL_CS) + addr = ktla_ktva(addr); return addr; } @@ -53,6 +54,9 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) unsigned char opcode[15]; unsigned long addr = convert_ip_to_linear(child, regs); + if (addr == -EINVAL) + return 0; + copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); for (i = 0; i < copied; i++) { switch (opcode[i]) { diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 0b0cb5f..560d0df 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -24,17 +24,228 @@ #include -/* - * Do a system call from kernel instead of calling sys_execve so we - * end up with proper pt_regs. 
- */ -int kernel_execve(const char *filename, - const char *const argv[], - const char *const envp[]) +int i386_mmap_check(unsigned long addr, unsigned long len, unsigned long flags) { - long __res; - asm volatile ("int $0x80" - : "=a" (__res) - : "0" (__NR_execve), "b" (filename), "c" (argv), "d" (envp) : "memory"); - return __res; + unsigned long pax_task_size = TASK_SIZE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (current->mm->pax_flags & MF_PAX_SEGMEXEC) + pax_task_size = SEGMEXEC_TASK_SIZE; +#endif + + if (flags & MAP_FIXED) + if (len > pax_task_size || addr > pax_task_size - len) + return -EINVAL; + + return 0; +} + +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long start_addr, pax_task_size = TASK_SIZE; + unsigned long offset = gr_rand_threadstack_offset(mm, filp, flags); + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + pax_task_size = SEGMEXEC_TASK_SIZE; +#endif + + pax_task_size -= PAGE_SIZE; + + if (len > pax_task_size) + return -ENOMEM; + + if (flags & MAP_FIXED) + return addr; + +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + + if (addr) { + addr = PAGE_ALIGN(addr); + if (pax_task_size - len >= addr) { + vma = find_vma(mm, addr); + if (check_heap_stack_gap(vma, &addr, len, offset)) + return addr; + } + } + if (len > mm->cached_hole_size) { + start_addr = addr = mm->free_area_cache; + } else { + start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; + } + +#ifdef CONFIG_PAX_PAGEEXEC + if (!(__supported_pte_mask & _PAGE_NX) && (mm->pax_flags & MF_PAX_PAGEEXEC) && (flags & MAP_EXECUTABLE) && start_addr >= mm->mmap_base) { + start_addr = 0x00110000UL; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + start_addr += mm->delta_mmap & 0x03FFF000UL; +#endif + + if (mm->start_brk <= start_addr && start_addr < mm->mmap_base) + start_addr = addr = mm->mmap_base; + else + addr = start_addr; + } +#endif + +full_search: + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). */ + if (pax_task_size - len < addr) { + /* + * Start a new search - just in case we missed + * some holes. 
+ */ + if (start_addr != mm->mmap_base) { + start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; + goto full_search; + } + return -ENOMEM; + } + if (check_heap_stack_gap(vma, &addr, len, offset)) + break; + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + addr = vma->vm_end; + if (mm->start_brk <= addr && addr < mm->mmap_base) { + start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; + goto full_search; + } + } + + /* + * Remember the place where we stopped the search: + */ + mm->free_area_cache = addr + len; + return addr; +} + +unsigned long +arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + unsigned long base = mm->mmap_base, addr = addr0, pax_task_size = TASK_SIZE; + unsigned long offset = gr_rand_threadstack_offset(mm, filp, flags); + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + pax_task_size = SEGMEXEC_TASK_SIZE; +#endif + + pax_task_size -= PAGE_SIZE; + + /* requested length too big for entire address space */ + if (len > pax_task_size) + return -ENOMEM; + + if (flags & MAP_FIXED) + return addr; + +#ifdef CONFIG_PAX_PAGEEXEC + if (!(__supported_pte_mask & _PAGE_NX) && (mm->pax_flags & MF_PAX_PAGEEXEC) && (flags & MAP_EXECUTABLE)) + goto bottomup; +#endif + +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + + /* requesting a specific address */ + if (addr) { + addr = PAGE_ALIGN(addr); + if (pax_task_size - len >= addr) { + vma = find_vma(mm, addr); + if (check_heap_stack_gap(vma, &addr, len, offset)) + return addr; + } + } + + /* check if free_area_cache is useful for us */ + if (len <= mm->cached_hole_size) { + mm->cached_hole_size = 0; + mm->free_area_cache = mm->mmap_base; + } + + /* either no address requested or can't fit in requested address hole */ + addr = mm->free_area_cache; + + /* make sure it can fit in the remaining address space */ + if (addr > len) { + addr -= len; + vma = find_vma(mm, addr); + if (check_heap_stack_gap(vma, &addr, len, offset)) + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr); + } + + if (mm->mmap_base < len) + goto bottomup; + + addr = mm->mmap_base-len; + + do { + /* + * Lookup failure means no vma is above this address, + * else if new region fits below vma->vm_start, + * return with success: + */ + vma = find_vma(mm, addr); + if (check_heap_stack_gap(vma, &addr, len, offset)) + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr); + + /* remember the largest hole we saw so far */ + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + + /* try just below the current vma->vm_start */ + addr = skip_heap_stack_gap(vma, len, offset); + } while (!IS_ERR_VALUE(addr)); + +bottomup: + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. 
+ */ + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + mm->mmap_base = SEGMEXEC_TASK_UNMAPPED_BASE; + else +#endif + + mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + + mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + /* + * Restore the topdown base: + */ + mm->mmap_base = base; + mm->free_area_cache = base; + mm->cached_hole_size = ~0UL; + + return addr; } diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index cdb2fc9..a7264e0 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -95,8 +95,8 @@ out: return error; } -static void find_start_end(unsigned long flags, unsigned long *begin, - unsigned long *end) +static void find_start_end(struct mm_struct *mm, unsigned long flags, + unsigned long *begin, unsigned long *end) { if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) { unsigned long new_begin; @@ -115,7 +115,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin, *begin = new_begin; } } else { - *begin = current->mm->mmap_legacy_base; + *begin = mm->mmap_legacy_base; *end = TASK_SIZE; } } @@ -128,20 +128,24 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, struct vm_area_struct *vma; unsigned long start_addr; unsigned long begin, end; + unsigned long offset = gr_rand_threadstack_offset(mm, filp, flags); if (flags & MAP_FIXED) return addr; - find_start_end(flags, &begin, &end); + find_start_end(mm, flags, &begin, &end); if (len > end) return -ENOMEM; +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); - if (end - len >= addr && - (!vma || addr + len <= vma->vm_start)) + if (end - len >= addr && check_heap_stack_gap(vma, &addr, len, offset)) return addr; } if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32)) @@ -172,7 +176,7 @@ full_search: } return -ENOMEM; } - if (!vma || addr + len <= vma->vm_start) { + if (check_heap_stack_gap(vma, &addr, len, offset)) { /* * Remember the place where we stopped the search: */ @@ -195,7 +199,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, { struct vm_area_struct *vma; struct mm_struct *mm = current->mm; - unsigned long addr = addr0; + unsigned long base = mm->mmap_base, addr = addr0; + unsigned long offset = gr_rand_threadstack_offset(mm, filp, flags); /* requested length too big for entire address space */ if (len > TASK_SIZE) @@ -208,13 +213,18 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, if (!test_thread_flag(TIF_IA32) && (flags & MAP_32BIT)) goto bottomup; +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + /* requesting a specific address */ if (addr) { addr = PAGE_ALIGN(addr); - vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) - return addr; + if (TASK_SIZE - len >= addr) { + vma = find_vma(mm, addr); + if (check_heap_stack_gap(vma, &addr, len, offset)) + return addr; + } } /* check if free_area_cache is useful for us */ @@ -232,7 +242,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, ALIGN_TOPDOWN); vma = find_vma(mm, tmp_addr); - if (!vma || tmp_addr + len <= vma->vm_start) + if (check_heap_stack_gap(vma, &tmp_addr, len, offset)) /* remember the address as a hint for next time */ 
return mm->free_area_cache = tmp_addr; } @@ -251,7 +261,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, * return with success: */ vma = find_vma(mm, addr); - if (!vma || addr+len <= vma->vm_start) + if (check_heap_stack_gap(vma, &addr, len, offset)) /* remember the address as a hint for next time */ return mm->free_area_cache = addr; @@ -260,8 +270,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, mm->cached_hole_size = vma->vm_start - addr; /* try just below the current vma->vm_start */ - addr = vma->vm_start-len; - } while (len < vma->vm_start); + addr = skip_heap_stack_gap(vma, len, offset); + } while (!IS_ERR_VALUE(addr)); bottomup: /* @@ -270,13 +280,21 @@ bottomup: * can happen with large stack limits and large mmap() * allocations. */ + mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + + mm->free_area_cache = mm->mmap_base; mm->cached_hole_size = ~0UL; - mm->free_area_cache = TASK_UNMAPPED_BASE; addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); /* * Restore the topdown base: */ - mm->free_area_cache = mm->mmap_base; + mm->mmap_base = base; + mm->free_area_cache = base; mm->cached_hole_size = ~0UL; return addr; diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 9a0e312..b5ecbc0 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -1,3 +1,4 @@ +.section .rodata,"a",@progbits ENTRY(sys_call_table) .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ .long sys_exit @@ -272,7 +273,7 @@ ENTRY(sys_call_table) .long sys_tgkill /* 270 */ .long sys_utimes .long sys_fadvise64_64 - .long sys_ni_syscall /* sys_vserver */ + .long sys_vserver .long sys_mbind .long sys_get_mempolicy .long sys_set_mempolicy diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index e2410e2..5e8d841 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -219,7 +219,7 @@ static int tboot_setup_sleep(void) void tboot_shutdown(u32 shutdown_type) { - void (*shutdown)(void); + void (* __noreturn shutdown)(void); if (!tboot_enabled()) return; @@ -241,7 +241,7 @@ void tboot_shutdown(u32 shutdown_type) switch_to_tboot_pt(); - shutdown = (void(*)(void))(unsigned long)tboot->shutdown_entry; + shutdown = (void *)(unsigned long)tboot->shutdown_entry; shutdown(); /* should not reach here */ @@ -298,7 +298,7 @@ void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control) tboot_shutdown(acpi_shutdown_map[sleep_state]); } -static atomic_t ap_wfs_count; +static atomic_unchecked_t ap_wfs_count; static int tboot_wait_for_aps(int num_aps) { @@ -322,16 +322,16 @@ static int __cpuinit tboot_cpu_callback(struct notifier_block *nfb, { switch (action) { case CPU_DYING: - atomic_inc(&ap_wfs_count); + atomic_inc_unchecked(&ap_wfs_count); if (num_online_cpus() == 1) - if (tboot_wait_for_aps(atomic_read(&ap_wfs_count))) + if (tboot_wait_for_aps(atomic_read_unchecked(&ap_wfs_count))) return NOTIFY_BAD; break; } return NOTIFY_OK; } -static struct notifier_block tboot_cpu_notifier __cpuinitdata = +static struct notifier_block tboot_cpu_notifier = { .notifier_call = tboot_cpu_callback, }; @@ -343,7 +343,7 @@ static __init int tboot_late_init(void) tboot_create_trampoline(); - atomic_set(&ap_wfs_count, 0); + atomic_set_unchecked(&ap_wfs_count, 0); register_hotcpu_notifier(&tboot_cpu_notifier); return 0; } diff --git 
a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index dd5fbf4..b7f2232 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -31,9 +31,9 @@ unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); - if (!user_mode_vm(regs) && in_lock_functions(pc)) { + if (!user_mode(regs) && in_lock_functions(pc)) { #ifdef CONFIG_FRAME_POINTER - return *(unsigned long *)(regs->bp + sizeof(long)); + return ktla_ktva(*(unsigned long *)(regs->bp + sizeof(long))); #else unsigned long *sp = (unsigned long *)kernel_stack_pointer(regs); @@ -42,11 +42,17 @@ unsigned long profile_pc(struct pt_regs *regs) * or above a saved flags. Eflags has bits 22-31 zero, * kernel addresses don't. */ + +#ifdef CONFIG_PAX_KERNEXEC + return ktla_ktva(sp[0]); +#else if (sp[0] >> 22) return sp[0]; if (sp[1] >> 22) return sp[1]; #endif + +#endif } return pc; } diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 0c38d06..79ea0e3 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -140,6 +140,11 @@ int do_set_thread_area(struct task_struct *p, int idx, if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) return -EINVAL; +#ifdef CONFIG_PAX_SEGMEXEC + if ((p->mm->pax_flags & MF_PAX_SEGMEXEC) && (info.contents & MODIFY_LDT_CONTENTS_CODE)) + return -EINVAL; +#endif + set_tls_desc(p, idx, &info, 1); return 0; @@ -261,7 +266,7 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset, if (kbuf) info = kbuf; - else if (__copy_from_user(infobuf, ubuf, count)) + else if (count > sizeof infobuf || __copy_from_user(infobuf, ubuf, count)) return -EFAULT; else info = infobuf; diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S index 451c0a7..e57f551 100644 --- a/arch/x86/kernel/trampoline_32.S +++ b/arch/x86/kernel/trampoline_32.S @@ -32,6 +32,12 @@ #include #include +#ifdef CONFIG_PAX_KERNEXEC +#define ta(X) (X) +#else +#define ta(X) ((X) - __PAGE_OFFSET) +#endif + #ifdef CONFIG_SMP .section ".x86_trampoline","a" @@ -62,7 +68,7 @@ r_base = . inc %ax # protected mode (PE) bit lmsw %ax # into protected mode # flush prefetch and jump to startup_32_smp in arch/i386/kernel/head.S - ljmpl $__BOOT_CS, $(startup_32_smp-__PAGE_OFFSET) + ljmpl $__BOOT_CS, $ta(startup_32_smp) # These need to be in the same 64K segment as the above; # hence we don't use the boot_gdt_descr defined in head.S diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 09ff517..df19fbff 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S @@ -90,7 +90,7 @@ startup_32: movl $__KERNEL_DS, %eax # Initialize the %ds segment register movl %eax, %ds - movl $X86_CR4_PAE, %eax + movl $(X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE), %eax movl %eax, %cr4 # Enable PAE mode # Setup trampoline 4 level pagetables @@ -138,7 +138,7 @@ tidt: # so the kernel can live anywhere .balign 4 tgdt: - .short tgdt_end - tgdt # gdt limit + .short tgdt_end - tgdt - 1 # gdt limit .long tgdt - r_base .short 0 .quad 0x00cf9b000000ffff # __KERNEL32_CS diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index edbd30a..d83c9a9 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -70,12 +70,6 @@ asmlinkage int system_call(void); /* Do we ignore FPU interrupts ? */ char ignore_fpu_irq; - -/* - * The IDT has to be page-aligned to simplify the Pentium - * F0 0F bug workaround. 
- */ -gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, }; #endif DECLARE_BITMAP(used_vectors, NR_VECTORS); @@ -108,13 +102,13 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) } static void __kprobes -do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, +do_trap(int trapnr, int signr, const char *str, struct pt_regs *regs, long error_code, siginfo_t *info) { struct task_struct *tsk = current; #ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) { + if (v8086_mode(regs)) { /* * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. * On nmi (interrupt 2), do_trap should not be called. @@ -125,7 +119,7 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, } #endif - if (!user_mode(regs)) + if (!user_mode_novm(regs)) goto kernel_trap; #ifdef CONFIG_X86_32 @@ -148,7 +142,7 @@ trap_signal: printk_ratelimit()) { printk(KERN_INFO "%s[%d] trap %s ip:%lx sp:%lx error:%lx", - tsk->comm, tsk->pid, str, + tsk->comm, task_pid_nr(tsk), str, regs->ip, regs->sp, error_code); print_vma_addr(" in ", regs->ip); printk("\n"); @@ -165,8 +159,20 @@ kernel_trap: if (!fixup_exception(regs)) { tsk->thread.error_code = error_code; tsk->thread.trap_no = trapnr; + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) + if (trapnr == X86_TRAP_SS && ((regs->cs & 0xFFFF) == __KERNEL_CS || (regs->cs & 0xFFFF) == __KERNEXEC_KERNEL_CS)) + str = "PAX: suspicious stack segment fault"; +#endif + die(str, regs, error_code); } + +#ifdef CONFIG_PAX_REFCOUNT + if (trapnr == X86_TRAP_OF) + pax_report_refcount_overflow(regs); +#endif + return; #ifdef CONFIG_X86_32 @@ -254,6 +260,11 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) tsk->thread.error_code = error_code; tsk->thread.trap_no = X86_TRAP_DF; +#ifdef CONFIG_GRKERNSEC_KSTACKOVERFLOW + if ((unsigned long)tsk->stack - regs->sp <= PAGE_SIZE) + die("grsec: kernel stack overflow detected", regs, error_code); +#endif + /* * This is always a kernel trap and never fixable (and thus must * never return). 
@@ -271,14 +282,30 @@ do_general_protection(struct pt_regs *regs, long error_code) conditional_sti(regs); #ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) + if (v8086_mode(regs)) goto gp_in_vm86; #endif tsk = current; - if (!user_mode(regs)) + if (!user_mode_novm(regs)) goto gp_in_kernel; +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_PAGEEXEC) + if (!(__supported_pte_mask & _PAGE_NX) && tsk->mm && (tsk->mm->pax_flags & MF_PAX_PAGEEXEC)) { + struct mm_struct *mm = tsk->mm; + unsigned long limit; + + down_write(&mm->mmap_sem); + limit = mm->context.user_cs_limit; + if (limit < TASK_SIZE) { + track_exec_limit(mm, limit, TASK_SIZE, VM_EXEC); + up_write(&mm->mmap_sem); + return; + } + up_write(&mm->mmap_sem); + } +#endif + tsk->thread.error_code = error_code; tsk->thread.trap_no = X86_TRAP_GP; @@ -311,6 +338,13 @@ gp_in_kernel: if (notify_die(DIE_GPF, "general protection fault", regs, error_code, X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP) return; + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) + if ((regs->cs & 0xFFFF) == __KERNEL_CS || (regs->cs & 0xFFFF) == __KERNEXEC_KERNEL_CS) + die("PAX: suspicious general protection fault", regs, error_code); + else +#endif + die("general protection fault", regs, error_code); } @@ -383,13 +417,16 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) container_of(task_pt_regs(current), struct bad_iret_stack, regs); + if ((current->thread.sp0 ^ (unsigned long)s) < THREAD_SIZE) + new_stack = s; + /* Copy the IRET target to the new stack. */ memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); /* Copy the remainder of the stack from the current stack. */ memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip)); - BUG_ON(!user_mode_vm(&new_stack->regs)); + BUG_ON(!user_mode(&new_stack->regs)); return new_stack; } #endif @@ -435,7 +472,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) * then it's very likely the result of an icebp/int01 trap. * User wants a sigtrap for that. */ - if (!dr6 && user_mode_vm(regs)) + if (!dr6 && user_mode(regs)) user_icebp = 1; /* Catch kmemcheck conditions first of all! */ @@ -460,7 +497,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) /* It's safe to allow irq's after DR6 has been saved */ preempt_conditional_sti(regs); - if (regs->flags & X86_VM_MASK) { + if (v8086_mode(regs)) { handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, X86_TRAP_DB); preempt_conditional_cli(regs); @@ -474,7 +511,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) * We already checked v86 mode above, so we can check for kernel mode * by just checking the CPL of CS. */ - if ((dr6 & DR_STEP) && !user_mode(regs)) { + if ((dr6 & DR_STEP) && !user_mode_novm(regs)) { tsk->thread.debugreg6 &= ~DR_STEP; set_tsk_thread_flag(tsk, TIF_SINGLESTEP); regs->flags &= ~X86_EFLAGS_TF; @@ -504,7 +541,7 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr) return; conditional_sti(regs); - if (!user_mode_vm(regs)) + if (!user_mode(regs)) { if (!fixup_exception(regs)) { task->thread.error_code = error_code; @@ -617,8 +654,8 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void) void __math_state_restore(struct task_struct *tsk) { /* We need a safe address that is cheap to find and that is already - in L1. We've just brought in "tsk->thread.has_fpu", so use that */ -#define safe_address (tsk->thread.has_fpu) + in L1. 
*/ +#define safe_address (init_tss[raw_smp_processor_id()].x86_tss.sp0) /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is pending. Clear the x87 state here by setting it to fixed diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index b9242ba..50c5edd 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S @@ -20,6 +20,7 @@ * arch/x86/boot/compressed/head_64.S: Boot cpu verification * arch/x86/kernel/trampoline_64.S: secondary processor verification * arch/x86/kernel/head_32.S: processor startup + * arch/x86/kernel/acpi/realmode/wakeup.S: 32bit processor resume * * verify_cpu, returns the status of longmode and SSE in register %eax. * 0: Success 1: Failure diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 04b8726..0c35b29 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -148,7 +149,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) do_exit(SIGSEGV); } - tss = &per_cpu(init_tss, get_cpu()); + tss = init_tss + get_cpu(); current->thread.sp0 = current->thread.saved_sp0; current->thread.sysenter_cs = __KERNEL_CS; load_sp0(tss, &current->thread); @@ -210,6 +211,13 @@ int sys_vm86old(struct vm86_struct __user *v86, struct pt_regs *regs) struct task_struct *tsk; int tmp, ret = -EPERM; +#ifdef CONFIG_GRKERNSEC_VM86 + if (!capable(CAP_SYS_RAWIO)) { + gr_handle_vm86(); + goto out; + } +#endif + tsk = current; if (tsk->thread.saved_sp0) goto out; @@ -240,6 +248,14 @@ int sys_vm86(unsigned long cmd, unsigned long arg, struct pt_regs *regs) int tmp, ret; struct vm86plus_struct __user *v86; +#ifdef CONFIG_GRKERNSEC_VM86 + if (!capable(CAP_SYS_RAWIO)) { + gr_handle_vm86(); + ret = -EPERM; + goto out; + } +#endif + tsk = current; switch (cmd) { case VM86_REQUEST_IRQ: @@ -326,7 +342,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk tsk->thread.saved_fs = info->regs32->fs; tsk->thread.saved_gs = get_user_gs(info->regs32); - tss = &per_cpu(init_tss, get_cpu()); + tss = init_tss + get_cpu(); tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; if (cpu_has_sep) tsk->thread.sysenter_cs = 0; @@ -531,7 +547,7 @@ static void do_int(struct kernel_vm86_regs *regs, int i, goto cannot_handle; if (i == 0x21 && is_revectored(AH(regs), &KVM86->int21_revectored)) goto cannot_handle; - intr_ptr = (unsigned long __user *) (i << 2); + intr_ptr = (__force unsigned long __user *) (i << 2); if (get_user(segoffs, intr_ptr)) goto cannot_handle; if ((segoffs >> 16) == BIOSSEG) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 0f703f1..045a8f1 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -26,6 +26,13 @@ #include #include #include +#include + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) +#define __KERNEL_TEXT_OFFSET (LOAD_OFFSET + ____LOAD_PHYSICAL_ADDR) +#else +#define __KERNEL_TEXT_OFFSET 0 +#endif #undef i386 /* in case the preprocessor is a 32bit one */ @@ -69,30 +76,43 @@ jiffies_64 = jiffies; PHDRS { text PT_LOAD FLAGS(5); /* R_E */ +#ifdef CONFIG_X86_32 + module PT_LOAD FLAGS(5); /* R_E */ +#endif +#ifdef CONFIG_XEN + rodata PT_LOAD FLAGS(5); /* R_E */ +#else + rodata PT_LOAD FLAGS(4); /* R__ */ +#endif data PT_LOAD FLAGS(6); /* RW_ */ -#ifdef CONFIG_X86_64 + init.begin PT_LOAD FLAGS(6); /* RW_ */ #ifdef CONFIG_SMP percpu PT_LOAD FLAGS(6); /* RW_ */ #endif + text.init PT_LOAD FLAGS(5); /* R_E */ + text.exit 
PT_LOAD FLAGS(5); /* R_E */ init PT_LOAD FLAGS(7); /* RWE */ -#endif note PT_NOTE FLAGS(0); /* ___ */ } SECTIONS { #ifdef CONFIG_X86_32 - . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; - phys_startup_32 = startup_32 - LOAD_OFFSET; + . = LOAD_OFFSET + ____LOAD_PHYSICAL_ADDR; #else - . = __START_KERNEL; - phys_startup_64 = startup_64 - LOAD_OFFSET; + . = __START_KERNEL; #endif /* Text and read-only data */ - .text : AT(ADDR(.text) - LOAD_OFFSET) { - _text = .; + .text (. - __KERNEL_TEXT_OFFSET): AT(ADDR(.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { /* bootstrapping code */ +#ifdef CONFIG_X86_32 + phys_startup_32 = startup_32 - LOAD_OFFSET + __KERNEL_TEXT_OFFSET; +#else + phys_startup_64 = startup_64 - LOAD_OFFSET + __KERNEL_TEXT_OFFSET; +#endif + __LOAD_PHYSICAL_ADDR = . - LOAD_OFFSET + __KERNEL_TEXT_OFFSET; + _text = .; HEAD_TEXT #ifdef CONFIG_X86_32 . = ALIGN(PAGE_SIZE); @@ -108,13 +128,48 @@ SECTIONS IRQENTRY_TEXT *(.fixup) *(.gnu.warning) - /* End of text section */ - _etext = .; } :text = 0x9090 - NOTES :text :note + . += __KERNEL_TEXT_OFFSET; - EXCEPTION_TABLE(16) :text = 0x9090 +#ifdef CONFIG_X86_32 + . = ALIGN(PAGE_SIZE); + .module.text : AT(ADDR(.module.text) - LOAD_OFFSET) { + +#ifdef CONFIG_PAX_KERNEXEC + MODULES_EXEC_VADDR = .; + BYTE(0) + . += (CONFIG_PAX_KERNEXEC_MODULE_TEXT * 1024 * 1024); + . = ALIGN(HPAGE_SIZE) - 1; + MODULES_EXEC_END = .; +#endif + + } :module +#endif + + .text.end : AT(ADDR(.text.end) - LOAD_OFFSET) { + /* End of text section */ + BYTE(0) + _etext = . - __KERNEL_TEXT_OFFSET; + } + +#ifdef CONFIG_X86_32 + . = ALIGN(PAGE_SIZE); + .rodata.page_aligned : AT(ADDR(.rodata.page_aligned) - LOAD_OFFSET) { + *(.idt) + . = ALIGN(PAGE_SIZE); + *(.empty_zero_page) + *(.initial_pg_fixmap) + *(.initial_pg_pmd) + *(.initial_page_table) + *(.swapper_pg_dir) + } :rodata +#endif + + . = ALIGN(PAGE_SIZE); + NOTES :rodata :note + + EXCEPTION_TABLE(16) :rodata #if defined(CONFIG_DEBUG_RODATA) /* .text should occupy whole number of pages */ @@ -126,16 +181,20 @@ SECTIONS /* Data */ .data : AT(ADDR(.data) - LOAD_OFFSET) { + +#ifdef CONFIG_PAX_KERNEXEC + . = ALIGN(HPAGE_SIZE); +#else + . = ALIGN(PAGE_SIZE); +#endif + /* Start of data section */ _sdata = .; /* init_task */ INIT_TASK_DATA(THREAD_SIZE) -#ifdef CONFIG_X86_32 - /* 32 bit has nosave before _edata */ NOSAVE_DATA -#endif PAGE_ALIGNED_DATA(PAGE_SIZE) @@ -176,12 +235,19 @@ SECTIONS #endif /* CONFIG_X86_64 */ /* Init code and data - will be freed after init */ - . = ALIGN(PAGE_SIZE); .init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) { + BYTE(0) + +#ifdef CONFIG_PAX_KERNEXEC + . = ALIGN(HPAGE_SIZE); +#else + . = ALIGN(PAGE_SIZE); +#endif + __init_begin = .; /* paired with __init_end */ - } + } :init.begin -#if defined(CONFIG_X86_64) && defined(CONFIG_SMP) +#ifdef CONFIG_SMP /* * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the * output PHDR, so the next output section - .init.text - should @@ -190,12 +256,27 @@ SECTIONS PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu) #endif - INIT_TEXT_SECTION(PAGE_SIZE) -#ifdef CONFIG_X86_64 - :init -#endif + . = ALIGN(PAGE_SIZE); + init_begin = .; + .init.text (. - __KERNEL_TEXT_OFFSET): AT(init_begin - LOAD_OFFSET) { + VMLINUX_SYMBOL(_sinittext) = .; + INIT_TEXT + . = ALIGN(PAGE_SIZE); + } :text.init - INIT_DATA_SECTION(16) + /* + * .exit.text is discard at runtime, not link time, to deal with + * references from .altinstructions and .eh_frame + */ + .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { + EXIT_TEXT + VMLINUX_SYMBOL(_einittext) = .; + . 
= ALIGN(16); + } :text.exit + . = init_begin + SIZEOF(.init.text) + SIZEOF(.exit.text); + + . = ALIGN(PAGE_SIZE); + INIT_DATA_SECTION(16) :init /* * Code and data for a variety of lowlevel trampolines, to be @@ -269,19 +350,12 @@ SECTIONS } . = ALIGN(8); - /* - * .exit.text is discard at runtime, not link time, to deal with - * references from .altinstructions and .eh_frame - */ - .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { - EXIT_TEXT - } .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { EXIT_DATA } -#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) +#ifndef CONFIG_SMP PERCPU_SECTION(INTERNODE_CACHE_BYTES) #endif @@ -300,16 +374,10 @@ SECTIONS .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { __smp_locks = .; *(.smp_locks) - . = ALIGN(PAGE_SIZE); __smp_locks_end = .; + . = ALIGN(PAGE_SIZE); } -#ifdef CONFIG_X86_64 - .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { - NOSAVE_DATA - } -#endif - /* BSS */ . = ALIGN(PAGE_SIZE); .bss : AT(ADDR(.bss) - LOAD_OFFSET) { @@ -325,6 +393,7 @@ SECTIONS __brk_base = .; . += 64 * 1024; /* 64k alignment slop space */ *(.brk_reservation) /* areas brk users have reserved */ + . = ALIGN(HPAGE_SIZE); __brk_limit = .; } @@ -351,13 +420,12 @@ SECTIONS * for the boot processor. */ #define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load -INIT_PER_CPU(gdt_page); INIT_PER_CPU(irq_stack_union); /* * Build-time check on the image size: */ -. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), +. = ASSERT((_end - _text - __KERNEL_TEXT_OFFSET <= KERNEL_IMAGE_SIZE), "kernel image bigger than KERNEL_IMAGE_SIZE"); #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index e4d4a22..47ee71f 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -57,15 +57,13 @@ DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), }; -static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE; +static enum { EMULATE, NONE } vsyscall_mode = EMULATE; static int __init vsyscall_setup(char *str) { if (str) { if (!strcmp("emulate", str)) vsyscall_mode = EMULATE; - else if (!strcmp("native", str)) - vsyscall_mode = NATIVE; else if (!strcmp("none", str)) vsyscall_mode = NONE; else @@ -178,7 +176,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) tsk = current; if (seccomp_mode(&tsk->seccomp)) - do_exit(SIGKILL); + do_group_exit(SIGKILL); switch (vsyscall_nr) { case 0: @@ -220,8 +218,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) return true; sigsegv: - force_sig(SIGSEGV, current); - return true; + do_group_exit(SIGKILL); } /* @@ -274,10 +271,7 @@ void __init map_vsyscall(void) extern char __vvar_page; unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page); - __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall, - vsyscall_mode == NATIVE - ? 
PAGE_KERNEL_VSYSCALL - : PAGE_KERNEL_VVAR); + __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall, PAGE_KERNEL_VVAR); BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_FIRST_PAGE) != (unsigned long)VSYSCALL_START); diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 9796c2f..f686fbf 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -29,8 +29,6 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(copy_user_generic_string); EXPORT_SYMBOL(copy_user_generic_unrolled); EXPORT_SYMBOL(__copy_user_nocache); -EXPORT_SYMBOL(_copy_from_user); -EXPORT_SYMBOL(_copy_to_user); EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index c1d6cd5..204ac00 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -90,14 +90,14 @@ struct x86_init_ops x86_init __initdata = { }, }; -struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { +struct x86_cpuinit_ops x86_cpuinit __cpuinitconst = { .setup_percpu_clockev = setup_secondary_APIC_clock, }; static void default_nmi_init(void) { }; static int default_i8042_detect(void) { return 1; }; -struct x86_platform_ops x86_platform = { +struct x86_platform_ops x86_platform __read_only = { .calibrate_tsc = native_calibrate_tsc, .wallclock_init = wallclock_init_noop, .get_wallclock = mach_get_cmos_time, @@ -110,7 +110,7 @@ struct x86_platform_ops x86_platform = { }; EXPORT_SYMBOL_GPL(x86_platform); -struct x86_msi_ops x86_msi = { +struct x86_msi_ops x86_msi __read_only = { .setup_msi_irqs = native_setup_msi_irqs, .teardown_msi_irq = native_teardown_msi_irq, .teardown_msi_irqs = default_teardown_msi_irqs, diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 7110911..069da9c 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -130,7 +130,7 @@ int check_for_xstate(struct i387_fxsave_struct __user *buf, fx_sw_user->xstate_size > fx_sw_user->extended_size) return -EINVAL; - err = __get_user(magic2, (__u32 *) (((void *)fpstate) + + err = __get_user(magic2, (__u32 __user *) (((void __user *)fpstate) + fx_sw_user->extended_size - FP_XSTATE_MAGIC2_SIZE)); if (err) @@ -266,7 +266,7 @@ fx_only: * the other extended state. */ xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE); - return fxrstor_checking((__force struct i387_fxsave_struct *)buf); + return fxrstor_checking((struct i387_fxsave_struct __user *)buf); } /* @@ -295,8 +295,7 @@ int restore_i387_xstate(void __user *buf) if (use_xsave()) err = restore_user_xstate(buf); else - err = fxrstor_checking((__force struct i387_fxsave_struct *) - buf); + err = fxrstor_checking((struct i387_fxsave_struct __user *)buf); if (unlikely(err)) { /* * Encountered an error while doing the restore from the diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 9af0b82..086874c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -249,6 +249,7 @@ struct gprefix { #define ____emulate_2op(ctxt, _op, _x, _y, _suffix, _dsttype) \ do { \ + unsigned long _tmp; \ __asm__ __volatile__ ( \ _PRE_EFLAGS("0", "4", "2") \ _op _suffix " %"_x"3,%1; " \ @@ -263,8 +264,6 @@ struct gprefix { /* Raw emulation: instruction has two explicit operands. 
*/ #define __emulate_2op_nobyte(ctxt,_op,_wx,_wy,_lx,_ly,_qx,_qy) \ do { \ - unsigned long _tmp; \ - \ switch ((ctxt)->dst.bytes) { \ case 2: \ ____emulate_2op(ctxt,_op,_wx,_wy,"w",u16); \ @@ -280,7 +279,6 @@ struct gprefix { #define __emulate_2op(ctxt,_op,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ do { \ - unsigned long _tmp; \ switch ((ctxt)->dst.bytes) { \ case 1: \ ____emulate_2op(ctxt,_op,_bx,_by,"b",u8); \ @@ -383,8 +381,7 @@ struct gprefix { _ASM_EXTABLE(1b, 3b) \ : "=m" ((ctxt)->eflags), "=&r" (_tmp), \ "+a" (*rax), "+d" (*rdx), "+qm"(_ex) \ - : "i" (EFLAGS_MASK), "m" ((ctxt)->src.val), \ - "a" (*rax), "d" (*rdx)); \ + : "i" (EFLAGS_MASK), "m" ((ctxt)->src.val)); \ } while (0) /* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */ @@ -3013,7 +3010,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) int cr = ctxt->modrm_reg; u64 efer = 0; - static u64 cr_reserved_bits[] = { + static const u64 cr_reserved_bits[] = { 0xffffffff00000000ULL, 0, 0, 0, /* CR3 checked later */ CR4_RESERVED_BITS, @@ -3048,7 +3045,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); if (efer & EFER_LMA) - rsvd = CR3_L_MODE_RESERVED_BITS; + rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD; else if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PAE) rsvd = CR3_PAE_RESERVED_BITS; else if (ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PG) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 176205a..920cd58 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -53,7 +53,7 @@ #define APIC_BUS_CYCLE_NS 1 /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ -#define apic_debug(fmt, arg...) +#define apic_debug(fmt, arg...) do {} while (0) #define APIC_LVT_NUM 6 /* 14 is the version for Xeon and Pentium 8.4.8*/ diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index bfc9507..bf85b38 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3558,7 +3558,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); - invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter); + invlpg_counter = atomic_read_unchecked(&vcpu->kvm->arch.invlpg_counter); /* * Assume that the pte write on a page table of the same type @@ -3590,7 +3590,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, } spin_lock(&vcpu->kvm->mmu_lock); - if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) + if (atomic_read_unchecked(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) gentry = 0; kvm_mmu_free_some_pages(vcpu); ++vcpu->kvm->stat.mmu_pte_write; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 9299410..ade2f9b 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -197,7 +197,7 @@ retry_walk: if (unlikely(kvm_is_error_hva(host_addr))) goto error; - ptep_user = (pt_element_t __user *)((void *)host_addr + offset); + ptep_user = (pt_element_t __force_user *)((void *)host_addr + offset); if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) goto error; @@ -705,7 +705,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) if (need_flush) kvm_flush_remote_tlbs(vcpu->kvm); - atomic_inc(&vcpu->kvm->arch.invlpg_counter); + atomic_inc_unchecked(&vcpu->kvm->arch.invlpg_counter); spin_unlock(&vcpu->kvm->mmu_lock); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7a2d9d6..0e8286c 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3403,7 +3403,11 @@ static void 
reload_tss(struct kvm_vcpu *vcpu) int cpu = raw_smp_processor_id(); struct svm_cpu_data *sd = per_cpu(svm_data, cpu); + + pax_open_kernel(); sd->tss_desc->type = 9; /* available 32/64-bit TSS */ + pax_close_kernel(); + load_TR_desc(); } @@ -3783,6 +3787,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif #endif +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_MEMORY_UDEREF) + __set_fs(current_thread_info()->addr_limit); +#endif + reload_tss(vcpu); local_irq_disable(); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8831c43..98f1a3e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1100,12 +1100,12 @@ static void vmcs_write64(unsigned long field, u64 value) #endif } -static void vmcs_clear_bits(unsigned long field, u32 mask) +static void vmcs_clear_bits(unsigned long field, unsigned long mask) { vmcs_writel(field, vmcs_readl(field) & ~mask); } -static void vmcs_set_bits(unsigned long field, u32 mask) +static void vmcs_set_bits(unsigned long field, unsigned long mask) { vmcs_writel(field, vmcs_readl(field) | mask); } @@ -1306,7 +1306,11 @@ static void reload_tss(void) struct desc_struct *descs; descs = (void *)gdt->address; + + pax_open_kernel(); descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ + pax_close_kernel(); + load_TR_desc(); } @@ -1505,6 +1509,10 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */ vmcs_writel(HOST_GDTR_BASE, gdt->address); /* 22.2.4 */ +#ifdef CONFIG_PAX_PER_CPU_PGD + vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ +#endif + rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ vmx->loaded_vmcs->cpu = cpu; @@ -2635,8 +2643,11 @@ static __init int hardware_setup(void) if (!cpu_has_vmx_flexpriority()) flexpriority_enabled = 0; - if (!cpu_has_vmx_tpr_shadow()) - kvm_x86_ops->update_cr8_intercept = NULL; + if (!cpu_has_vmx_tpr_shadow()) { + pax_open_kernel(); + *(void **)&kvm_x86_ops->update_cr8_intercept = NULL; + pax_close_kernel(); + } if (enable_ept && !cpu_has_vmx_ept_2m_page()) kvm_disable_largepages(); @@ -3638,7 +3649,10 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) unsigned long cr4; vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS); /* 22.2.3 */ + +#ifndef CONFIG_PAX_PER_CPU_PGD vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ +#endif /* Save the most likely value for this task's CR4 in the VMCS. 
*/ cr4 = read_cr4(); @@ -3655,7 +3669,7 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ asm("mov $.Lkvm_vmx_return, %0" : "=r"(tmpl)); - vmcs_writel(HOST_RIP, tmpl); /* 22.2.5 */ + vmcs_writel(HOST_RIP, ktla_ktva(tmpl)); /* 22.2.5 */ rdmsr(MSR_IA32_SYSENTER_CS, low32, high32); vmcs_write32(HOST_IA32_SYSENTER_CS, low32); @@ -6206,6 +6220,12 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) "jmp .Lkvm_vmx_return \n\t" ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" ".Lkvm_vmx_return: " + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) + "ljmp %[cs],$.Lkvm_vmx_return2\n\t" + ".Lkvm_vmx_return2: " +#endif + /* Save guest registers, load host registers, keep flags */ "mov %0, %c[wordsize](%%"R"sp) \n\t" "pop %0 \n\t" @@ -6254,6 +6274,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) #endif [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)), [wordsize]"i"(sizeof(ulong)) + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) + ,[cs]"i"(__KERNEL_CS) +#endif + : "cc", "memory" , R"ax", R"bx", R"di", R"si" #ifdef CONFIG_X86_64 @@ -6282,7 +6307,16 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) } } - asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); + asm("mov %0, %%ds; mov %0, %%es; mov %0, %%ss" : : "r"(__KERNEL_DS)); + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_KERNEXEC) + loadsegment(fs, __KERNEL_PERCPU); +#endif + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_MEMORY_UDEREF) + __set_fs(current_thread_info()->addr_limit); +#endif + vmx->loaded_vmcs->launched = 1; vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bb179cc..2de279c0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -668,6 +668,8 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4); int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { + cr3 &= ~CR3_PCID_INVD; + if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { kvm_mmu_sync_roots(vcpu); kvm_mmu_flush_tlb(vcpu); @@ -1369,8 +1371,8 @@ static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data) { struct kvm *kvm = vcpu->kvm; int lm = is_long_mode(vcpu); - u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64 - : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32; + u8 __user *blob_addr = lm ? (u8 __user *)(long)kvm->arch.xen_hvm_config.blob_addr_64 + : (u8 __user *)(long)kvm->arch.xen_hvm_config.blob_addr_32; u8 blob_size = lm ? 
kvm->arch.xen_hvm_config.blob_size_64 : kvm->arch.xen_hvm_config.blob_size_32; u32 page_num = data & ~PAGE_MASK; @@ -2187,6 +2189,8 @@ long kvm_arch_dev_ioctl(struct file *filp, if (n < msr_list.nmsrs) goto out; r = -EFAULT; + if (num_msrs_to_save > ARRAY_SIZE(msrs_to_save)) + goto out; if (copy_to_user(user_msr_list->indices, &msrs_to_save, num_msrs_to_save * sizeof(u32))) goto out; @@ -2362,15 +2366,20 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries) { - int r; + int r, i; r = -E2BIG; if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) goto out; r = -EFAULT; - if (copy_from_user(&vcpu->arch.cpuid_entries, entries, - cpuid->nent * sizeof(struct kvm_cpuid_entry2))) + if (!access_ok(VERIFY_READ, entries, cpuid->nent * sizeof(struct kvm_cpuid_entry2))) goto out; + for (i = 0; i < cpuid->nent; ++i) { + struct kvm_cpuid_entry2 cpuid_entry; + if (__copy_from_user(&cpuid_entry, entries + i, sizeof(cpuid_entry))) + goto out; + vcpu->arch.cpuid_entries[i] = cpuid_entry; + } vcpu->arch.cpuid_nent = cpuid->nent; kvm_apic_set_version(vcpu); kvm_x86_ops->cpuid_update(vcpu); @@ -2385,15 +2394,19 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries) { - int r; + int r, i; r = -E2BIG; if (cpuid->nent < vcpu->arch.cpuid_nent) goto out; r = -EFAULT; - if (copy_to_user(entries, &vcpu->arch.cpuid_entries, - vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) + if (!access_ok(VERIFY_WRITE, entries, vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2))) goto out; + for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { + struct kvm_cpuid_entry2 cpuid_entry = vcpu->arch.cpuid_entries[i]; + if (__copy_to_user(entries + i, &cpuid_entry, sizeof(cpuid_entry))) + goto out; + } return 0; out: @@ -2768,7 +2781,7 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { - if (irq->irq < 0 || irq->irq >= 256) + if (irq->irq >= 256) return -EINVAL; if (irqchip_in_kernel(vcpu->kvm)) return -ENXIO; @@ -5209,7 +5222,7 @@ static void kvm_set_mmio_spte_mask(void) kvm_mmu_set_mmio_spte_mask(mask); } -int kvm_arch_init(void *opaque) +int kvm_arch_init(const void *opaque) { int r; struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 8f4fda4..353d5cc 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1195,9 +1195,10 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count) * Rebooting also tells the Host we're finished, but the RESTART flag tells the * Launcher to reboot us. 
*/ -static void lguest_restart(char *reason) +static __noreturn void lguest_restart(char *reason) { hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0, 0); + BUG(); } /*G:050 diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c index 042f682..c92afb6 100644 --- a/arch/x86/lib/atomic64_32.c +++ b/arch/x86/lib/atomic64_32.c @@ -8,18 +8,30 @@ long long atomic64_read_cx8(long long, const atomic64_t *v); EXPORT_SYMBOL(atomic64_read_cx8); +long long atomic64_read_unchecked_cx8(long long, const atomic64_unchecked_t *v); +EXPORT_SYMBOL(atomic64_read_unchecked_cx8); long long atomic64_set_cx8(long long, const atomic64_t *v); EXPORT_SYMBOL(atomic64_set_cx8); +long long atomic64_set_unchecked_cx8(long long, const atomic64_unchecked_t *v); +EXPORT_SYMBOL(atomic64_set_unchecked_cx8); long long atomic64_xchg_cx8(long long, unsigned high); EXPORT_SYMBOL(atomic64_xchg_cx8); long long atomic64_add_return_cx8(long long a, atomic64_t *v); EXPORT_SYMBOL(atomic64_add_return_cx8); +long long atomic64_add_return_unchecked_cx8(long long a, atomic64_unchecked_t *v); +EXPORT_SYMBOL(atomic64_add_return_unchecked_cx8); long long atomic64_sub_return_cx8(long long a, atomic64_t *v); EXPORT_SYMBOL(atomic64_sub_return_cx8); +long long atomic64_sub_return_unchecked_cx8(long long a, atomic64_unchecked_t *v); +EXPORT_SYMBOL(atomic64_sub_return_unchecked_cx8); long long atomic64_inc_return_cx8(long long a, atomic64_t *v); EXPORT_SYMBOL(atomic64_inc_return_cx8); +long long atomic64_inc_return_unchecked_cx8(long long a, atomic64_unchecked_t *v); +EXPORT_SYMBOL(atomic64_inc_return_unchecked_cx8); long long atomic64_dec_return_cx8(long long a, atomic64_t *v); EXPORT_SYMBOL(atomic64_dec_return_cx8); +long long atomic64_dec_return_unchecked_cx8(long long a, atomic64_unchecked_t *v); +EXPORT_SYMBOL(atomic64_dec_return_unchecked_cx8); long long atomic64_dec_if_positive_cx8(atomic64_t *v); EXPORT_SYMBOL(atomic64_dec_if_positive_cx8); int atomic64_inc_not_zero_cx8(atomic64_t *v); @@ -30,26 +42,46 @@ EXPORT_SYMBOL(atomic64_add_unless_cx8); #ifndef CONFIG_X86_CMPXCHG64 long long atomic64_read_386(long long, const atomic64_t *v); EXPORT_SYMBOL(atomic64_read_386); +long long atomic64_read_unchecked_386(long long, const atomic64_unchecked_t *v); +EXPORT_SYMBOL(atomic64_read_unchecked_386); long long atomic64_set_386(long long, const atomic64_t *v); EXPORT_SYMBOL(atomic64_set_386); +long long atomic64_set_unchecked_386(long long, const atomic64_unchecked_t *v); +EXPORT_SYMBOL(atomic64_set_unchecked_386); long long atomic64_xchg_386(long long, unsigned high); EXPORT_SYMBOL(atomic64_xchg_386); long long atomic64_add_return_386(long long a, atomic64_t *v); EXPORT_SYMBOL(atomic64_add_return_386); +long long atomic64_add_return_unchecked_386(long long a, atomic64_unchecked_t *v); +EXPORT_SYMBOL(atomic64_add_return_unchecked_386); long long atomic64_sub_return_386(long long a, atomic64_t *v); EXPORT_SYMBOL(atomic64_sub_return_386); +long long atomic64_sub_return_unchecked_386(long long a, atomic64_unchecked_t *v); +EXPORT_SYMBOL(atomic64_sub_return_unchecked_386); long long atomic64_inc_return_386(long long a, atomic64_t *v); EXPORT_SYMBOL(atomic64_inc_return_386); +long long atomic64_inc_return_unchecked_386(long long a, atomic64_unchecked_t *v); +EXPORT_SYMBOL(atomic64_inc_return_unchecked_386); long long atomic64_dec_return_386(long long a, atomic64_t *v); EXPORT_SYMBOL(atomic64_dec_return_386); +long long atomic64_dec_return_unchecked_386(long long a, atomic64_unchecked_t *v); 
+EXPORT_SYMBOL(atomic64_dec_return_unchecked_386); long long atomic64_add_386(long long a, atomic64_t *v); EXPORT_SYMBOL(atomic64_add_386); +long long atomic64_add_unchecked_386(long long a, atomic64_unchecked_t *v); +EXPORT_SYMBOL(atomic64_add_unchecked_386); long long atomic64_sub_386(long long a, atomic64_t *v); EXPORT_SYMBOL(atomic64_sub_386); +long long atomic64_sub_unchecked_386(long long a, atomic64_unchecked_t *v); +EXPORT_SYMBOL(atomic64_sub_unchecked_386); long long atomic64_inc_386(long long a, atomic64_t *v); EXPORT_SYMBOL(atomic64_inc_386); +long long atomic64_inc_unchecked_386(long long a, atomic64_unchecked_t *v); +EXPORT_SYMBOL(atomic64_inc_unchecked_386); long long atomic64_dec_386(long long a, atomic64_t *v); EXPORT_SYMBOL(atomic64_dec_386); +long long atomic64_dec_unchecked_386(long long a, atomic64_unchecked_t *v); +EXPORT_SYMBOL(atomic64_dec_unchecked_386); long long atomic64_dec_if_positive_386(atomic64_t *v); EXPORT_SYMBOL(atomic64_dec_if_positive_386); int atomic64_inc_not_zero_386(atomic64_t *v); diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S index e8e7e0d..56fd1b0 100644 --- a/arch/x86/lib/atomic64_386_32.S +++ b/arch/x86/lib/atomic64_386_32.S @@ -48,6 +48,10 @@ BEGIN(read) movl (v), %eax movl 4(v), %edx RET_ENDP +BEGIN(read_unchecked) + movl (v), %eax + movl 4(v), %edx +RET_ENDP #undef v #define v %esi @@ -55,6 +59,10 @@ BEGIN(set) movl %ebx, (v) movl %ecx, 4(v) RET_ENDP +BEGIN(set_unchecked) + movl %ebx, (v) + movl %ecx, 4(v) +RET_ENDP #undef v #define v %esi @@ -70,6 +78,20 @@ RET_ENDP BEGIN(add) addl %eax, (v) adcl %edx, 4(v) + +#ifdef CONFIG_PAX_REFCOUNT + jno 0f + subl %eax, (v) + sbbl %edx, 4(v) + int $4 +0: + _ASM_EXTABLE(0b, 0b) +#endif + +RET_ENDP +BEGIN(add_unchecked) + addl %eax, (v) + adcl %edx, 4(v) RET_ENDP #undef v @@ -77,6 +99,24 @@ RET_ENDP BEGIN(add_return) addl (v), %eax adcl 4(v), %edx + +#ifdef CONFIG_PAX_REFCOUNT + into +1234: + _ASM_EXTABLE(1234b, 2f) +#endif + + movl %eax, (v) + movl %edx, 4(v) + +#ifdef CONFIG_PAX_REFCOUNT +2: +#endif + +RET_ENDP +BEGIN(add_return_unchecked) + addl (v), %eax + adcl 4(v), %edx movl %eax, (v) movl %edx, 4(v) RET_ENDP @@ -86,6 +126,20 @@ RET_ENDP BEGIN(sub) subl %eax, (v) sbbl %edx, 4(v) + +#ifdef CONFIG_PAX_REFCOUNT + jno 0f + addl %eax, (v) + adcl %edx, 4(v) + int $4 +0: + _ASM_EXTABLE(0b, 0b) +#endif + +RET_ENDP +BEGIN(sub_unchecked) + subl %eax, (v) + sbbl %edx, 4(v) RET_ENDP #undef v @@ -96,6 +150,27 @@ BEGIN(sub_return) sbbl $0, %edx addl (v), %eax adcl 4(v), %edx + +#ifdef CONFIG_PAX_REFCOUNT + into +1234: + _ASM_EXTABLE(1234b, 2f) +#endif + + movl %eax, (v) + movl %edx, 4(v) + +#ifdef CONFIG_PAX_REFCOUNT +2: +#endif + +RET_ENDP +BEGIN(sub_return_unchecked) + negl %edx + negl %eax + sbbl $0, %edx + addl (v), %eax + adcl 4(v), %edx movl %eax, (v) movl %edx, 4(v) RET_ENDP @@ -105,6 +180,20 @@ RET_ENDP BEGIN(inc) addl $1, (v) adcl $0, 4(v) + +#ifdef CONFIG_PAX_REFCOUNT + jno 0f + subl $1, (v) + sbbl $0, 4(v) + int $4 +0: + _ASM_EXTABLE(0b, 0b) +#endif + +RET_ENDP +BEGIN(inc_unchecked) + addl $1, (v) + adcl $0, 4(v) RET_ENDP #undef v @@ -114,6 +203,26 @@ BEGIN(inc_return) movl 4(v), %edx addl $1, %eax adcl $0, %edx + +#ifdef CONFIG_PAX_REFCOUNT + into +1234: + _ASM_EXTABLE(1234b, 2f) +#endif + + movl %eax, (v) + movl %edx, 4(v) + +#ifdef CONFIG_PAX_REFCOUNT +2: +#endif + +RET_ENDP +BEGIN(inc_return_unchecked) + movl (v), %eax + movl 4(v), %edx + addl $1, %eax + adcl $0, %edx movl %eax, (v) movl %edx, 4(v) RET_ENDP @@ -123,6 +232,20 @@ RET_ENDP BEGIN(dec) subl $1, (v) sbbl $0, 
4(v) + +#ifdef CONFIG_PAX_REFCOUNT + jno 0f + addl $1, (v) + adcl $0, 4(v) + int $4 +0: + _ASM_EXTABLE(0b, 0b) +#endif + +RET_ENDP +BEGIN(dec_unchecked) + subl $1, (v) + sbbl $0, 4(v) RET_ENDP #undef v @@ -132,6 +255,26 @@ BEGIN(dec_return) movl 4(v), %edx subl $1, %eax sbbl $0, %edx + +#ifdef CONFIG_PAX_REFCOUNT + into +1234: + _ASM_EXTABLE(1234b, 2f) +#endif + + movl %eax, (v) + movl %edx, 4(v) + +#ifdef CONFIG_PAX_REFCOUNT +2: +#endif + +RET_ENDP +BEGIN(dec_return_unchecked) + movl (v), %eax + movl 4(v), %edx + subl $1, %eax + sbbl $0, %edx movl %eax, (v) movl %edx, 4(v) RET_ENDP @@ -143,6 +286,13 @@ BEGIN(add_unless) adcl %edx, %edi addl (v), %eax adcl 4(v), %edx + +#ifdef CONFIG_PAX_REFCOUNT + into +1234: + _ASM_EXTABLE(1234b, 2f) +#endif + cmpl %eax, %esi je 3f 1: @@ -168,6 +318,13 @@ BEGIN(inc_not_zero) 1: addl $1, %eax adcl $0, %edx + +#ifdef CONFIG_PAX_REFCOUNT + into +1234: + _ASM_EXTABLE(1234b, 2f) +#endif + movl %eax, (v) movl %edx, 4(v) movl $1, %eax @@ -186,6 +343,13 @@ BEGIN(dec_if_positive) movl 4(v), %edx subl $1, %eax sbbl $0, %edx + +#ifdef CONFIG_PAX_REFCOUNT + into +1234: + _ASM_EXTABLE(1234b, 1f) +#endif + js 1f movl %eax, (v) movl %edx, 4(v) diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S index 391a083..3a2cf39 100644 --- a/arch/x86/lib/atomic64_cx8_32.S +++ b/arch/x86/lib/atomic64_cx8_32.S @@ -35,10 +35,20 @@ ENTRY(atomic64_read_cx8) CFI_STARTPROC read64 %ecx + pax_force_retaddr ret CFI_ENDPROC ENDPROC(atomic64_read_cx8) +ENTRY(atomic64_read_unchecked_cx8) + CFI_STARTPROC + + read64 %ecx + pax_force_retaddr + ret + CFI_ENDPROC +ENDPROC(atomic64_read_unchecked_cx8) + ENTRY(atomic64_set_cx8) CFI_STARTPROC @@ -48,10 +58,25 @@ ENTRY(atomic64_set_cx8) cmpxchg8b (%esi) jne 1b + pax_force_retaddr ret CFI_ENDPROC ENDPROC(atomic64_set_cx8) +ENTRY(atomic64_set_unchecked_cx8) + CFI_STARTPROC + +1: +/* we don't need LOCK_PREFIX since aligned 64-bit writes + * are atomic on 586 and newer */ + cmpxchg8b (%esi) + jne 1b + + pax_force_retaddr + ret + CFI_ENDPROC +ENDPROC(atomic64_set_unchecked_cx8) + ENTRY(atomic64_xchg_cx8) CFI_STARTPROC @@ -62,12 +87,13 @@ ENTRY(atomic64_xchg_cx8) cmpxchg8b (%esi) jne 1b + pax_force_retaddr ret CFI_ENDPROC ENDPROC(atomic64_xchg_cx8) -.macro addsub_return func ins insc -ENTRY(atomic64_\func\()_return_cx8) +.macro addsub_return func ins insc unchecked="" +ENTRY(atomic64_\func\()_return\unchecked\()_cx8) CFI_STARTPROC SAVE ebp SAVE ebx @@ -84,27 +110,44 @@ ENTRY(atomic64_\func\()_return_cx8) movl %edx, %ecx \ins\()l %esi, %ebx \insc\()l %edi, %ecx + +.ifb \unchecked +#ifdef CONFIG_PAX_REFCOUNT + into +2: + _ASM_EXTABLE(2b, 3f) +#endif +.endif + LOCK_PREFIX cmpxchg8b (%ebp) jne 1b - -10: movl %ebx, %eax movl %ecx, %edx + +.ifb \unchecked +#ifdef CONFIG_PAX_REFCOUNT +3: +#endif +.endif + RESTORE edi RESTORE esi RESTORE ebx RESTORE ebp + pax_force_retaddr ret CFI_ENDPROC -ENDPROC(atomic64_\func\()_return_cx8) +ENDPROC(atomic64_\func\()_return\unchecked\()_cx8) .endm addsub_return add add adc addsub_return sub sub sbb +addsub_return add add adc _unchecked +addsub_return sub sub sbb _unchecked -.macro incdec_return func ins insc -ENTRY(atomic64_\func\()_return_cx8) +.macro incdec_return func ins insc unchecked="" +ENTRY(atomic64_\func\()_return\unchecked\()_cx8) CFI_STARTPROC SAVE ebx @@ -114,21 +157,39 @@ ENTRY(atomic64_\func\()_return_cx8) movl %edx, %ecx \ins\()l $1, %ebx \insc\()l $0, %ecx + +.ifb \unchecked +#ifdef CONFIG_PAX_REFCOUNT + into +2: + _ASM_EXTABLE(2b, 3f) +#endif +.endif + LOCK_PREFIX cmpxchg8b (%esi) jne 1b 
-10: movl %ebx, %eax movl %ecx, %edx + +.ifb \unchecked +#ifdef CONFIG_PAX_REFCOUNT +3: +#endif +.endif + RESTORE ebx + pax_force_retaddr ret CFI_ENDPROC -ENDPROC(atomic64_\func\()_return_cx8) +ENDPROC(atomic64_\func\()_return\unchecked\()_cx8) .endm incdec_return inc add adc incdec_return dec sub sbb +incdec_return inc add adc _unchecked +incdec_return dec sub sbb _unchecked ENTRY(atomic64_dec_if_positive_cx8) CFI_STARTPROC @@ -140,6 +201,13 @@ ENTRY(atomic64_dec_if_positive_cx8) movl %edx, %ecx subl $1, %ebx sbb $0, %ecx + +#ifdef CONFIG_PAX_REFCOUNT + into +1234: + _ASM_EXTABLE(1234b, 2f) +#endif + js 2f LOCK_PREFIX cmpxchg8b (%esi) @@ -149,6 +217,7 @@ ENTRY(atomic64_dec_if_positive_cx8) movl %ebx, %eax movl %ecx, %edx RESTORE ebx + pax_force_retaddr ret CFI_ENDPROC ENDPROC(atomic64_dec_if_positive_cx8) @@ -174,6 +243,13 @@ ENTRY(atomic64_add_unless_cx8) movl %edx, %ecx addl %esi, %ebx adcl %edi, %ecx + +#ifdef CONFIG_PAX_REFCOUNT + into +1234: + _ASM_EXTABLE(1234b, 3f) +#endif + LOCK_PREFIX cmpxchg8b (%ebp) jne 1b @@ -184,6 +260,7 @@ ENTRY(atomic64_add_unless_cx8) CFI_ADJUST_CFA_OFFSET -8 RESTORE ebx RESTORE ebp + pax_force_retaddr ret 4: cmpl %edx, 4(%esp) @@ -206,6 +283,13 @@ ENTRY(atomic64_inc_not_zero_cx8) movl %edx, %ecx addl $1, %ebx adcl $0, %ecx + +#ifdef CONFIG_PAX_REFCOUNT + into +1234: + _ASM_EXTABLE(1234b, 3f) +#endif + LOCK_PREFIX cmpxchg8b (%esi) jne 1b @@ -213,6 +297,7 @@ ENTRY(atomic64_inc_not_zero_cx8) movl $1, %eax 3: RESTORE ebx + pax_force_retaddr ret 4: testl %edx, %edx diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index 78d16a5..fbcf666 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -28,7 +28,8 @@ #include #include #include - +#include + /* * computes a partial checksum, e.g. 
for TCP/UDP fragments */ @@ -296,9 +297,24 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst, #define ARGBASE 16 #define FP 12 - -ENTRY(csum_partial_copy_generic) + +ENTRY(csum_partial_copy_generic_to_user) CFI_STARTPROC + +#ifdef CONFIG_PAX_MEMORY_UDEREF + pushl_cfi %gs + popl_cfi %es + jmp csum_partial_copy_generic +#endif + +ENTRY(csum_partial_copy_generic_from_user) + +#ifdef CONFIG_PAX_MEMORY_UDEREF + pushl_cfi %gs + popl_cfi %ds +#endif + +ENTRY(csum_partial_copy_generic) subl $4,%esp CFI_ADJUST_CFA_OFFSET 4 pushl_cfi %edi @@ -320,7 +336,7 @@ ENTRY(csum_partial_copy_generic) jmp 4f SRC(1: movw (%esi), %bx ) addl $2, %esi -DST( movw %bx, (%edi) ) +DST( movw %bx, %es:(%edi) ) addl $2, %edi addw %bx, %ax adcl $0, %eax @@ -332,30 +348,30 @@ DST( movw %bx, (%edi) ) SRC(1: movl (%esi), %ebx ) SRC( movl 4(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, (%edi) ) +DST( movl %ebx, %es:(%edi) ) adcl %edx, %eax -DST( movl %edx, 4(%edi) ) +DST( movl %edx, %es:4(%edi) ) SRC( movl 8(%esi), %ebx ) SRC( movl 12(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, 8(%edi) ) +DST( movl %ebx, %es:8(%edi) ) adcl %edx, %eax -DST( movl %edx, 12(%edi) ) +DST( movl %edx, %es:12(%edi) ) SRC( movl 16(%esi), %ebx ) SRC( movl 20(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, 16(%edi) ) +DST( movl %ebx, %es:16(%edi) ) adcl %edx, %eax -DST( movl %edx, 20(%edi) ) +DST( movl %edx, %es:20(%edi) ) SRC( movl 24(%esi), %ebx ) SRC( movl 28(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, 24(%edi) ) +DST( movl %ebx, %es:24(%edi) ) adcl %edx, %eax -DST( movl %edx, 28(%edi) ) +DST( movl %edx, %es:28(%edi) ) lea 32(%esi), %esi lea 32(%edi), %edi @@ -369,7 +385,7 @@ DST( movl %edx, 28(%edi) ) shrl $2, %edx # This clears CF SRC(3: movl (%esi), %ebx ) adcl %ebx, %eax -DST( movl %ebx, (%edi) ) +DST( movl %ebx, %es:(%edi) ) lea 4(%esi), %esi lea 4(%edi), %edi dec %edx @@ -381,12 +397,12 @@ DST( movl %ebx, (%edi) ) jb 5f SRC( movw (%esi), %cx ) leal 2(%esi), %esi -DST( movw %cx, (%edi) ) +DST( movw %cx, %es:(%edi) ) leal 2(%edi), %edi je 6f shll $16,%ecx SRC(5: movb (%esi), %cl ) -DST( movb %cl, (%edi) ) +DST( movb %cl, %es:(%edi) ) 6: addl %ecx, %eax adcl $0, %eax 7: @@ -397,7 +413,7 @@ DST( movb %cl, (%edi) ) 6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr - movl $-EFAULT, (%ebx) + movl $-EFAULT, %ss:(%ebx) # zero the complete destination - computing the rest # is too much work @@ -410,11 +426,15 @@ DST( movb %cl, (%edi) ) 6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr - movl $-EFAULT,(%ebx) + movl $-EFAULT,%ss:(%ebx) jmp 5000b .previous + pushl_cfi %ss + popl_cfi %ds + pushl_cfi %ss + popl_cfi %es popl_cfi %ebx CFI_RESTORE ebx popl_cfi %esi @@ -424,26 +444,43 @@ DST( movb %cl, (%edi) ) popl_cfi %ecx # equivalent to addl $4,%esp ret CFI_ENDPROC -ENDPROC(csum_partial_copy_generic) +ENDPROC(csum_partial_copy_generic_to_user) #else /* Version for PentiumII/PPro */ #define ROUND1(x) \ + nop; nop; nop; \ SRC(movl x(%esi), %ebx ) ; \ addl %ebx, %eax ; \ - DST(movl %ebx, x(%edi) ) ; + DST(movl %ebx, %es:x(%edi)) ; #define ROUND(x) \ + nop; nop; nop; \ SRC(movl x(%esi), %ebx ) ; \ adcl %ebx, %eax ; \ - DST(movl %ebx, x(%edi) ) ; + DST(movl %ebx, %es:x(%edi)) ; #define ARGBASE 12 - -ENTRY(csum_partial_copy_generic) + +ENTRY(csum_partial_copy_generic_to_user) CFI_STARTPROC + +#ifdef CONFIG_PAX_MEMORY_UDEREF + pushl_cfi %gs + popl_cfi %es + jmp csum_partial_copy_generic +#endif + +ENTRY(csum_partial_copy_generic_from_user) + +#ifdef CONFIG_PAX_MEMORY_UDEREF + pushl_cfi %gs + popl_cfi %ds +#endif + 
+ENTRY(csum_partial_copy_generic) pushl_cfi %ebx CFI_REL_OFFSET ebx, 0 pushl_cfi %edi @@ -464,7 +501,7 @@ ENTRY(csum_partial_copy_generic) subl %ebx, %edi lea -1(%esi),%edx andl $-32,%edx - lea 3f(%ebx,%ebx), %ebx + lea 3f(%ebx,%ebx,2), %ebx testl %esi, %esi jmp *%ebx 1: addl $64,%esi @@ -485,19 +522,19 @@ ENTRY(csum_partial_copy_generic) jb 5f SRC( movw (%esi), %dx ) leal 2(%esi), %esi -DST( movw %dx, (%edi) ) +DST( movw %dx, %es:(%edi) ) leal 2(%edi), %edi je 6f shll $16,%edx 5: SRC( movb (%esi), %dl ) -DST( movb %dl, (%edi) ) +DST( movb %dl, %es:(%edi) ) 6: addl %edx, %eax adcl $0, %eax 7: .section .fixup, "ax" 6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr - movl $-EFAULT, (%ebx) + movl $-EFAULT, %ss:(%ebx) # zero the complete destination (computing the rest is too much work) movl ARGBASE+8(%esp),%edi # dst movl ARGBASE+12(%esp),%ecx # len @@ -505,10 +542,17 @@ DST( movb %dl, (%edi) ) rep; stosb jmp 7b 6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr - movl $-EFAULT, (%ebx) + movl $-EFAULT, %ss:(%ebx) jmp 7b .previous +#ifdef CONFIG_PAX_MEMORY_UDEREF + pushl_cfi %ss + popl_cfi %ds + pushl_cfi %ss + popl_cfi %es +#endif + popl_cfi %esi CFI_RESTORE esi popl_cfi %edi @@ -517,7 +561,7 @@ DST( movb %dl, (%edi) ) CFI_RESTORE ebx ret CFI_ENDPROC -ENDPROC(csum_partial_copy_generic) +ENDPROC(csum_partial_copy_generic_to_user) #undef ROUND #undef ROUND1 diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index f2145cf..cea889d 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -11,6 +11,7 @@ ENTRY(clear_page_c) movl $4096/8,%ecx xorl %eax,%eax rep stosq + pax_force_retaddr ret CFI_ENDPROC ENDPROC(clear_page_c) @@ -20,6 +21,7 @@ ENTRY(clear_page_c_e) movl $4096,%ecx xorl %eax,%eax rep stosb + pax_force_retaddr ret CFI_ENDPROC ENDPROC(clear_page_c_e) @@ -43,6 +45,7 @@ ENTRY(clear_page) leaq 64(%rdi),%rdi jnz .Lloop nop + pax_force_retaddr ret CFI_ENDPROC .Lclear_page_end: @@ -58,7 +61,7 @@ ENDPROC(clear_page) #include - .section .altinstr_replacement,"ax" + .section .altinstr_replacement,"a" 1: .byte 0xeb /* jmp */ .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ 2: .byte 0xeb /* jmp */ diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S index 1e572c5..2a162cd 100644 --- a/arch/x86/lib/cmpxchg16b_emu.S +++ b/arch/x86/lib/cmpxchg16b_emu.S @@ -53,11 +53,13 @@ this_cpu_cmpxchg16b_emu: popf mov $1, %al + pax_force_retaddr ret not_same: popf xor %al,%al + pax_force_retaddr ret CFI_ENDPROC diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S index 01c805b..16da7cf 100644 --- a/arch/x86/lib/copy_page_64.S +++ b/arch/x86/lib/copy_page_64.S @@ -9,6 +9,7 @@ copy_page_c: CFI_STARTPROC movl $4096/8,%ecx rep movsq + pax_force_retaddr ret CFI_ENDPROC ENDPROC(copy_page_c) @@ -24,7 +25,7 @@ ENTRY(copy_page) CFI_ADJUST_CFA_OFFSET 3*8 movq %rbx,(%rsp) CFI_REL_OFFSET rbx, 0 - movq %r12,1*8(%rsp) + movq %r14,1*8(%rsp) CFI_REL_OFFSET r12, 1*8 movq %r13,2*8(%rsp) CFI_REL_OFFSET r13, 2*8 @@ -41,7 +42,7 @@ ENTRY(copy_page) movq 32 (%rsi), %r9 movq 40 (%rsi), %r10 movq 48 (%rsi), %r11 - movq 56 (%rsi), %r12 + movq 56 (%rsi), %r14 prefetcht0 5*64(%rsi) @@ -52,7 +53,7 @@ ENTRY(copy_page) movq %r9, 32 (%rdi) movq %r10, 40 (%rdi) movq %r11, 48 (%rdi) - movq %r12, 56 (%rdi) + movq %r14, 56 (%rdi) leaq 64 (%rsi), %rsi leaq 64 (%rdi), %rdi @@ -71,7 +72,7 @@ ENTRY(copy_page) movq 32 (%rsi), %r9 movq 40 (%rsi), %r10 movq 48 (%rsi), %r11 - movq 56 (%rsi), %r12 + movq 56 (%rsi), %r14 movq %rax, (%rdi) movq %rbx, 8 (%rdi) @@ -80,7 
+81,7 @@ ENTRY(copy_page) movq %r9, 32 (%rdi) movq %r10, 40 (%rdi) movq %r11, 48 (%rdi) - movq %r12, 56 (%rdi) + movq %r14, 56 (%rdi) leaq 64(%rdi),%rdi leaq 64(%rsi),%rsi @@ -89,12 +90,13 @@ ENTRY(copy_page) movq (%rsp),%rbx CFI_RESTORE rbx - movq 1*8(%rsp),%r12 + movq 1*8(%rsp),%r14 CFI_RESTORE r12 movq 2*8(%rsp),%r13 CFI_RESTORE r13 addq $3*8,%rsp CFI_ADJUST_CFA_OFFSET -3*8 + pax_force_retaddr ret .Lcopy_page_end: CFI_ENDPROC @@ -105,7 +107,7 @@ ENDPROC(copy_page) #include - .section .altinstr_replacement,"ax" + .section .altinstr_replacement,"a" 1: .byte 0xeb /* jmp */ .byte (copy_page_c - copy_page) - (2f - 1b) /* offset */ 2: diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 0248402..416b737 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -16,6 +16,7 @@ #include #include #include +#include /* * By placing feature2 after feature1 in altinstructions section, we logically @@ -29,7 +30,7 @@ .byte 0xe9 /* 32bit jump */ .long \orig-1f /* by default jump to orig */ 1: - .section .altinstr_replacement,"ax" + .section .altinstr_replacement,"a" 2: .byte 0xe9 /* near jump with 32bit immediate */ .long \alt1-1b /* offset */ /* or alternatively to alt1 */ 3: .byte 0xe9 /* near jump with 32bit immediate */ @@ -71,47 +72,20 @@ #endif .endm -/* Standard copy_to_user with segment limit checking */ -ENTRY(_copy_to_user) - CFI_STARTPROC - GET_THREAD_INFO(%rax) - movq %rdi,%rcx - addq %rdx,%rcx - jc bad_to_user - cmpq TI_addr_limit(%rax),%rcx - ja bad_to_user - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ - copy_user_generic_unrolled,copy_user_generic_string, \ - copy_user_enhanced_fast_string - CFI_ENDPROC -ENDPROC(_copy_to_user) - -/* Standard copy_from_user with segment limit checking */ -ENTRY(_copy_from_user) - CFI_STARTPROC - GET_THREAD_INFO(%rax) - movq %rsi,%rcx - addq %rdx,%rcx - jc bad_from_user - cmpq TI_addr_limit(%rax),%rcx - ja bad_from_user - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ - copy_user_generic_unrolled,copy_user_generic_string, \ - copy_user_enhanced_fast_string - CFI_ENDPROC -ENDPROC(_copy_from_user) - .section .fixup,"ax" /* must zero dest */ ENTRY(bad_from_user) bad_from_user: CFI_STARTPROC + testl %edx,%edx + js bad_to_user movl %edx,%ecx xorl %eax,%eax rep stosb bad_to_user: movl %edx,%eax + pax_force_retaddr ret CFI_ENDPROC ENDPROC(bad_from_user) @@ -179,6 +153,7 @@ ENTRY(copy_user_generic_unrolled) decl %ecx jnz 21b 23: xor %eax,%eax + pax_force_retaddr ret .section .fixup,"ax" @@ -251,6 +226,7 @@ ENTRY(copy_user_generic_string) 3: rep movsb 4: xorl %eax,%eax + pax_force_retaddr ret .section .fixup,"ax" @@ -287,6 +263,7 @@ ENTRY(copy_user_enhanced_fast_string) 1: rep movsb 2: xorl %eax,%eax + pax_force_retaddr ret .section .fixup,"ax" diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S index cb0c112..cb2d3c5 100644 --- a/arch/x86/lib/copy_user_nocache_64.S +++ b/arch/x86/lib/copy_user_nocache_64.S @@ -8,12 +8,14 @@ #include #include +#include #define FIX_ALIGNMENT 1 #include #include #include +#include .macro ALIGN_DESTINATION #ifdef FIX_ALIGNMENT @@ -50,6 +52,15 @@ */ ENTRY(__copy_user_nocache) CFI_STARTPROC + +#ifdef CONFIG_PAX_MEMORY_UDEREF + mov pax_user_shadow_base,%rcx + cmp %rcx,%rsi + jae 1f + add %rcx,%rsi +1: +#endif + cmpl $8,%edx jb 20f /* less then 8 bytes, go to byte copy loop */ ALIGN_DESTINATION @@ -98,6 +109,7 @@ ENTRY(__copy_user_nocache) jnz 21b 23: xorl %eax,%eax sfence + pax_force_retaddr ret .section .fixup,"ax" diff --git 
a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S index fb903b7..83cc6fb 100644 --- a/arch/x86/lib/csum-copy_64.S +++ b/arch/x86/lib/csum-copy_64.S @@ -8,6 +8,7 @@ #include #include #include +#include /* * Checksum copy with exception handling. @@ -64,8 +65,8 @@ ENTRY(csum_partial_copy_generic) CFI_ADJUST_CFA_OFFSET 7*8 movq %rbx, 2*8(%rsp) CFI_REL_OFFSET rbx, 2*8 - movq %r12, 3*8(%rsp) - CFI_REL_OFFSET r12, 3*8 + movq %r15, 3*8(%rsp) + CFI_REL_OFFSET r15, 3*8 movq %r14, 4*8(%rsp) CFI_REL_OFFSET r14, 4*8 movq %r13, 5*8(%rsp) @@ -80,16 +81,16 @@ ENTRY(csum_partial_copy_generic) movl %edx, %ecx xorl %r9d, %r9d - movq %rcx, %r12 + movq %rcx, %r15 - shrq $6, %r12 + shrq $6, %r15 jz .Lhandle_tail /* < 64 */ clc /* main loop. clear in 64 byte blocks */ /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ - /* r11: temp3, rdx: temp4, r12 loopcnt */ + /* r11: temp3, rdx: temp4, r15 loopcnt */ /* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */ .p2align 4 .Lloop: @@ -123,7 +124,7 @@ ENTRY(csum_partial_copy_generic) adcq %r14, %rax adcq %r13, %rax - decl %r12d + decl %r15d dest movq %rbx, (%rsi) @@ -218,8 +219,8 @@ ENTRY(csum_partial_copy_generic) .Lende: movq 2*8(%rsp), %rbx CFI_RESTORE rbx - movq 3*8(%rsp), %r12 - CFI_RESTORE r12 + movq 3*8(%rsp), %r15 + CFI_RESTORE r15 movq 4*8(%rsp), %r14 CFI_RESTORE r14 movq 5*8(%rsp), %r13 @@ -228,6 +229,7 @@ ENTRY(csum_partial_copy_generic) CFI_RESTORE rbp addq $7*8, %rsp CFI_ADJUST_CFA_OFFSET -7*8 + pax_force_retaddr ret CFI_RESTORE_STATE diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c index 459b58a..d67737f 100644 --- a/arch/x86/lib/csum-wrappers_64.c +++ b/arch/x86/lib/csum-wrappers_64.c @@ -52,7 +52,7 @@ csum_partial_copy_from_user(const void __user *src, void *dst, len -= 2; } } - isum = csum_partial_copy_generic((__force const void *)src, + isum = csum_partial_copy_generic((const void __force_kernel *)____m(src), dst, len, isum, errp, NULL); if (unlikely(*errp)) goto out_err; @@ -105,7 +105,7 @@ csum_partial_copy_to_user(const void *src, void __user *dst, } *errp = 0; - return csum_partial_copy_generic(src, (void __force *)dst, + return csum_partial_copy_generic(src, (void __force_kernel *)____m(dst), len, isum, NULL, errp); } EXPORT_SYMBOL(csum_partial_copy_to_user); diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index 51f1504..144f6bd 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -33,15 +33,38 @@ #include #include #include +#include +#include +#include + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_MEMORY_UDEREF) +#define __copyuser_seg gs; +#else +#define __copyuser_seg +#endif .text ENTRY(__get_user_1) CFI_STARTPROC + +#if !defined(CONFIG_X86_32) || !defined(CONFIG_PAX_MEMORY_UDEREF) GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user -1: movzb (%_ASM_AX),%edx + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + mov pax_user_shadow_base,%_ASM_DX + cmp %_ASM_DX,%_ASM_AX + jae 1234f + add %_ASM_DX,%_ASM_AX +1234: +#endif + +#endif + +1: __copyuser_seg movzb (%_ASM_AX),%edx xor %eax,%eax + pax_force_retaddr ret CFI_ENDPROC ENDPROC(__get_user_1) @@ -49,12 +72,26 @@ ENDPROC(__get_user_1) ENTRY(__get_user_2) CFI_STARTPROC add $1,%_ASM_AX + +#if !defined(CONFIG_X86_32) || !defined(CONFIG_PAX_MEMORY_UDEREF) jc bad_get_user GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user -2: movzwl -1(%_ASM_AX),%edx + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + mov 
pax_user_shadow_base,%_ASM_DX + cmp %_ASM_DX,%_ASM_AX + jae 1234f + add %_ASM_DX,%_ASM_AX +1234: +#endif + +#endif + +2: __copyuser_seg movzwl -1(%_ASM_AX),%edx xor %eax,%eax + pax_force_retaddr ret CFI_ENDPROC ENDPROC(__get_user_2) @@ -62,12 +99,26 @@ ENDPROC(__get_user_2) ENTRY(__get_user_4) CFI_STARTPROC add $3,%_ASM_AX + +#if !defined(CONFIG_X86_32) || !defined(CONFIG_PAX_MEMORY_UDEREF) jc bad_get_user GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user -3: mov -3(%_ASM_AX),%edx + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + mov pax_user_shadow_base,%_ASM_DX + cmp %_ASM_DX,%_ASM_AX + jae 1234f + add %_ASM_DX,%_ASM_AX +1234: +#endif + +#endif + +3: __copyuser_seg mov -3(%_ASM_AX),%edx xor %eax,%eax + pax_force_retaddr ret CFI_ENDPROC ENDPROC(__get_user_4) @@ -80,8 +131,18 @@ ENTRY(__get_user_8) GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + +#ifdef CONFIG_PAX_MEMORY_UDEREF + mov pax_user_shadow_base,%_ASM_DX + cmp %_ASM_DX,%_ASM_AX + jae 1234f + add %_ASM_DX,%_ASM_AX +1234: +#endif + 4: movq -7(%_ASM_AX),%_ASM_DX xor %eax,%eax + pax_force_retaddr ret CFI_ENDPROC ENDPROC(__get_user_8) @@ -91,6 +152,7 @@ bad_get_user: CFI_STARTPROC xor %edx,%edx mov $(-EFAULT),%_ASM_AX + pax_force_retaddr ret CFI_ENDPROC END(bad_get_user) diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 374562e..a75830b 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -21,6 +21,11 @@ #include #include #include +#ifdef __KERNEL__ +#include +#else +#define ktla_ktva(addr) addr +#endif /* Verify next sizeof(t) bytes can be on the same instruction */ #define validate_next(t, insn, n) \ @@ -49,8 +54,8 @@ void insn_init(struct insn *insn, const void *kaddr, int x86_64) { memset(insn, 0, sizeof(*insn)); - insn->kaddr = kaddr; - insn->next_byte = kaddr; + insn->kaddr = ktla_ktva(kaddr); + insn->next_byte = ktla_ktva(kaddr); insn->x86_64 = x86_64 ? 
1 : 0; insn->opnd_bytes = 4; if (x86_64) diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S index 05a95e7..326f2fa 100644 --- a/arch/x86/lib/iomap_copy_64.S +++ b/arch/x86/lib/iomap_copy_64.S @@ -17,6 +17,7 @@ #include #include +#include /* * override generic version in lib/iomap_copy.c @@ -25,6 +26,7 @@ ENTRY(__iowrite32_copy) CFI_STARTPROC movl %edx,%ecx rep movsd + pax_force_retaddr ret CFI_ENDPROC ENDPROC(__iowrite32_copy) diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index efbf2a0..8090894 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -34,6 +34,7 @@ rep movsq movl %edx, %ecx rep movsb + pax_force_retaddr ret .Lmemcpy_e: .previous @@ -51,6 +52,7 @@ movl %edx, %ecx rep movsb + pax_force_retaddr ret .Lmemcpy_e_e: .previous @@ -141,6 +143,7 @@ ENTRY(memcpy) movq %r9, 1*8(%rdi) movq %r10, -2*8(%rdi, %rdx) movq %r11, -1*8(%rdi, %rdx) + pax_force_retaddr retq .p2align 4 .Lless_16bytes: @@ -153,6 +156,7 @@ ENTRY(memcpy) movq -1*8(%rsi, %rdx), %r9 movq %r8, 0*8(%rdi) movq %r9, -1*8(%rdi, %rdx) + pax_force_retaddr retq .p2align 4 .Lless_8bytes: @@ -166,6 +170,7 @@ ENTRY(memcpy) movl -4(%rsi, %rdx), %r8d movl %ecx, (%rdi) movl %r8d, -4(%rdi, %rdx) + pax_force_retaddr retq .p2align 4 .Lless_3bytes: @@ -183,6 +188,7 @@ ENTRY(memcpy) jnz .Lloop_1 .Lend: + pax_force_retaddr retq CFI_ENDPROC ENDPROC(memcpy) diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index ee16461..c4f4918 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -202,6 +202,7 @@ ENTRY(memmove) movb (%rsi), %r11b movb %r11b, (%rdi) 13: + pax_force_retaddr retq CFI_ENDPROC @@ -210,6 +211,7 @@ ENTRY(memmove) /* Forward moving data. */ movq %rdx, %rcx rep movsb + pax_force_retaddr retq .Lmemmove_end_forward_efs: .previous diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 79bd454..24b3780 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -31,6 +31,7 @@ movl %r8d,%ecx rep stosb movq %r9,%rax + pax_force_retaddr ret .Lmemset_e: .previous @@ -53,6 +54,7 @@ movl %edx,%ecx rep stosb movq %r9,%rax + pax_force_retaddr ret .Lmemset_e_e: .previous @@ -121,6 +123,7 @@ ENTRY(__memset) .Lende: movq %r10,%rax + pax_force_retaddr ret CFI_RESTORE_STATE diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c index c9f2d9b..e7fd2c0 100644 --- a/arch/x86/lib/mmx_32.c +++ b/arch/x86/lib/mmx_32.c @@ -29,6 +29,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len) { void *p; int i; + unsigned long cr0; if (unlikely(in_interrupt())) return __memcpy(to, from, len); @@ -39,44 +40,72 @@ void *_mmx_memcpy(void *to, const void *from, size_t len) kernel_fpu_begin(); __asm__ __volatile__ ( - "1: prefetch (%0)\n" /* This set is 28 bytes */ - " prefetch 64(%0)\n" - " prefetch 128(%0)\n" - " prefetch 192(%0)\n" - " prefetch 256(%0)\n" + "1: prefetch (%1)\n" /* This set is 28 bytes */ + " prefetch 64(%1)\n" + " prefetch 128(%1)\n" + " prefetch 192(%1)\n" + " prefetch 256(%1)\n" "2: \n" ".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + "3: \n" + +#ifdef CONFIG_PAX_KERNEXEC + " movl %%cr0, %0\n" + " movl %0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " movl %0, %%cr0\n" +#endif + " jmp 2b\n" ".previous\n" _ASM_EXTABLE(1b, 3b) - : : "r" (from)); + : "=&r" (cr0) : "r" (from) : "ax"); for ( ; i > 5; i--) { __asm__ __volatile__ ( - "1: prefetch 320(%0)\n" - "2: movq (%0), %%mm0\n" - 
" movq 8(%0), %%mm1\n" - " movq 16(%0), %%mm2\n" - " movq 24(%0), %%mm3\n" - " movq %%mm0, (%1)\n" - " movq %%mm1, 8(%1)\n" - " movq %%mm2, 16(%1)\n" - " movq %%mm3, 24(%1)\n" - " movq 32(%0), %%mm0\n" - " movq 40(%0), %%mm1\n" - " movq 48(%0), %%mm2\n" - " movq 56(%0), %%mm3\n" - " movq %%mm0, 32(%1)\n" - " movq %%mm1, 40(%1)\n" - " movq %%mm2, 48(%1)\n" - " movq %%mm3, 56(%1)\n" + "1: prefetch 320(%1)\n" + "2: movq (%1), %%mm0\n" + " movq 8(%1), %%mm1\n" + " movq 16(%1), %%mm2\n" + " movq 24(%1), %%mm3\n" + " movq %%mm0, (%2)\n" + " movq %%mm1, 8(%2)\n" + " movq %%mm2, 16(%2)\n" + " movq %%mm3, 24(%2)\n" + " movq 32(%1), %%mm0\n" + " movq 40(%1), %%mm1\n" + " movq 48(%1), %%mm2\n" + " movq 56(%1), %%mm3\n" + " movq %%mm0, 32(%2)\n" + " movq %%mm1, 40(%2)\n" + " movq %%mm2, 48(%2)\n" + " movq %%mm3, 56(%2)\n" ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + "3:\n" + +#ifdef CONFIG_PAX_KERNEXEC + " movl %%cr0, %0\n" + " movl %0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " movl %0, %%cr0\n" +#endif + " jmp 2b\n" ".previous\n" _ASM_EXTABLE(1b, 3b) - : : "r" (from), "r" (to) : "memory"); + : "=&r" (cr0) : "r" (from), "r" (to) : "memory", "ax"); from += 64; to += 64; @@ -158,6 +187,7 @@ static void fast_clear_page(void *page) static void fast_copy_page(void *to, void *from) { int i; + unsigned long cr0; kernel_fpu_begin(); @@ -166,42 +196,70 @@ static void fast_copy_page(void *to, void *from) * but that is for later. -AV */ __asm__ __volatile__( - "1: prefetch (%0)\n" - " prefetch 64(%0)\n" - " prefetch 128(%0)\n" - " prefetch 192(%0)\n" - " prefetch 256(%0)\n" + "1: prefetch (%1)\n" + " prefetch 64(%1)\n" + " prefetch 128(%1)\n" + " prefetch 192(%1)\n" + " prefetch 256(%1)\n" "2: \n" ".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + "3: \n" + +#ifdef CONFIG_PAX_KERNEXEC + " movl %%cr0, %0\n" + " movl %0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " movl %0, %%cr0\n" +#endif + " jmp 2b\n" ".previous\n" - _ASM_EXTABLE(1b, 3b) : : "r" (from)); + _ASM_EXTABLE(1b, 3b) : "=&r" (cr0) : "r" (from) : "ax"); for (i = 0; i < (4096-320)/64; i++) { __asm__ __volatile__ ( - "1: prefetch 320(%0)\n" - "2: movq (%0), %%mm0\n" - " movntq %%mm0, (%1)\n" - " movq 8(%0), %%mm1\n" - " movntq %%mm1, 8(%1)\n" - " movq 16(%0), %%mm2\n" - " movntq %%mm2, 16(%1)\n" - " movq 24(%0), %%mm3\n" - " movntq %%mm3, 24(%1)\n" - " movq 32(%0), %%mm4\n" - " movntq %%mm4, 32(%1)\n" - " movq 40(%0), %%mm5\n" - " movntq %%mm5, 40(%1)\n" - " movq 48(%0), %%mm6\n" - " movntq %%mm6, 48(%1)\n" - " movq 56(%0), %%mm7\n" - " movntq %%mm7, 56(%1)\n" + "1: prefetch 320(%1)\n" + "2: movq (%1), %%mm0\n" + " movntq %%mm0, (%2)\n" + " movq 8(%1), %%mm1\n" + " movntq %%mm1, 8(%2)\n" + " movq 16(%1), %%mm2\n" + " movntq %%mm2, 16(%2)\n" + " movq 24(%1), %%mm3\n" + " movntq %%mm3, 24(%2)\n" + " movq 32(%1), %%mm4\n" + " movntq %%mm4, 32(%2)\n" + " movq 40(%1), %%mm5\n" + " movntq %%mm5, 40(%2)\n" + " movq 48(%1), %%mm6\n" + " movntq %%mm6, 48(%2)\n" + " movq 56(%1), %%mm7\n" + " movntq %%mm7, 56(%2)\n" ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + "3:\n" + +#ifdef CONFIG_PAX_KERNEXEC + " movl %%cr0, %0\n" + " movl %0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + +#ifdef 
CONFIG_PAX_KERNEXEC + " movl %0, %%cr0\n" +#endif + " jmp 2b\n" ".previous\n" - _ASM_EXTABLE(1b, 3b) : : "r" (from), "r" (to) : "memory"); + _ASM_EXTABLE(1b, 3b) : "=&r" (cr0) : "r" (from), "r" (to) : "memory", "ax"); from += 64; to += 64; @@ -280,47 +338,76 @@ static void fast_clear_page(void *page) static void fast_copy_page(void *to, void *from) { int i; + unsigned long cr0; kernel_fpu_begin(); __asm__ __volatile__ ( - "1: prefetch (%0)\n" - " prefetch 64(%0)\n" - " prefetch 128(%0)\n" - " prefetch 192(%0)\n" - " prefetch 256(%0)\n" + "1: prefetch (%1)\n" + " prefetch 64(%1)\n" + " prefetch 128(%1)\n" + " prefetch 192(%1)\n" + " prefetch 256(%1)\n" "2: \n" ".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + "3: \n" + +#ifdef CONFIG_PAX_KERNEXEC + " movl %%cr0, %0\n" + " movl %0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " movl %0, %%cr0\n" +#endif + " jmp 2b\n" ".previous\n" - _ASM_EXTABLE(1b, 3b) : : "r" (from)); + _ASM_EXTABLE(1b, 3b) : "=&r" (cr0) : "r" (from) : "ax"); for (i = 0; i < 4096/64; i++) { __asm__ __volatile__ ( - "1: prefetch 320(%0)\n" - "2: movq (%0), %%mm0\n" - " movq 8(%0), %%mm1\n" - " movq 16(%0), %%mm2\n" - " movq 24(%0), %%mm3\n" - " movq %%mm0, (%1)\n" - " movq %%mm1, 8(%1)\n" - " movq %%mm2, 16(%1)\n" - " movq %%mm3, 24(%1)\n" - " movq 32(%0), %%mm0\n" - " movq 40(%0), %%mm1\n" - " movq 48(%0), %%mm2\n" - " movq 56(%0), %%mm3\n" - " movq %%mm0, 32(%1)\n" - " movq %%mm1, 40(%1)\n" - " movq %%mm2, 48(%1)\n" - " movq %%mm3, 56(%1)\n" + "1: prefetch 320(%1)\n" + "2: movq (%1), %%mm0\n" + " movq 8(%1), %%mm1\n" + " movq 16(%1), %%mm2\n" + " movq 24(%1), %%mm3\n" + " movq %%mm0, (%2)\n" + " movq %%mm1, 8(%2)\n" + " movq %%mm2, 16(%2)\n" + " movq %%mm3, 24(%2)\n" + " movq 32(%1), %%mm0\n" + " movq 40(%1), %%mm1\n" + " movq 48(%1), %%mm2\n" + " movq 56(%1), %%mm3\n" + " movq %%mm0, 32(%2)\n" + " movq %%mm1, 40(%2)\n" + " movq %%mm2, 48(%2)\n" + " movq %%mm3, 56(%2)\n" ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + "3:\n" + +#ifdef CONFIG_PAX_KERNEXEC + " movl %%cr0, %0\n" + " movl %0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " movl %0, %%cr0\n" +#endif + " jmp 2b\n" ".previous\n" _ASM_EXTABLE(1b, 3b) - : : "r" (from), "r" (to) : "memory"); + : "=&r" (cr0) : "r" (from), "r" (to) : "memory", "ax"); from += 64; to += 64; diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index 69fa106..234ac7f 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S @@ -3,6 +3,7 @@ #include #include #include +#include #ifdef CONFIG_X86_64 /* @@ -37,6 +38,7 @@ ENTRY(native_\op\()_safe_regs) movl %edi, 28(%r10) popq_cfi %rbp popq_cfi %rbx + pax_force_retaddr ret 3: CFI_RESTORE_STATE diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index 36b0d15..3edf573 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -15,7 +15,9 @@ #include #include #include - +#include +#include +#include /* * __put_user_X @@ -29,52 +31,119 @@ * as they get called from within inline assembly. 
*/ -#define ENTER CFI_STARTPROC ; \ - GET_THREAD_INFO(%_ASM_BX) -#define EXIT ret ; \ +#define ENTER CFI_STARTPROC +#define EXIT pax_force_retaddr; ret ; \ CFI_ENDPROC +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) +#define _DEST %_ASM_CX,%_ASM_BX +#else +#define _DEST %_ASM_CX +#endif + +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_MEMORY_UDEREF) +#define __copyuser_seg gs; +#else +#define __copyuser_seg +#endif + .text ENTRY(__put_user_1) ENTER + +#if !defined(CONFIG_X86_32) || !defined(CONFIG_PAX_MEMORY_UDEREF) + GET_THREAD_INFO(%_ASM_BX) cmp TI_addr_limit(%_ASM_BX),%_ASM_CX jae bad_put_user -1: movb %al,(%_ASM_CX) + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + mov pax_user_shadow_base,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX + jb 1234f + xor %ebx,%ebx +1234: +#endif + +#endif + +1: __copyuser_seg movb %al,(_DEST) xor %eax,%eax EXIT ENDPROC(__put_user_1) ENTRY(__put_user_2) ENTER + +#if !defined(CONFIG_X86_32) || !defined(CONFIG_PAX_MEMORY_UDEREF) + GET_THREAD_INFO(%_ASM_BX) mov TI_addr_limit(%_ASM_BX),%_ASM_BX sub $1,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user -2: movw %ax,(%_ASM_CX) + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + mov pax_user_shadow_base,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX + jb 1234f + xor %ebx,%ebx +1234: +#endif + +#endif + +2: __copyuser_seg movw %ax,(_DEST) xor %eax,%eax EXIT ENDPROC(__put_user_2) ENTRY(__put_user_4) ENTER + +#if !defined(CONFIG_X86_32) || !defined(CONFIG_PAX_MEMORY_UDEREF) + GET_THREAD_INFO(%_ASM_BX) mov TI_addr_limit(%_ASM_BX),%_ASM_BX sub $3,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user -3: movl %eax,(%_ASM_CX) + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + mov pax_user_shadow_base,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX + jb 1234f + xor %ebx,%ebx +1234: +#endif + +#endif + +3: __copyuser_seg movl %eax,(_DEST) xor %eax,%eax EXIT ENDPROC(__put_user_4) ENTRY(__put_user_8) ENTER + +#if !defined(CONFIG_X86_32) || !defined(CONFIG_PAX_MEMORY_UDEREF) + GET_THREAD_INFO(%_ASM_BX) mov TI_addr_limit(%_ASM_BX),%_ASM_BX sub $7,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user -4: mov %_ASM_AX,(%_ASM_CX) + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + mov pax_user_shadow_base,%_ASM_BX + cmp %_ASM_BX,%_ASM_CX + jb 1234f + xor %ebx,%ebx +1234: +#endif + +#endif + +4: __copyuser_seg mov %_ASM_AX,(_DEST) #ifdef CONFIG_X86_32 -5: movl %edx,4(%_ASM_CX) +5: __copyuser_seg movl %edx,4(_DEST) #endif xor %eax,%eax EXIT diff --git a/arch/x86/lib/rwlock.S b/arch/x86/lib/rwlock.S index 1cad221..de671ee 100644 --- a/arch/x86/lib/rwlock.S +++ b/arch/x86/lib/rwlock.S @@ -16,13 +16,34 @@ ENTRY(__write_lock_failed) FRAME 0: LOCK_PREFIX WRITE_LOCK_ADD($RW_LOCK_BIAS) (%__lock_ptr) + +#ifdef CONFIG_PAX_REFCOUNT + jno 1234f + LOCK_PREFIX + WRITE_LOCK_SUB($RW_LOCK_BIAS) (%__lock_ptr) + int $4 +1234: + _ASM_EXTABLE(1234b, 1234b) +#endif + 1: rep; nop cmpl $WRITE_LOCK_CMP, (%__lock_ptr) jne 1b LOCK_PREFIX WRITE_LOCK_SUB($RW_LOCK_BIAS) (%__lock_ptr) + +#ifdef CONFIG_PAX_REFCOUNT + jno 1234f + LOCK_PREFIX + WRITE_LOCK_ADD($RW_LOCK_BIAS) (%__lock_ptr) + int $4 +1234: + _ASM_EXTABLE(1234b, 1234b) +#endif + jnz 0b ENDFRAME + pax_force_retaddr ret CFI_ENDPROC END(__write_lock_failed) @@ -32,13 +53,34 @@ ENTRY(__read_lock_failed) FRAME 0: LOCK_PREFIX READ_LOCK_SIZE(inc) (%__lock_ptr) + +#ifdef CONFIG_PAX_REFCOUNT + jno 1234f + LOCK_PREFIX + READ_LOCK_SIZE(dec) (%__lock_ptr) + int $4 +1234: + _ASM_EXTABLE(1234b, 1234b) +#endif + 1: rep; nop READ_LOCK_SIZE(cmp) $1, (%__lock_ptr) js 1b LOCK_PREFIX 
READ_LOCK_SIZE(dec) (%__lock_ptr) + +#ifdef CONFIG_PAX_REFCOUNT + jno 1234f + LOCK_PREFIX + READ_LOCK_SIZE(inc) (%__lock_ptr) + int $4 +1234: + _ASM_EXTABLE(1234b, 1234b) +#endif + js 0b ENDFRAME + pax_force_retaddr ret CFI_ENDPROC END(__read_lock_failed) diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S index 5dff5f0..cadebf4 100644 --- a/arch/x86/lib/rwsem.S +++ b/arch/x86/lib/rwsem.S @@ -94,6 +94,7 @@ ENTRY(call_rwsem_down_read_failed) __ASM_SIZE(pop,_cfi) %__ASM_REG(dx) CFI_RESTORE __ASM_REG(dx) restore_common_regs + pax_force_retaddr ret CFI_ENDPROC ENDPROC(call_rwsem_down_read_failed) @@ -104,6 +105,7 @@ ENTRY(call_rwsem_down_write_failed) movq %rax,%rdi call rwsem_down_write_failed restore_common_regs + pax_force_retaddr ret CFI_ENDPROC ENDPROC(call_rwsem_down_write_failed) @@ -117,7 +119,8 @@ ENTRY(call_rwsem_wake) movq %rax,%rdi call rwsem_wake restore_common_regs -1: ret +1: pax_force_retaddr + ret CFI_ENDPROC ENDPROC(call_rwsem_wake) @@ -131,6 +134,7 @@ ENTRY(call_rwsem_downgrade_wake) __ASM_SIZE(pop,_cfi) %__ASM_REG(dx) CFI_RESTORE __ASM_REG(dx) restore_common_regs + pax_force_retaddr ret CFI_ENDPROC ENDPROC(call_rwsem_downgrade_wake) diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index a63efd6..8149fbe 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S @@ -8,6 +8,7 @@ #include #include #include +#include /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ .macro THUNK name, func, put_ret_addr_in_rdi=0 @@ -15,11 +16,11 @@ \name: CFI_STARTPROC - /* this one pushes 9 elems, the next one would be %rIP */ - SAVE_ARGS + /* this one pushes 15+1 elems, the next one would be %rIP */ + SAVE_ARGS 8 .if \put_ret_addr_in_rdi - movq_cfi_restore 9*8, rdi + movq_cfi_restore RIP, rdi .endif call \func @@ -38,8 +39,9 @@ /* SAVE_ARGS below is used only for the .cfi directives it contains. 
*/ CFI_STARTPROC - SAVE_ARGS + SAVE_ARGS 8 restore: - RESTORE_ARGS + RESTORE_ARGS 1,8 + pax_force_retaddr ret CFI_ENDPROC diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index e218d5d..3966c85 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -43,7 +43,7 @@ do { \ __asm__ __volatile__( \ " testl %1,%1\n" \ " jz 2f\n" \ - "0: lodsb\n" \ + "0: "__copyuser_seg"lodsb\n" \ " stosb\n" \ " testb %%al,%%al\n" \ " jz 1f\n" \ @@ -128,10 +128,12 @@ do { \ int __d0; \ might_fault(); \ __asm__ __volatile__( \ + __COPYUSER_SET_ES \ "0: rep; stosl\n" \ " movl %2,%0\n" \ "1: rep; stosb\n" \ "2:\n" \ + __COPYUSER_RESTORE_ES \ ".section .fixup,\"ax\"\n" \ "3: lea 0(%2,%0,4),%0\n" \ " jmp 2b\n" \ @@ -200,6 +202,7 @@ long strnlen_user(const char __user *s, long n) might_fault(); __asm__ __volatile__( + __COPYUSER_SET_ES " testl %0, %0\n" " jz 3f\n" " andl %0,%%ecx\n" @@ -208,6 +211,7 @@ long strnlen_user(const char __user *s, long n) " subl %%ecx,%0\n" " addl %0,%%eax\n" "1:\n" + __COPYUSER_RESTORE_ES ".section .fixup,\"ax\"\n" "2: xorl %%eax,%%eax\n" " jmp 1b\n" @@ -227,7 +231,7 @@ EXPORT_SYMBOL(strnlen_user); #ifdef CONFIG_X86_INTEL_USERCOPY static unsigned long -__copy_user_intel(void __user *to, const void *from, unsigned long size) +__generic_copy_to_user_intel(void __user *to, const void *from, unsigned long size) { int d0, d1; __asm__ __volatile__( @@ -239,36 +243,36 @@ __copy_user_intel(void __user *to, const void *from, unsigned long size) " .align 2,0x90\n" "3: movl 0(%4), %%eax\n" "4: movl 4(%4), %%edx\n" - "5: movl %%eax, 0(%3)\n" - "6: movl %%edx, 4(%3)\n" + "5: "__copyuser_seg" movl %%eax, 0(%3)\n" + "6: "__copyuser_seg" movl %%edx, 4(%3)\n" "7: movl 8(%4), %%eax\n" "8: movl 12(%4),%%edx\n" - "9: movl %%eax, 8(%3)\n" - "10: movl %%edx, 12(%3)\n" + "9: "__copyuser_seg" movl %%eax, 8(%3)\n" + "10: "__copyuser_seg" movl %%edx, 12(%3)\n" "11: movl 16(%4), %%eax\n" "12: movl 20(%4), %%edx\n" - "13: movl %%eax, 16(%3)\n" - "14: movl %%edx, 20(%3)\n" + "13: "__copyuser_seg" movl %%eax, 16(%3)\n" + "14: "__copyuser_seg" movl %%edx, 20(%3)\n" "15: movl 24(%4), %%eax\n" "16: movl 28(%4), %%edx\n" - "17: movl %%eax, 24(%3)\n" - "18: movl %%edx, 28(%3)\n" + "17: "__copyuser_seg" movl %%eax, 24(%3)\n" + "18: "__copyuser_seg" movl %%edx, 28(%3)\n" "19: movl 32(%4), %%eax\n" "20: movl 36(%4), %%edx\n" - "21: movl %%eax, 32(%3)\n" - "22: movl %%edx, 36(%3)\n" + "21: "__copyuser_seg" movl %%eax, 32(%3)\n" + "22: "__copyuser_seg" movl %%edx, 36(%3)\n" "23: movl 40(%4), %%eax\n" "24: movl 44(%4), %%edx\n" - "25: movl %%eax, 40(%3)\n" - "26: movl %%edx, 44(%3)\n" + "25: "__copyuser_seg" movl %%eax, 40(%3)\n" + "26: "__copyuser_seg" movl %%edx, 44(%3)\n" "27: movl 48(%4), %%eax\n" "28: movl 52(%4), %%edx\n" - "29: movl %%eax, 48(%3)\n" - "30: movl %%edx, 52(%3)\n" + "29: "__copyuser_seg" movl %%eax, 48(%3)\n" + "30: "__copyuser_seg" movl %%edx, 52(%3)\n" "31: movl 56(%4), %%eax\n" "32: movl 60(%4), %%edx\n" - "33: movl %%eax, 56(%3)\n" - "34: movl %%edx, 60(%3)\n" + "33: "__copyuser_seg" movl %%eax, 56(%3)\n" + "34: "__copyuser_seg" movl %%edx, 60(%3)\n" " addl $-64, %0\n" " addl $64, %4\n" " addl $64, %3\n" @@ -278,10 +282,119 @@ __copy_user_intel(void __user *to, const void *from, unsigned long size) " shrl $2, %0\n" " andl $3, %%eax\n" " cld\n" + __COPYUSER_SET_ES "99: rep; movsl\n" "36: movl %%eax, %0\n" "37: rep; movsb\n" "100:\n" + __COPYUSER_RESTORE_ES + ".section .fixup,\"ax\"\n" + "101: lea 0(%%eax,%0,4),%0\n" + " jmp 100b\n" + ".previous\n" + ".section 
__ex_table,\"a\"\n" + " .align 4\n" + " .long 1b,100b\n" + " .long 2b,100b\n" + " .long 3b,100b\n" + " .long 4b,100b\n" + " .long 5b,100b\n" + " .long 6b,100b\n" + " .long 7b,100b\n" + " .long 8b,100b\n" + " .long 9b,100b\n" + " .long 10b,100b\n" + " .long 11b,100b\n" + " .long 12b,100b\n" + " .long 13b,100b\n" + " .long 14b,100b\n" + " .long 15b,100b\n" + " .long 16b,100b\n" + " .long 17b,100b\n" + " .long 18b,100b\n" + " .long 19b,100b\n" + " .long 20b,100b\n" + " .long 21b,100b\n" + " .long 22b,100b\n" + " .long 23b,100b\n" + " .long 24b,100b\n" + " .long 25b,100b\n" + " .long 26b,100b\n" + " .long 27b,100b\n" + " .long 28b,100b\n" + " .long 29b,100b\n" + " .long 30b,100b\n" + " .long 31b,100b\n" + " .long 32b,100b\n" + " .long 33b,100b\n" + " .long 34b,100b\n" + " .long 35b,100b\n" + " .long 36b,100b\n" + " .long 37b,100b\n" + " .long 99b,101b\n" + ".previous" + : "=&c"(size), "=&D" (d0), "=&S" (d1) + : "1"(to), "2"(from), "0"(size) + : "eax", "edx", "memory"); + return size; +} + +static unsigned long +__generic_copy_from_user_intel(void *to, const void __user *from, unsigned long size) +{ + int d0, d1; + __asm__ __volatile__( + " .align 2,0x90\n" + "1: "__copyuser_seg" movl 32(%4), %%eax\n" + " cmpl $67, %0\n" + " jbe 3f\n" + "2: "__copyuser_seg" movl 64(%4), %%eax\n" + " .align 2,0x90\n" + "3: "__copyuser_seg" movl 0(%4), %%eax\n" + "4: "__copyuser_seg" movl 4(%4), %%edx\n" + "5: movl %%eax, 0(%3)\n" + "6: movl %%edx, 4(%3)\n" + "7: "__copyuser_seg" movl 8(%4), %%eax\n" + "8: "__copyuser_seg" movl 12(%4),%%edx\n" + "9: movl %%eax, 8(%3)\n" + "10: movl %%edx, 12(%3)\n" + "11: "__copyuser_seg" movl 16(%4), %%eax\n" + "12: "__copyuser_seg" movl 20(%4), %%edx\n" + "13: movl %%eax, 16(%3)\n" + "14: movl %%edx, 20(%3)\n" + "15: "__copyuser_seg" movl 24(%4), %%eax\n" + "16: "__copyuser_seg" movl 28(%4), %%edx\n" + "17: movl %%eax, 24(%3)\n" + "18: movl %%edx, 28(%3)\n" + "19: "__copyuser_seg" movl 32(%4), %%eax\n" + "20: "__copyuser_seg" movl 36(%4), %%edx\n" + "21: movl %%eax, 32(%3)\n" + "22: movl %%edx, 36(%3)\n" + "23: "__copyuser_seg" movl 40(%4), %%eax\n" + "24: "__copyuser_seg" movl 44(%4), %%edx\n" + "25: movl %%eax, 40(%3)\n" + "26: movl %%edx, 44(%3)\n" + "27: "__copyuser_seg" movl 48(%4), %%eax\n" + "28: "__copyuser_seg" movl 52(%4), %%edx\n" + "29: movl %%eax, 48(%3)\n" + "30: movl %%edx, 52(%3)\n" + "31: "__copyuser_seg" movl 56(%4), %%eax\n" + "32: "__copyuser_seg" movl 60(%4), %%edx\n" + "33: movl %%eax, 56(%3)\n" + "34: movl %%edx, 60(%3)\n" + " addl $-64, %0\n" + " addl $64, %4\n" + " addl $64, %3\n" + " cmpl $63, %0\n" + " ja 1b\n" + "35: movl %0, %%eax\n" + " shrl $2, %0\n" + " andl $3, %%eax\n" + " cld\n" + "99: rep; "__copyuser_seg" movsl\n" + "36: movl %%eax, %0\n" + "37: rep; "__copyuser_seg" movsb\n" + "100:\n" ".section .fixup,\"ax\"\n" "101: lea 0(%%eax,%0,4),%0\n" " jmp 100b\n" @@ -339,41 +452,41 @@ __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size) int d0, d1; __asm__ __volatile__( " .align 2,0x90\n" - "0: movl 32(%4), %%eax\n" + "0: "__copyuser_seg" movl 32(%4), %%eax\n" " cmpl $67, %0\n" " jbe 2f\n" - "1: movl 64(%4), %%eax\n" + "1: "__copyuser_seg" movl 64(%4), %%eax\n" " .align 2,0x90\n" - "2: movl 0(%4), %%eax\n" - "21: movl 4(%4), %%edx\n" + "2: "__copyuser_seg" movl 0(%4), %%eax\n" + "21: "__copyuser_seg" movl 4(%4), %%edx\n" " movl %%eax, 0(%3)\n" " movl %%edx, 4(%3)\n" - "3: movl 8(%4), %%eax\n" - "31: movl 12(%4),%%edx\n" + "3: "__copyuser_seg" movl 8(%4), %%eax\n" + "31: "__copyuser_seg" movl 12(%4),%%edx\n" " movl %%eax, 
8(%3)\n" " movl %%edx, 12(%3)\n" - "4: movl 16(%4), %%eax\n" - "41: movl 20(%4), %%edx\n" + "4: "__copyuser_seg" movl 16(%4), %%eax\n" + "41: "__copyuser_seg" movl 20(%4), %%edx\n" " movl %%eax, 16(%3)\n" " movl %%edx, 20(%3)\n" - "10: movl 24(%4), %%eax\n" - "51: movl 28(%4), %%edx\n" + "10: "__copyuser_seg" movl 24(%4), %%eax\n" + "51: "__copyuser_seg" movl 28(%4), %%edx\n" " movl %%eax, 24(%3)\n" " movl %%edx, 28(%3)\n" - "11: movl 32(%4), %%eax\n" - "61: movl 36(%4), %%edx\n" + "11: "__copyuser_seg" movl 32(%4), %%eax\n" + "61: "__copyuser_seg" movl 36(%4), %%edx\n" " movl %%eax, 32(%3)\n" " movl %%edx, 36(%3)\n" - "12: movl 40(%4), %%eax\n" - "71: movl 44(%4), %%edx\n" + "12: "__copyuser_seg" movl 40(%4), %%eax\n" + "71: "__copyuser_seg" movl 44(%4), %%edx\n" " movl %%eax, 40(%3)\n" " movl %%edx, 44(%3)\n" - "13: movl 48(%4), %%eax\n" - "81: movl 52(%4), %%edx\n" + "13: "__copyuser_seg" movl 48(%4), %%eax\n" + "81: "__copyuser_seg" movl 52(%4), %%edx\n" " movl %%eax, 48(%3)\n" " movl %%edx, 52(%3)\n" - "14: movl 56(%4), %%eax\n" - "91: movl 60(%4), %%edx\n" + "14: "__copyuser_seg" movl 56(%4), %%eax\n" + "91: "__copyuser_seg" movl 60(%4), %%edx\n" " movl %%eax, 56(%3)\n" " movl %%edx, 60(%3)\n" " addl $-64, %0\n" @@ -385,9 +498,9 @@ __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size) " shrl $2, %0\n" " andl $3, %%eax\n" " cld\n" - "6: rep; movsl\n" + "6: rep; "__copyuser_seg" movsl\n" " movl %%eax,%0\n" - "7: rep; movsb\n" + "7: rep; "__copyuser_seg" movsb\n" "8:\n" ".section .fixup,\"ax\"\n" "9: lea 0(%%eax,%0,4),%0\n" @@ -440,41 +553,41 @@ static unsigned long __copy_user_zeroing_intel_nocache(void *to, __asm__ __volatile__( " .align 2,0x90\n" - "0: movl 32(%4), %%eax\n" + "0: "__copyuser_seg" movl 32(%4), %%eax\n" " cmpl $67, %0\n" " jbe 2f\n" - "1: movl 64(%4), %%eax\n" + "1: "__copyuser_seg" movl 64(%4), %%eax\n" " .align 2,0x90\n" - "2: movl 0(%4), %%eax\n" - "21: movl 4(%4), %%edx\n" + "2: "__copyuser_seg" movl 0(%4), %%eax\n" + "21: "__copyuser_seg" movl 4(%4), %%edx\n" " movnti %%eax, 0(%3)\n" " movnti %%edx, 4(%3)\n" - "3: movl 8(%4), %%eax\n" - "31: movl 12(%4),%%edx\n" + "3: "__copyuser_seg" movl 8(%4), %%eax\n" + "31: "__copyuser_seg" movl 12(%4),%%edx\n" " movnti %%eax, 8(%3)\n" " movnti %%edx, 12(%3)\n" - "4: movl 16(%4), %%eax\n" - "41: movl 20(%4), %%edx\n" + "4: "__copyuser_seg" movl 16(%4), %%eax\n" + "41: "__copyuser_seg" movl 20(%4), %%edx\n" " movnti %%eax, 16(%3)\n" " movnti %%edx, 20(%3)\n" - "10: movl 24(%4), %%eax\n" - "51: movl 28(%4), %%edx\n" + "10: "__copyuser_seg" movl 24(%4), %%eax\n" + "51: "__copyuser_seg" movl 28(%4), %%edx\n" " movnti %%eax, 24(%3)\n" " movnti %%edx, 28(%3)\n" - "11: movl 32(%4), %%eax\n" - "61: movl 36(%4), %%edx\n" + "11: "__copyuser_seg" movl 32(%4), %%eax\n" + "61: "__copyuser_seg" movl 36(%4), %%edx\n" " movnti %%eax, 32(%3)\n" " movnti %%edx, 36(%3)\n" - "12: movl 40(%4), %%eax\n" - "71: movl 44(%4), %%edx\n" + "12: "__copyuser_seg" movl 40(%4), %%eax\n" + "71: "__copyuser_seg" movl 44(%4), %%edx\n" " movnti %%eax, 40(%3)\n" " movnti %%edx, 44(%3)\n" - "13: movl 48(%4), %%eax\n" - "81: movl 52(%4), %%edx\n" + "13: "__copyuser_seg" movl 48(%4), %%eax\n" + "81: "__copyuser_seg" movl 52(%4), %%edx\n" " movnti %%eax, 48(%3)\n" " movnti %%edx, 52(%3)\n" - "14: movl 56(%4), %%eax\n" - "91: movl 60(%4), %%edx\n" + "14: "__copyuser_seg" movl 56(%4), %%eax\n" + "91: "__copyuser_seg" movl 60(%4), %%edx\n" " movnti %%eax, 56(%3)\n" " movnti %%edx, 60(%3)\n" " addl $-64, %0\n" @@ -487,9 +600,9 @@ static 
unsigned long __copy_user_zeroing_intel_nocache(void *to, " shrl $2, %0\n" " andl $3, %%eax\n" " cld\n" - "6: rep; movsl\n" + "6: rep; "__copyuser_seg" movsl\n" " movl %%eax,%0\n" - "7: rep; movsb\n" + "7: rep; "__copyuser_seg" movsb\n" "8:\n" ".section .fixup,\"ax\"\n" "9: lea 0(%%eax,%0,4),%0\n" @@ -537,41 +650,41 @@ static unsigned long __copy_user_intel_nocache(void *to, __asm__ __volatile__( " .align 2,0x90\n" - "0: movl 32(%4), %%eax\n" + "0: "__copyuser_seg" movl 32(%4), %%eax\n" " cmpl $67, %0\n" " jbe 2f\n" - "1: movl 64(%4), %%eax\n" + "1: "__copyuser_seg" movl 64(%4), %%eax\n" " .align 2,0x90\n" - "2: movl 0(%4), %%eax\n" - "21: movl 4(%4), %%edx\n" + "2: "__copyuser_seg" movl 0(%4), %%eax\n" + "21: "__copyuser_seg" movl 4(%4), %%edx\n" " movnti %%eax, 0(%3)\n" " movnti %%edx, 4(%3)\n" - "3: movl 8(%4), %%eax\n" - "31: movl 12(%4),%%edx\n" + "3: "__copyuser_seg" movl 8(%4), %%eax\n" + "31: "__copyuser_seg" movl 12(%4),%%edx\n" " movnti %%eax, 8(%3)\n" " movnti %%edx, 12(%3)\n" - "4: movl 16(%4), %%eax\n" - "41: movl 20(%4), %%edx\n" + "4: "__copyuser_seg" movl 16(%4), %%eax\n" + "41: "__copyuser_seg" movl 20(%4), %%edx\n" " movnti %%eax, 16(%3)\n" " movnti %%edx, 20(%3)\n" - "10: movl 24(%4), %%eax\n" - "51: movl 28(%4), %%edx\n" + "10: "__copyuser_seg" movl 24(%4), %%eax\n" + "51: "__copyuser_seg" movl 28(%4), %%edx\n" " movnti %%eax, 24(%3)\n" " movnti %%edx, 28(%3)\n" - "11: movl 32(%4), %%eax\n" - "61: movl 36(%4), %%edx\n" + "11: "__copyuser_seg" movl 32(%4), %%eax\n" + "61: "__copyuser_seg" movl 36(%4), %%edx\n" " movnti %%eax, 32(%3)\n" " movnti %%edx, 36(%3)\n" - "12: movl 40(%4), %%eax\n" - "71: movl 44(%4), %%edx\n" + "12: "__copyuser_seg" movl 40(%4), %%eax\n" + "71: "__copyuser_seg" movl 44(%4), %%edx\n" " movnti %%eax, 40(%3)\n" " movnti %%edx, 44(%3)\n" - "13: movl 48(%4), %%eax\n" - "81: movl 52(%4), %%edx\n" + "13: "__copyuser_seg" movl 48(%4), %%eax\n" + "81: "__copyuser_seg" movl 52(%4), %%edx\n" " movnti %%eax, 48(%3)\n" " movnti %%edx, 52(%3)\n" - "14: movl 56(%4), %%eax\n" - "91: movl 60(%4), %%edx\n" + "14: "__copyuser_seg" movl 56(%4), %%eax\n" + "91: "__copyuser_seg" movl 60(%4), %%edx\n" " movnti %%eax, 56(%3)\n" " movnti %%edx, 60(%3)\n" " addl $-64, %0\n" @@ -584,9 +697,9 @@ static unsigned long __copy_user_intel_nocache(void *to, " shrl $2, %0\n" " andl $3, %%eax\n" " cld\n" - "6: rep; movsl\n" + "6: rep; "__copyuser_seg" movsl\n" " movl %%eax,%0\n" - "7: rep; movsb\n" + "7: rep; "__copyuser_seg" movsb\n" "8:\n" ".section .fixup,\"ax\"\n" "9: lea 0(%%eax,%0,4),%0\n" @@ -629,32 +742,36 @@ static unsigned long __copy_user_intel_nocache(void *to, */ unsigned long __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size); -unsigned long __copy_user_intel(void __user *to, const void *from, +unsigned long __generic_copy_to_user_intel(void __user *to, const void *from, + unsigned long size); +unsigned long __generic_copy_from_user_intel(void *to, const void __user *from, unsigned long size); unsigned long __copy_user_zeroing_intel_nocache(void *to, const void __user *from, unsigned long size); #endif /* CONFIG_X86_INTEL_USERCOPY */ /* Generic arbitrary sized copy. 
*/ -#define __copy_user(to, from, size) \ +#define __copy_user(to, from, size, prefix, set, restore) \ do { \ int __d0, __d1, __d2; \ __asm__ __volatile__( \ + set \ " cmp $7,%0\n" \ " jbe 1f\n" \ " movl %1,%0\n" \ " negl %0\n" \ " andl $7,%0\n" \ " subl %0,%3\n" \ - "4: rep; movsb\n" \ + "4: rep; "prefix"movsb\n" \ " movl %3,%0\n" \ " shrl $2,%0\n" \ " andl $3,%3\n" \ " .align 2,0x90\n" \ - "0: rep; movsl\n" \ + "0: rep; "prefix"movsl\n" \ " movl %3,%0\n" \ - "1: rep; movsb\n" \ + "1: rep; "prefix"movsb\n" \ "2:\n" \ + restore \ ".section .fixup,\"ax\"\n" \ "5: addl %3,%0\n" \ " jmp 2b\n" \ @@ -682,14 +799,14 @@ do { \ " negl %0\n" \ " andl $7,%0\n" \ " subl %0,%3\n" \ - "4: rep; movsb\n" \ + "4: rep; "__copyuser_seg"movsb\n" \ " movl %3,%0\n" \ " shrl $2,%0\n" \ " andl $3,%3\n" \ " .align 2,0x90\n" \ - "0: rep; movsl\n" \ + "0: rep; "__copyuser_seg"movsl\n" \ " movl %3,%0\n" \ - "1: rep; movsb\n" \ + "1: rep; "__copyuser_seg"movsb\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "5: addl %3,%0\n" \ @@ -775,9 +892,9 @@ survive: } #endif if (movsl_is_ok(to, from, n)) - __copy_user(to, from, n); + __copy_user(to, from, n, "", __COPYUSER_SET_ES, __COPYUSER_RESTORE_ES); else - n = __copy_user_intel(to, from, n); + n = __generic_copy_to_user_intel(to, from, n); return n; } EXPORT_SYMBOL(__copy_to_user_ll); @@ -797,10 +914,9 @@ unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from, unsigned long n) { if (movsl_is_ok(to, from, n)) - __copy_user(to, from, n); + __copy_user(to, from, n, __copyuser_seg, "", ""); else - n = __copy_user_intel((void __user *)to, - (const void *)from, n); + n = __generic_copy_from_user_intel(to, from, n); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nozero); @@ -827,65 +943,49 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr if (n > 64 && cpu_has_xmm2) n = __copy_user_intel_nocache(to, from, n); else - __copy_user(to, from, n); + __copy_user(to, from, n, __copyuser_seg, "", ""); #else - __copy_user(to, from, n); + __copy_user(to, from, n, __copyuser_seg, "", ""); #endif return n; } EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); -/** - * copy_to_user: - Copy a block of data into user space. - * @to: Destination address, in user space. - * @from: Source address, in kernel space. - * @n: Number of bytes to copy. - * - * Context: User context only. This function may sleep. - * - * Copy data from kernel space to user space. - * - * Returns number of bytes that could not be copied. - * On success, this will be zero. - */ -unsigned long -copy_to_user(void __user *to, const void *from, unsigned long n) -{ - if (access_ok(VERIFY_WRITE, to, n)) - n = __copy_to_user(to, from, n); - return n; -} -EXPORT_SYMBOL(copy_to_user); - -/** - * copy_from_user: - Copy a block of data from user space. - * @to: Destination address, in kernel space. - * @from: Source address, in user space. - * @n: Number of bytes to copy. - * - * Context: User context only. This function may sleep. - * - * Copy data from user space to kernel space. - * - * Returns number of bytes that could not be copied. - * On success, this will be zero. - * - * If some data could not be copied, this function will pad the copied - * data to the requested size using zero bytes. 
- */ -unsigned long -_copy_from_user(void *to, const void __user *from, unsigned long n) -{ - if (access_ok(VERIFY_READ, from, n)) - n = __copy_from_user(to, from, n); - else - memset(to, 0, n); - return n; -} -EXPORT_SYMBOL(_copy_from_user); - void copy_from_user_overflow(void) { WARN(1, "Buffer overflow detected!\n"); } EXPORT_SYMBOL(copy_from_user_overflow); + +void copy_to_user_overflow(void) +{ + WARN(1, "Buffer overflow detected!\n"); +} +EXPORT_SYMBOL(copy_to_user_overflow); + +#ifdef CONFIG_PAX_MEMORY_UDEREF +void __set_fs(mm_segment_t x) +{ + switch (x.seg) { + case 0: + loadsegment(gs, 0); + break; + case TASK_SIZE_MAX: + loadsegment(gs, __USER_DS); + break; + case -1UL: + loadsegment(gs, __KERNEL_DS); + break; + default: + BUG(); + } +} +EXPORT_SYMBOL(__set_fs); + +void set_fs(mm_segment_t x) +{ + current_thread_info()->addr_limit = x; + __set_fs(x); +} +EXPORT_SYMBOL(set_fs); +#endif diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 554b7b5..4027e2c 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -42,6 +42,12 @@ long __strncpy_from_user(char *dst, const char __user *src, long count) { long res; + +#ifdef CONFIG_PAX_MEMORY_UDEREF + if ((unsigned long)src < pax_user_shadow_base) + src += pax_user_shadow_base; +#endif + __do_strncpy_from_user(dst, src, count, res); return res; } @@ -87,7 +93,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size) _ASM_EXTABLE(0b,3b) _ASM_EXTABLE(1b,2b) : [size8] "=&c"(size), [dst] "=&D" (__d0) - : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr), + : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(____m(addr)), [zero] "r" (0UL), [eight] "r" (8UL)); return size; } @@ -149,12 +155,11 @@ long strlen_user(const char __user *s) } EXPORT_SYMBOL(strlen_user); -unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len) +unsigned long copy_in_user(void __user *to, const void __user *from, unsigned long len) { - if (access_ok(VERIFY_WRITE, to, len) && access_ok(VERIFY_READ, from, len)) { - return copy_user_generic((__force void *)to, (__force void *)from, len); - } - return len; + if (access_ok(VERIFY_WRITE, to, len) && access_ok(VERIFY_READ, from, len)) + return copy_user_generic((void __force_kernel *)____m(to), (void __force_kernel *)____m(from), len); + return len; } EXPORT_SYMBOL(copy_in_user); @@ -164,7 +169,7 @@ EXPORT_SYMBOL(copy_in_user); * it is not necessary to optimize tail handling. 
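
/*
 * Editorial aside, not part of the patch: the pax_user_shadow_base adjustment
 * added to __strncpy_from_user() above translates a raw userland pointer into
 * the UDEREF shadow mapping before the kernel touches it.  Standalone sketch
 * of that translation; the base value and names below are illustrative, not
 * the real ones.
 */
#include <stdint.h>

#define DEMO_USER_SHADOW_BASE 0x0000100000000000ULL    /* assumed, not the real base */

static uint64_t demo_shadow_adjust(uint64_t user_ptr)
{
        /* Pointers below the shadow base are relocated into the shadow area;
         * pointers already inside it are used unchanged. */
        if (user_ptr < DEMO_USER_SHADOW_BASE)
                user_ptr += DEMO_USER_SHADOW_BASE;
        return user_ptr;
}

int main(void)
{
        return demo_shadow_adjust(0x1000) == DEMO_USER_SHADOW_BASE + 0x1000 ? 0 : 1;
}
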
*/ unsigned long -copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest) +copy_user_handle_tail(char __user *to, char __user *from, unsigned long len, unsigned zerorest) { char c; unsigned zero_len; @@ -181,3 +186,15 @@ copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest) break; return len; } + +void copy_from_user_overflow(void) +{ + WARN(1, "Buffer overflow detected!\n"); +} +EXPORT_SYMBOL(copy_from_user_overflow); + +void copy_to_user_overflow(void) +{ + WARN(1, "Buffer overflow detected!\n"); +} +EXPORT_SYMBOL(copy_to_user_overflow); diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index d0474ad..36e9257 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -8,7 +8,7 @@ int fixup_exception(struct pt_regs *regs) const struct exception_table_entry *fixup; #ifdef CONFIG_PNPBIOS - if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) { + if (unlikely(!v8086_mode(regs) && SEGMENT_IS_PNP_CODE(regs->cs))) { extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp; extern u32 pnp_bios_is_utter_crap; pnp_bios_is_utter_crap = 1; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 351590e..a1132fb 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -13,11 +13,18 @@ #include /* perf_sw_event */ #include /* hstate_index_to_shift */ #include /* prefetchw */ +#include +#include #include /* dotraplinkage, ... */ #include /* pgd_*(), ... */ #include /* kmemcheck_*(), ... */ #include /* VSYSCALL_START */ +#include + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) +#include +#endif /* * Page fault error code bits: @@ -55,7 +62,7 @@ static inline int __kprobes notify_page_fault(struct pt_regs *regs) int ret = 0; /* kprobe_running() needs smp_processor_id() */ - if (kprobes_built_in() && !user_mode_vm(regs)) { + if (kprobes_built_in() && !user_mode(regs)) { preempt_disable(); if (kprobe_running() && kprobe_fault_handler(regs, 14)) ret = 1; @@ -116,7 +123,10 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr, return !instr_lo || (instr_lo>>1) == 1; case 0x00: /* Prefetch instruction is 0x0F0D or 0x0F18 */ - if (probe_kernel_address(instr, opcode)) + if (user_mode(regs)) { + if (__copy_from_user_inatomic(&opcode, (unsigned char __force_user *)(instr), 1)) + return 0; + } else if (probe_kernel_address(instr, opcode)) return 0; *prefetch = (instr_lo == 0xF) && @@ -150,7 +160,10 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) while (instr < max_instr) { unsigned char opcode; - if (probe_kernel_address(instr, opcode)) + if (user_mode(regs)) { + if (__copy_from_user_inatomic(&opcode, (unsigned char __force_user *)(instr), 1)) + break; + } else if (probe_kernel_address(instr, opcode)) break; instr++; @@ -181,6 +194,34 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address, force_sig_info(si_signo, &info, tsk); } +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) +static bool pax_is_fetch_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); +#endif + +#ifdef CONFIG_PAX_EMUTRAMP +static int pax_handle_fetch_fault(struct pt_regs *regs); +#endif + +#ifdef CONFIG_PAX_PAGEEXEC +static inline pmd_t * pax_get_pmd(struct mm_struct *mm, unsigned long address) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset(mm, address); + if (!pgd_present(*pgd)) + return NULL; + pud = pud_offset(pgd, address); + if (!pud_present(*pud)) + return NULL; + pmd = pmd_offset(pud, address); + if (!pmd_present(*pmd)) + return NULL; + 
return pmd; +} +#endif + DEFINE_SPINLOCK(pgd_lock); LIST_HEAD(pgd_list); @@ -231,10 +272,22 @@ void vmalloc_sync_all(void) for (address = VMALLOC_START & PMD_MASK; address >= TASK_SIZE && address < FIXADDR_TOP; address += PMD_SIZE) { + +#ifdef CONFIG_PAX_PER_CPU_PGD + unsigned long cpu; +#else struct page *page; +#endif spin_lock(&pgd_lock); + +#ifdef CONFIG_PAX_PER_CPU_PGD + for (cpu = 0; cpu < nr_cpu_ids; ++cpu) { + pgd_t *pgd = get_cpu_pgd(cpu); + pmd_t *ret; +#else list_for_each_entry(page, &pgd_list, lru) { + pgd_t *pgd; spinlock_t *pgt_lock; pmd_t *ret; @@ -242,8 +295,14 @@ void vmalloc_sync_all(void) pgt_lock = &pgd_page_get_mm(page)->page_table_lock; spin_lock(pgt_lock); - ret = vmalloc_sync_one(page_address(page), address); + pgd = page_address(page); +#endif + + ret = vmalloc_sync_one(pgd, address); + +#ifndef CONFIG_PAX_PER_CPU_PGD spin_unlock(pgt_lock); +#endif if (!ret) break; @@ -277,6 +336,11 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) * an interrupt in the middle of a task switch.. */ pgd_paddr = read_cr3(); + +#ifdef CONFIG_PAX_PER_CPU_PGD + BUG_ON(__pa(get_cpu_pgd(smp_processor_id())) != (pgd_paddr & PHYSICAL_PAGE_MASK)); +#endif + pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); if (!pmd_k) return -1; @@ -372,7 +436,14 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) * happen within a race in page table update. In the later * case just flush: */ + +#ifdef CONFIG_PAX_PER_CPU_PGD + BUG_ON(__pa(get_cpu_pgd(smp_processor_id())) != (read_cr3() & PHYSICAL_PAGE_MASK)); + pgd = pgd_offset_cpu(smp_processor_id(), address); +#else pgd = pgd_offset(current->active_mm, address); +#endif + pgd_ref = pgd_offset_k(address); if (pgd_none(*pgd_ref)) return -1; @@ -542,7 +613,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address) static int is_errata100(struct pt_regs *regs, unsigned long address) { #ifdef CONFIG_X86_64 - if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && (address >> 32)) + if ((regs->cs == __USER32_CS || (regs->cs & SEGMENT_LDT)) && (address >> 32)) return 1; #endif return 0; @@ -569,7 +640,7 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address) } static const char nx_warning[] = KERN_CRIT -"kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n"; +"kernel tried to execute NX-protected page - exploit attempt? 
(uid: %d, task: %s, pid: %d)\n"; static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, @@ -578,15 +649,26 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, if (!oops_may_print()) return; - if (error_code & PF_INSTR) { + if ((__supported_pte_mask & _PAGE_NX) && (error_code & PF_INSTR)) { unsigned int level; pte_t *pte = lookup_address(address, &level); if (pte && pte_present(*pte) && !pte_exec(*pte)) - printk(nx_warning, current_uid()); + printk(nx_warning, current_uid(), current->comm, task_pid_nr(current)); } +#ifdef CONFIG_PAX_KERNEXEC + if (init_mm.start_code <= address && address < init_mm.end_code) { + if (current->signal->curr_ip) + printk(KERN_ERR "PAX: From %pI4: %s:%d, uid/euid: %u/%u, attempted to modify kernel code\n", + ¤t->signal->curr_ip, current->comm, task_pid_nr(current), current_uid(), current_euid()); + else + printk(KERN_ERR "PAX: %s:%d, uid/euid: %u/%u, attempted to modify kernel code\n", + current->comm, task_pid_nr(current), current_uid(), current_euid()); + } +#endif + printk(KERN_ALERT "BUG: unable to handle kernel "); if (address < PAGE_SIZE) printk(KERN_CONT "NULL pointer dereference"); @@ -740,6 +822,22 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, return; } #endif + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + if (pax_is_fetch_fault(regs, error_code, address)) { + +#ifdef CONFIG_PAX_EMUTRAMP + switch (pax_handle_fetch_fault(regs)) { + case 2: + return; + } +#endif + + pax_report_fault(regs, (void *)regs->ip, (void *)regs->sp); + do_group_exit(SIGKILL); + } +#endif + /* Kernel addresses are always protection faults: */ if (address >= TASK_SIZE) error_code |= PF_PROT; @@ -839,7 +937,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { printk(KERN_ERR "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", - tsk->comm, tsk->pid, address); + tsk->comm, task_pid_nr(tsk), address); code = BUS_MCEERR_AR; } #endif @@ -896,6 +994,99 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) return 1; } +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_PAGEEXEC) +static int pax_handle_pageexec_fault(struct pt_regs *regs, struct mm_struct *mm, unsigned long address, unsigned long error_code) +{ + pte_t *pte; + pmd_t *pmd; + spinlock_t *ptl; + unsigned char pte_mask; + + if ((__supported_pte_mask & _PAGE_NX) || (error_code & (PF_PROT|PF_USER)) != (PF_PROT|PF_USER) || v8086_mode(regs) || + !(mm->pax_flags & MF_PAX_PAGEEXEC)) + return 0; + + /* PaX: it's our fault, let's handle it if we can */ + + /* PaX: take a look at read faults before acquiring any locks */ + if (unlikely(!(error_code & PF_WRITE) && (regs->ip == address))) { + /* instruction fetch attempt from a protected page in user mode */ + up_read(&mm->mmap_sem); + +#ifdef CONFIG_PAX_EMUTRAMP + switch (pax_handle_fetch_fault(regs)) { + case 2: + return 1; + } +#endif + + pax_report_fault(regs, (void *)regs->ip, (void *)regs->sp); + do_group_exit(SIGKILL); + } + + pmd = pax_get_pmd(mm, address); + if (unlikely(!pmd)) + return 0; + + pte = pte_offset_map_lock(mm, pmd, address, &ptl); + if (unlikely(!(pte_val(*pte) & _PAGE_PRESENT) || pte_user(*pte))) { + pte_unmap_unlock(pte, ptl); + return 0; + } + + if (unlikely((error_code & PF_WRITE) && !pte_write(*pte))) { + /* write attempt to a protected page in user mode */ + pte_unmap_unlock(pte, ptl); + return 0; + } + +#ifdef CONFIG_SMP + if (likely(address > 
get_limit(regs->cs) && cpu_isset(smp_processor_id(), mm->context.cpu_user_cs_mask))) +#else + if (likely(address > get_limit(regs->cs))) +#endif + { + set_pte(pte, pte_mkread(*pte)); + __flush_tlb_one(address); + pte_unmap_unlock(pte, ptl); + up_read(&mm->mmap_sem); + return 1; + } + + pte_mask = _PAGE_ACCESSED | _PAGE_USER | ((error_code & PF_WRITE) << (_PAGE_BIT_DIRTY-1)); + + /* + * PaX: fill DTLB with user rights and retry + */ + __asm__ __volatile__ ( + "orb %2,(%1)\n" +#if defined(CONFIG_M586) || defined(CONFIG_M586TSC) +/* + * PaX: let this uncommented 'invlpg' remind us on the behaviour of Intel's + * (and AMD's) TLBs. namely, they do not cache PTEs that would raise *any* + * page fault when examined during a TLB load attempt. this is true not only + * for PTEs holding a non-present entry but also present entries that will + * raise a page fault (such as those set up by PaX, or the copy-on-write + * mechanism). in effect it means that we do *not* need to flush the TLBs + * for our target pages since their PTEs are simply not in the TLBs at all. + + * the best thing in omitting it is that we gain around 15-20% speed in the + * fast path of the page fault handler and can get rid of tracing since we + * can no longer flush unintended entries. + */ + "invlpg (%0)\n" +#endif + __copyuser_seg"testb $0,(%0)\n" + "xorb %3,(%1)\n" + : + : "r" (address), "r" (pte), "q" (pte_mask), "i" (_PAGE_USER) + : "memory", "cc"); + pte_unmap_unlock(pte, ptl); + up_read(&mm->mmap_sem); + return 1; +} +#endif + /* * Handle a spurious fault caused by a stale TLB entry. * @@ -968,6 +1159,9 @@ int show_unhandled_signals = 1; static inline int access_error(unsigned long error_code, struct vm_area_struct *vma) { + if ((__supported_pte_mask & _PAGE_NX) && (error_code & PF_INSTR) && !(vma->vm_flags & VM_EXEC)) + return 1; + if (error_code & PF_WRITE) { /* write, present and write, not present: */ if (unlikely(!(vma->vm_flags & VM_WRITE))) @@ -1001,18 +1195,32 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) { struct vm_area_struct *vma; struct task_struct *tsk; - unsigned long address; struct mm_struct *mm; int fault; int write = error_code & PF_WRITE; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | (write ? 
FAULT_FLAG_WRITE : 0); - tsk = current; - mm = tsk->mm; - /* Get the faulting address: */ - address = read_cr2(); + unsigned long address = read_cr2(); + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + if (!user_mode(regs) && address < 2 * pax_user_shadow_base) { + if (!search_exception_tables(regs->ip)) { + bad_area_nosemaphore(regs, error_code, address); + return; + } + if (address < pax_user_shadow_base) { + printk(KERN_ERR "PAX: please report this to pageexec@freemail.hu\n"); + printk(KERN_ERR "PAX: faulting IP: %pS\n", (void *)regs->ip); + show_trace_log_lvl(NULL, NULL, (void *)regs->sp, regs->bp, KERN_ERR); + } else + address -= pax_user_shadow_base; + } +#endif + + tsk = current; + mm = tsk->mm; /* * Detect and handle instructions that would cause a page fault for @@ -1073,7 +1281,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) * User-mode registers count as a user access even for any * potential system fault or CPU buglet: */ - if (user_mode_vm(regs)) { + if (user_mode(regs)) { local_irq_enable(); error_code |= PF_USER; } else { @@ -1128,6 +1336,11 @@ retry: might_sleep(); } +#if defined(CONFIG_X86_32) && defined(CONFIG_PAX_PAGEEXEC) + if (pax_handle_pageexec_fault(regs, mm, address, error_code)) + return; +#endif + vma = find_vma(mm, address); if (unlikely(!vma)) { bad_area(regs, error_code, address); @@ -1139,18 +1352,24 @@ retry: bad_area(regs, error_code, address); return; } - if (error_code & PF_USER) { - /* - * Accessing the stack below %sp is always a bug. - * The large cushion allows instructions like enter - * and pusha to work. ("enter $65535, $31" pushes - * 32 pointers and then decrements %sp by 65535.) - */ - if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) { - bad_area(regs, error_code, address); - return; - } + /* + * Accessing the stack below %sp is always a bug. + * The large cushion allows instructions like enter + * and pusha to work. ("enter $65535, $31" pushes + * 32 pointers and then decrements %sp by 65535.) 
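
/*
 * Editorial aside, not part of the patch: the cushion accepted below %sp by
 * the check that follows is 65536 bytes (for "enter $65535") plus room for
 * 32 pushed pointers, i.e. 65536 + 32 * 8 = 65792 bytes on 64-bit and
 * 65536 + 32 * 4 = 65664 bytes on 32-bit.  Quick arithmetic check:
 */
#include <stdio.h>

int main(void)
{
        unsigned long cushion = 65536 + 32 * sizeof(unsigned long);

        printf("stack cushion: %lu bytes\n", cushion);
        return 0;
}
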
+ */ + if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < task_pt_regs(tsk)->sp)) { + bad_area(regs, error_code, address); + return; } + +#ifdef CONFIG_PAX_SEGMEXEC + if (unlikely((mm->pax_flags & MF_PAX_SEGMEXEC) && vma->vm_end - SEGMEXEC_TASK_SIZE - 1 < address - SEGMEXEC_TASK_SIZE - 1)) { + bad_area(regs, error_code, address); + return; + } +#endif + if (unlikely(expand_stack(vma, address))) { bad_area(regs, error_code, address); return; @@ -1205,3 +1424,292 @@ good_area: up_read(&mm->mmap_sem); } + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) +static bool pax_is_fetch_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) +{ + struct mm_struct *mm = current->mm; + unsigned long ip = regs->ip; + + if (v8086_mode(regs)) + ip = ((regs->cs & 0xffff) << 4) + (ip & 0xffff); + +#ifdef CONFIG_PAX_PAGEEXEC + if (mm->pax_flags & MF_PAX_PAGEEXEC) { + if ((__supported_pte_mask & _PAGE_NX) && (error_code & PF_INSTR)) + return true; + if (!(error_code & (PF_PROT | PF_WRITE)) && ip == address) + return true; + return false; + } +#endif + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) { + if (!(error_code & (PF_PROT | PF_WRITE)) && (ip + SEGMEXEC_TASK_SIZE == address)) + return true; + return false; + } +#endif + + return false; +} +#endif + +#ifdef CONFIG_PAX_EMUTRAMP +static int pax_handle_fetch_fault_32(struct pt_regs *regs) +{ + int err; + + do { /* PaX: libffi trampoline emulation */ + unsigned char mov, jmp; + unsigned int addr1, addr2; + +#ifdef CONFIG_X86_64 + if ((regs->ip + 9) >> 32) + break; +#endif + + err = get_user(mov, (unsigned char __user *)regs->ip); + err |= get_user(addr1, (unsigned int __user *)(regs->ip + 1)); + err |= get_user(jmp, (unsigned char __user *)(regs->ip + 5)); + err |= get_user(addr2, (unsigned int __user *)(regs->ip + 6)); + + if (err) + break; + + if (mov == 0xB8 && jmp == 0xE9) { + regs->ax = addr1; + regs->ip = (unsigned int)(regs->ip + addr2 + 10); + return 2; + } + } while (0); + + do { /* PaX: gcc trampoline emulation #1 */ + unsigned char mov1, mov2; + unsigned short jmp; + unsigned int addr1, addr2; + +#ifdef CONFIG_X86_64 + if ((regs->ip + 11) >> 32) + break; +#endif + + err = get_user(mov1, (unsigned char __user *)regs->ip); + err |= get_user(addr1, (unsigned int __user *)(regs->ip + 1)); + err |= get_user(mov2, (unsigned char __user *)(regs->ip + 5)); + err |= get_user(addr2, (unsigned int __user *)(regs->ip + 6)); + err |= get_user(jmp, (unsigned short __user *)(regs->ip + 10)); + + if (err) + break; + + if (mov1 == 0xB9 && mov2 == 0xB8 && jmp == 0xE0FF) { + regs->cx = addr1; + regs->ax = addr2; + regs->ip = addr2; + return 2; + } + } while (0); + + do { /* PaX: gcc trampoline emulation #2 */ + unsigned char mov, jmp; + unsigned int addr1, addr2; + +#ifdef CONFIG_X86_64 + if ((regs->ip + 9) >> 32) + break; +#endif + + err = get_user(mov, (unsigned char __user *)regs->ip); + err |= get_user(addr1, (unsigned int __user *)(regs->ip + 1)); + err |= get_user(jmp, (unsigned char __user *)(regs->ip + 5)); + err |= get_user(addr2, (unsigned int __user *)(regs->ip + 6)); + + if (err) + break; + + if (mov == 0xB9 && jmp == 0xE9) { + regs->cx = addr1; + regs->ip = (unsigned int)(regs->ip + addr2 + 10); + return 2; + } + } while (0); + + return 1; /* PaX in action */ +} + +#ifdef CONFIG_X86_64 +static int pax_handle_fetch_fault_64(struct pt_regs *regs) +{ + int err; + + do { /* PaX: libffi trampoline emulation */ + unsigned short mov1, mov2, jmp1; + unsigned char stcclc, jmp2; + unsigned long 
addr1, addr2; + + err = get_user(mov1, (unsigned short __user *)regs->ip); + err |= get_user(addr1, (unsigned long __user *)(regs->ip + 2)); + err |= get_user(mov2, (unsigned short __user *)(regs->ip + 10)); + err |= get_user(addr2, (unsigned long __user *)(regs->ip + 12)); + err |= get_user(stcclc, (unsigned char __user *)(regs->ip + 20)); + err |= get_user(jmp1, (unsigned short __user *)(regs->ip + 21)); + err |= get_user(jmp2, (unsigned char __user *)(regs->ip + 23)); + + if (err) + break; + + if (mov1 == 0xBB49 && mov2 == 0xBA49 && (stcclc == 0xF8 || stcclc == 0xF9) && jmp1 == 0xFF49 && jmp2 == 0xE3) { + regs->r11 = addr1; + regs->r10 = addr2; + if (stcclc == 0xF8) + regs->flags &= ~X86_EFLAGS_CF; + else + regs->flags |= X86_EFLAGS_CF; + regs->ip = addr1; + return 2; + } + } while (0); + + do { /* PaX: gcc trampoline emulation #1 */ + unsigned short mov1, mov2, jmp1; + unsigned char jmp2; + unsigned int addr1; + unsigned long addr2; + + err = get_user(mov1, (unsigned short __user *)regs->ip); + err |= get_user(addr1, (unsigned int __user *)(regs->ip + 2)); + err |= get_user(mov2, (unsigned short __user *)(regs->ip + 6)); + err |= get_user(addr2, (unsigned long __user *)(regs->ip + 8)); + err |= get_user(jmp1, (unsigned short __user *)(regs->ip + 16)); + err |= get_user(jmp2, (unsigned char __user *)(regs->ip + 18)); + + if (err) + break; + + if (mov1 == 0xBB41 && mov2 == 0xBA49 && jmp1 == 0xFF49 && jmp2 == 0xE3) { + regs->r11 = addr1; + regs->r10 = addr2; + regs->ip = addr1; + return 2; + } + } while (0); + + do { /* PaX: gcc trampoline emulation #2 */ + unsigned short mov1, mov2, jmp1; + unsigned char jmp2; + unsigned long addr1, addr2; + + err = get_user(mov1, (unsigned short __user *)regs->ip); + err |= get_user(addr1, (unsigned long __user *)(regs->ip + 2)); + err |= get_user(mov2, (unsigned short __user *)(regs->ip + 10)); + err |= get_user(addr2, (unsigned long __user *)(regs->ip + 12)); + err |= get_user(jmp1, (unsigned short __user *)(regs->ip + 20)); + err |= get_user(jmp2, (unsigned char __user *)(regs->ip + 22)); + + if (err) + break; + + if (mov1 == 0xBB49 && mov2 == 0xBA49 && jmp1 == 0xFF49 && jmp2 == 0xE3) { + regs->r11 = addr1; + regs->r10 = addr2; + regs->ip = addr1; + return 2; + } + } while (0); + + return 1; /* PaX in action */ +} +#endif + +/* + * PaX: decide what to do with offenders (regs->ip = fault address) + * + * returns 1 when task should be killed + * 2 when gcc trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + if (v8086_mode(regs)) + return 1; + + if (!(current->mm->pax_flags & MF_PAX_EMUTRAMP)) + return 1; + +#ifdef CONFIG_X86_32 + return pax_handle_fetch_fault_32(regs); +#else + if (regs->cs == __USER32_CS || (regs->cs & SEGMENT_LDT)) + return pax_handle_fetch_fault_32(regs); + else + return pax_handle_fetch_fault_64(regs); +#endif +} +#endif + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) +void pax_report_insns(struct pt_regs *regs, void *pc, void *sp) +{ + long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 20; i++) { + unsigned char c; + if (get_user(c, (unsigned char __force_user *)pc+i)) + printk(KERN_CONT "?? "); + else + printk(KERN_CONT "%02x ", c); + } + printk("\n"); + + printk(KERN_ERR "PAX: bytes at SP-%lu: ", (unsigned long)sizeof(long)); + for (i = -1; i < 80 / (long)sizeof(long); i++) { + unsigned long c; + if (get_user(c, (unsigned long __force_user *)sp+i)) { +#ifdef CONFIG_X86_32 + printk(KERN_CONT "???????? 
"); +#else + if ((regs->cs == __USER32_CS || (regs->cs & SEGMENT_LDT))) + printk(KERN_CONT "???????? ???????? "); + else + printk(KERN_CONT "???????????????? "); +#endif + } else { +#ifdef CONFIG_X86_64 + if ((regs->cs == __USER32_CS || (regs->cs & SEGMENT_LDT))) { + printk(KERN_CONT "%08x ", (unsigned int)c); + printk(KERN_CONT "%08x ", (unsigned int)(c >> 32)); + } else +#endif + printk(KERN_CONT "%0*lx ", 2 * (int)sizeof(long), c); + } + } + printk("\n"); +} +#endif + +/** + * probe_kernel_write(): safely attempt to write to a location + * @dst: address to write to + * @src: pointer to the data that shall be written + * @size: size of the data chunk + * + * Safely write to address @dst from the buffer at @src. If a kernel fault + * happens, handle that and return -EFAULT. + */ +long notrace probe_kernel_write(void *dst, const void *src, size_t size) +{ + long ret; + mm_segment_t old_fs = get_fs(); + + set_fs(KERNEL_DS); + pagefault_disable(); + pax_open_kernel(); + ret = __copy_to_user_inatomic((void __force_user *)dst, src, size); + pax_close_kernel(); + pagefault_enable(); + set_fs(old_fs); + + return ret ? -EFAULT : 0; +} diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index dd74e46..3f2d038 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -255,7 +255,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, addr = start; len = (unsigned long) nr_pages << PAGE_SHIFT; end = start + len; - if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, + if (unlikely(!access_ok_noprefault(write ? VERIFY_WRITE : VERIFY_READ, (void __user *)start, len))) return 0; @@ -331,6 +331,10 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, goto slow_irqon; #endif + if (unlikely(!access_ok_noprefault(write ? VERIFY_WRITE : VERIFY_READ, + (void __user *)start, len))) + return 0; + /* * XXX: batch / limit 'nr', to avoid large irq off latency * needs some instrumenting to determine the common sizes used by diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index f4f29b1..5cac4fb 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -44,7 +44,11 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); BUG_ON(!pte_none(*(kmap_pte-idx))); + + pax_open_kernel(); set_pte(kmap_pte-idx, mk_pte(page, prot)); + pax_close_kernel(); + arch_flush_lazy_mmu_mode(); return (void *)vaddr; diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index df7d12c..93fae8e 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -277,13 +277,21 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, struct hstate *h = hstate_file(file); struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long start_addr; + unsigned long start_addr, pax_task_size = TASK_SIZE; + unsigned long offset = gr_rand_threadstack_offset(mm, file, flags); + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + pax_task_size = SEGMEXEC_TASK_SIZE; +#endif + + pax_task_size -= PAGE_SIZE; if (len > mm->cached_hole_size) { - start_addr = mm->free_area_cache; + start_addr = mm->free_area_cache; } else { - start_addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; + start_addr = mm->mmap_base; + mm->cached_hole_size = 0; } full_search: @@ -291,26 +299,27 @@ full_search: for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { /* At this point: (!vma || addr < vma->vm_end). 
*/ - if (TASK_SIZE - len < addr) { + if (pax_task_size - len < addr) { /* * Start a new search - just in case we missed * some holes. */ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = TASK_UNMAPPED_BASE; + if (start_addr != mm->mmap_base) { + start_addr = mm->mmap_base; mm->cached_hole_size = 0; goto full_search; } return -ENOMEM; } - if (!vma || addr + len <= vma->vm_start) { - mm->free_area_cache = addr + len; - return addr; - } + if (check_heap_stack_gap(vma, &addr, len, offset)) + break; if (addr + mm->cached_hole_size < vma->vm_start) mm->cached_hole_size = vma->vm_start - addr; addr = ALIGN(vma->vm_end, huge_page_size(h)); } + + mm->free_area_cache = addr + len; + return addr; } static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, @@ -319,10 +328,10 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, { struct hstate *h = hstate_file(file); struct mm_struct *mm = current->mm; - struct vm_area_struct *vma, *prev_vma; - unsigned long base = mm->mmap_base, addr = addr0; + struct vm_area_struct *vma; + unsigned long base = mm->mmap_base, addr; unsigned long largest_hole = mm->cached_hole_size; - int first_time = 1; + unsigned long offset = gr_rand_threadstack_offset(mm, file, flags); /* don't allow allocations above current base */ if (mm->free_area_cache > base) @@ -332,64 +341,68 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, largest_hole = 0; mm->free_area_cache = base; } -try_again: + /* make sure it can fit in the remaining address space */ if (mm->free_area_cache < len) goto fail; /* either no address requested or can't fit in requested address hole */ - addr = (mm->free_area_cache - len) & huge_page_mask(h); + addr = (mm->free_area_cache - len); do { + addr &= huge_page_mask(h); /* * Lookup failure means no vma is above this address, * i.e. return with success: */ - if (!(vma = find_vma_prev(mm, addr, &prev_vma))) + vma = find_vma(mm, addr); + if (!vma) return addr; /* * new region fits between prev_vma->vm_end and * vma->vm_start, use it: */ - if (addr + len <= vma->vm_start && - (!prev_vma || (addr >= prev_vma->vm_end))) { + if (check_heap_stack_gap(vma, &addr, len, offset)) { /* remember the address as a hint for next time */ - mm->cached_hole_size = largest_hole; - return (mm->free_area_cache = addr); - } else { - /* pull free_area_cache down to the first hole */ - if (mm->free_area_cache == vma->vm_end) { - mm->free_area_cache = vma->vm_start; - mm->cached_hole_size = largest_hole; - } + mm->cached_hole_size = largest_hole; + return (mm->free_area_cache = addr); + } + /* pull free_area_cache down to the first hole */ + if (mm->free_area_cache == vma->vm_end) { + mm->free_area_cache = vma->vm_start; + mm->cached_hole_size = largest_hole; } /* remember the largest hole we saw so far */ if (addr + largest_hole < vma->vm_start) - largest_hole = vma->vm_start - addr; + largest_hole = vma->vm_start - addr; /* try just below the current vma->vm_start */ - addr = (vma->vm_start - len) & huge_page_mask(h); - } while (len <= vma->vm_start); + addr = skip_heap_stack_gap(vma, len, offset); + } while (!IS_ERR_VALUE(addr)); fail: /* - * if hint left us with no space for the requested - * mapping then try again: - */ - if (first_time) { - mm->free_area_cache = base; - largest_hole = 0; - first_time = 0; - goto try_again; - } - /* * A failed mmap() very likely causes application failure, * so fall back to the bottom-up function here. 
This scenario * can happen with large stack limits and large mmap() * allocations. */ - mm->free_area_cache = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + mm->mmap_base = SEGMEXEC_TASK_UNMAPPED_BASE; + else +#endif + + mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + + mm->free_area_cache = mm->mmap_base; mm->cached_hole_size = ~0UL; addr = hugetlb_get_unmapped_area_bottomup(file, addr0, len, pgoff, flags); @@ -397,6 +410,7 @@ fail: /* * Restore the topdown base: */ + mm->mmap_base = base; mm->free_area_cache = base; mm->cached_hole_size = ~0UL; @@ -410,10 +424,20 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, struct hstate *h = hstate_file(file); struct mm_struct *mm = current->mm; struct vm_area_struct *vma; + unsigned long pax_task_size = TASK_SIZE; + unsigned long offset = gr_rand_threadstack_offset(mm, file, flags); if (len & ~huge_page_mask(h)) return -EINVAL; - if (len > TASK_SIZE) + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + pax_task_size = SEGMEXEC_TASK_SIZE; +#endif + + pax_task_size -= PAGE_SIZE; + + if (len > pax_task_size) return -ENOMEM; if (flags & MAP_FIXED) { @@ -422,11 +446,14 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return addr; } +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + if (addr) { addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + if (pax_task_size - len >= addr && check_heap_stack_gap(vma, &addr, len, offset)) return addr; } if (mm->get_unmapped_area == arch_get_unmapped_area) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index a4cca06..9e00106 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -15,6 +16,8 @@ #include #include #include +#include +#include unsigned long __initdata pgt_buf_start; unsigned long __meminitdata pgt_buf_end; @@ -43,7 +46,7 @@ static void __init find_early_table_space(struct map_range *mr, int nr_range) { int i; unsigned long puds = 0, pmds = 0, ptes = 0, tables; - unsigned long start = 0, good_end; + unsigned long start = 0x100000, good_end; unsigned long pgd_extra = 0; phys_addr_t base; @@ -282,7 +285,14 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, #ifdef CONFIG_X86_32 early_ioremap_page_table_range_init(); +#endif +#ifdef CONFIG_PAX_PER_CPU_PGD + clone_pgd_range(get_cpu_pgd(0) + KERNEL_PGD_BOUNDARY, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + KERNEL_PGD_PTRS); + load_cr3(get_cpu_pgd(0)); +#elif defined(CONFIG_X86_32) load_cr3(swapper_pg_dir); #endif @@ -324,10 +334,40 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, * Access has to be given to non-kernel-ram areas as well, these contain the PCI * mmio resources as well as potential bios/acpi data regions. 
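
/*
 * Editorial aside, not part of the patch: the GRKERNSEC_KMEM policy added to
 * devmem_is_allowed() below whitelists only the BIOS data area, the EBDA
 * window and the legacy ISA hole, and refuses the rest of low memory.  A
 * standalone model of that decision; page numbers are 4 KiB frame numbers and
 * the EBDA bounds shown are the fallback defaults, the real ones are probed
 * at boot.
 */
#include <stdbool.h>

#define DEMO_PAGE_SHIFT 12

static const unsigned long demo_ebda_start = 0x9f000 >> DEMO_PAGE_SHIFT;
static const unsigned long demo_ebda_end   = 0xa0000 >> DEMO_PAGE_SHIFT;

bool demo_devmem_is_allowed(unsigned long pagenr)
{
        if (pagenr == 0)
                return true;                            /* BIOS data area */
        if (pagenr >= demo_ebda_start && pagenr < demo_ebda_end)
                return true;                            /* EBDA */
        if (pagenr >= (0xa0000 >> DEMO_PAGE_SHIFT) &&
            pagenr <  (0x100000 >> DEMO_PAGE_SHIFT))
                return true;                            /* legacy ISA/VGA hole */
        return false;                                   /* rest of low memory refused */
}
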
*/ + +#ifdef CONFIG_GRKERNSEC_KMEM +static unsigned int ebda_start __read_only; +static unsigned int ebda_end __read_only; +#endif + int devmem_is_allowed(unsigned long pagenr) { +#ifdef CONFIG_GRKERNSEC_KMEM + /* allow BDA */ + if (!pagenr) + return 1; + /* allow EBDA */ + if (pagenr >= ebda_start && pagenr < ebda_end) + return 1; + /* if tboot is in use, allow access to its hardcoded serial log range */ + if (tboot_enabled() && ((0x60000 >> PAGE_SHIFT) <= pagenr) && (pagenr < (0x68000 >> PAGE_SHIFT))) + return 1; +#else + if (!pagenr) + return 1; +#ifdef CONFIG_VM86 + if (pagenr < (ISA_START_ADDRESS >> PAGE_SHIFT)) + return 1; +#endif +#endif + + if ((ISA_START_ADDRESS >> PAGE_SHIFT) <= pagenr && pagenr < (ISA_END_ADDRESS >> PAGE_SHIFT)) + return 1; +#ifdef CONFIG_GRKERNSEC_KMEM + /* throw out everything else below 1MB */ if (pagenr <= 256) - return 1; + return 0; +#endif if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) return 0; if (!page_is_ram(pagenr)) @@ -384,8 +424,117 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) #endif } +#ifdef CONFIG_GRKERNSEC_KMEM +static inline void gr_init_ebda(void) +{ + unsigned int ebda_addr; + unsigned int ebda_size = 0; + + ebda_addr = get_bios_ebda(); + if (ebda_addr) { + ebda_size = *(unsigned char *)phys_to_virt(ebda_addr); + ebda_size <<= 10; + } + if (ebda_addr && ebda_size) { + ebda_start = ebda_addr >> PAGE_SHIFT; + ebda_end = min((unsigned int)PAGE_ALIGN(ebda_addr + ebda_size), (unsigned int)0xa0000) >> PAGE_SHIFT; + } else { + ebda_start = 0x9f000 >> PAGE_SHIFT; + ebda_end = 0xa0000 >> PAGE_SHIFT; + } +} +#else +static inline void gr_init_ebda(void) { } +#endif + void free_initmem(void) { +#ifdef CONFIG_PAX_KERNEXEC +#ifdef CONFIG_X86_32 + /* PaX: limit KERNEL_CS to actual size */ + unsigned long addr, limit; + struct desc_struct d; + int cpu; +#else + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + unsigned long addr, end; +#endif +#endif + + gr_init_ebda(); + +#ifdef CONFIG_PAX_KERNEXEC +#ifdef CONFIG_X86_32 + limit = paravirt_enabled() ? 
ktva_ktla(0xffffffff) : (unsigned long)&_etext; + limit = (limit - 1UL) >> PAGE_SHIFT; + + memset(__LOAD_PHYSICAL_ADDR + PAGE_OFFSET, POISON_FREE_INITMEM, PAGE_SIZE); + for (cpu = 0; cpu < nr_cpu_ids; cpu++) { + pack_descriptor(&d, get_desc_base(&get_cpu_gdt_table(cpu)[GDT_ENTRY_KERNEL_CS]), limit, 0x9B, 0xC); + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_KERNEL_CS, &d, DESCTYPE_S); + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_KERNEXEC_KERNEL_CS, &d, DESCTYPE_S); + } + + /* PaX: make KERNEL_CS read-only */ + addr = PFN_ALIGN(ktla_ktva((unsigned long)&_text)); + if (!paravirt_enabled()) + set_memory_ro(addr, (PFN_ALIGN(_sdata) - addr) >> PAGE_SHIFT); +/* + for (addr = ktla_ktva((unsigned long)&_text); addr < (unsigned long)&_sdata; addr += PMD_SIZE) { + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_RW)); + } +*/ +#ifdef CONFIG_X86_PAE + set_memory_nx(PFN_ALIGN(__init_begin), (PFN_ALIGN(__init_end) - PFN_ALIGN(__init_begin)) >> PAGE_SHIFT); +/* + for (addr = (unsigned long)&__init_begin; addr < (unsigned long)&__init_end; addr += PMD_SIZE) { + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + set_pmd(pmd, __pmd(pmd_val(*pmd) | (_PAGE_NX & __supported_pte_mask))); + } +*/ +#endif + +#ifdef CONFIG_MODULES + set_memory_4k((unsigned long)MODULES_EXEC_VADDR, (MODULES_EXEC_END - MODULES_EXEC_VADDR) >> PAGE_SHIFT); +#endif + +#else + /* PaX: make kernel code/rodata read-only, rest non-executable */ + for (addr = __START_KERNEL_map; addr < __START_KERNEL_map + KERNEL_IMAGE_SIZE; addr += PMD_SIZE) { + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) + continue; + if ((unsigned long)_text <= addr && addr < (unsigned long)_sdata) + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_RW)); + else + set_pmd(pmd, __pmd(pmd_val(*pmd) | (_PAGE_NX & __supported_pte_mask))); + } + + addr = (unsigned long)__va(__pa(__START_KERNEL_map)); + end = addr + KERNEL_IMAGE_SIZE; + for (; addr < end; addr += PMD_SIZE) { + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) + continue; + if ((unsigned long)__va(__pa(_text)) <= addr && addr < (unsigned long)__va(__pa(_sdata))) + set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_RW)); + } +#endif + + flush_tlb_all(); +#endif + free_init_pages("unused kernel memory", (unsigned long)(&__init_begin), (unsigned long)(&__init_end)); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 29f7c6d9..5122941 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -74,36 +74,6 @@ static __init void *alloc_low_page(void) } /* - * Creates a middle page table and puts a pointer to it in the - * given global directory entry. This only returns the gd entry - * in non-PAE compilation mode, since the middle layer is folded. 
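
/*
 * Editorial aside, not part of the patch: with the PAE special case dropped,
 * one_md_table_init() below reduces to chaining pud_offset()/pmd_offset(),
 * because those levels are folded onto the pgd on two-level x86-32.  The
 * allocate-if-absent pattern used by one_page_table_init() looks roughly like
 * this toy model (types, sizes and the allocator are illustrative):
 */
#include <stdlib.h>

#define DEMO_PTRS_PER_TABLE 1024

struct demo_pt {
        unsigned long entry[DEMO_PTRS_PER_TABLE];
};

/* Return the page table behind *dir_entry, allocating and wiring it on first use. */
struct demo_pt *demo_one_page_table_init(struct demo_pt **dir_entry)
{
        if (!*dir_entry)
                *dir_entry = calloc(1, sizeof(struct demo_pt));
        return *dir_entry;
}
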
- */ -static pmd_t * __init one_md_table_init(pgd_t *pgd) -{ - pud_t *pud; - pmd_t *pmd_table; - -#ifdef CONFIG_X86_PAE - if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { - if (after_bootmem) - pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE); - else - pmd_table = (pmd_t *)alloc_low_page(); - paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); - set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); - pud = pud_offset(pgd, 0); - BUG_ON(pmd_table != pmd_offset(pud, 0)); - - return pmd_table; - } -#endif - pud = pud_offset(pgd, 0); - pmd_table = pmd_offset(pud, 0); - - return pmd_table; -} - -/* * Create a page table and place a pointer to it in a middle page * directory entry: */ @@ -123,13 +93,28 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) page_table = (pte_t *)alloc_low_page(); paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + set_pmd(pmd, __pmd(__pa(page_table) | _KERNPG_TABLE)); +#else set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); +#endif BUG_ON(page_table != pte_offset_kernel(pmd, 0)); } return pte_offset_kernel(pmd, 0); } +static pmd_t * __init one_md_table_init(pgd_t *pgd) +{ + pud_t *pud; + pmd_t *pmd_table; + + pud = pud_offset(pgd, 0); + pmd_table = pmd_offset(pud, 0); + + return pmd_table; +} + pmd_t * __init populate_extra_pmd(unsigned long vaddr) { int pgd_idx = pgd_index(vaddr); @@ -203,6 +188,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) int pgd_idx, pmd_idx; unsigned long vaddr; pgd_t *pgd; + pud_t *pud; pmd_t *pmd; pte_t *pte = NULL; @@ -212,8 +198,13 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) pgd = pgd_base + pgd_idx; for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { - pmd = one_md_table_init(pgd); - pmd = pmd + pmd_index(vaddr); + pud = pud_offset(pgd, vaddr); + pmd = pmd_offset(pud, vaddr); + +#ifdef CONFIG_X86_PAE + paravirt_alloc_pmd(&init_mm, __pa(pmd) >> PAGE_SHIFT); +#endif + for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { pte = page_table_kmap_check(one_page_table_init(pmd), @@ -225,11 +216,20 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) } } -static inline int is_kernel_text(unsigned long addr) +static inline int is_kernel_text(unsigned long start, unsigned long end) { - if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end) - return 1; - return 0; + if ((start >= ktla_ktva((unsigned long)_etext) || + end <= ktla_ktva((unsigned long)_stext)) && + (start >= ktla_ktva((unsigned long)_einittext) || + end <= ktla_ktva((unsigned long)_sinittext)) && + +#ifdef CONFIG_ACPI_SLEEP + (start >= (unsigned long)__va(acpi_wakeup_address) + 0x4000 || end <= (unsigned long)__va(acpi_wakeup_address)) && +#endif + + (start > (unsigned long)__va(0xfffff) || end <= (unsigned long)__va(0xc0000))) + return 0; + return 1; } /* @@ -246,9 +246,10 @@ kernel_physical_mapping_init(unsigned long start, unsigned long last_map_addr = end; unsigned long start_pfn, end_pfn; pgd_t *pgd_base = swapper_pg_dir; - int pgd_idx, pmd_idx, pte_ofs; + unsigned int pgd_idx, pmd_idx, pte_ofs; unsigned long pfn; pgd_t *pgd; + pud_t *pud; pmd_t *pmd; pte_t *pte; unsigned pages_2m, pages_4k; @@ -281,8 +282,13 @@ repeat: pfn = start_pfn; pgd_idx = pgd_index((pfn<> PAGE_SHIFT); +#endif if (pfn >= end_pfn) continue; @@ -294,14 +300,13 @@ repeat: #endif for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn; pmd++, pmd_idx++) { - unsigned int addr = 
pfn * PAGE_SIZE + PAGE_OFFSET; + unsigned long address = pfn * PAGE_SIZE + PAGE_OFFSET; /* * Map with big pages if possible, otherwise * create normal page tables: */ if (use_pse) { - unsigned int addr2; pgprot_t prot = PAGE_KERNEL_LARGE; /* * first pass will use the same initial @@ -311,11 +316,7 @@ repeat: __pgprot(PTE_IDENT_ATTR | _PAGE_PSE); - addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + - PAGE_OFFSET + PAGE_SIZE-1; - - if (is_kernel_text(addr) || - is_kernel_text(addr2)) + if (is_kernel_text(address, address + PMD_SIZE)) prot = PAGE_KERNEL_LARGE_EXEC; pages_2m++; @@ -332,7 +333,7 @@ repeat: pte_ofs = pte_index((pfn<> 10, - (unsigned long)&_etext, (unsigned long)&_edata, - ((unsigned long)&_edata - (unsigned long)&_etext) >> 10, + (unsigned long)&_sdata, (unsigned long)&_edata, + ((unsigned long)&_edata - (unsigned long)&_sdata) >> 10, - (unsigned long)&_text, (unsigned long)&_etext, + ktla_ktva((unsigned long)&_text), ktla_ktva((unsigned long)&_etext), ((unsigned long)&_etext - (unsigned long)&_text) >> 10); /* @@ -896,6 +895,7 @@ void set_kernel_text_rw(void) if (!kernel_set_to_readonly) return; + start = ktla_ktva(start); pr_debug("Set kernel text: %lx - %lx for read write\n", start, start+size); @@ -910,6 +910,7 @@ void set_kernel_text_ro(void) if (!kernel_set_to_readonly) return; + start = ktla_ktva(start); pr_debug("Set kernel text: %lx - %lx for read only\n", start, start+size); @@ -938,6 +939,7 @@ void mark_rodata_ro(void) unsigned long start = PFN_ALIGN(_text); unsigned long size = PFN_ALIGN(_etext) - start; + start = ktla_ktva(start); set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); printk(KERN_INFO "Write protecting the kernel text: %luk\n", size >> 10); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 44b93da..5a0b3ee 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -75,7 +75,7 @@ early_param("gbpages", parse_direct_gbpages_on); * around without checking the pgd every time. */ -pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; +pteval_t __supported_pte_mask __read_only = ~(_PAGE_NX | _PAGE_IOMAP); EXPORT_SYMBOL_GPL(__supported_pte_mask); int force_personality32; @@ -108,12 +108,22 @@ void sync_global_pgds(unsigned long start, unsigned long end) for (address = start; address <= end; address += PGDIR_SIZE) { const pgd_t *pgd_ref = pgd_offset_k(address); + +#ifdef CONFIG_PAX_PER_CPU_PGD + unsigned long cpu; +#else struct page *page; +#endif if (pgd_none(*pgd_ref)) continue; spin_lock(&pgd_lock); + +#ifdef CONFIG_PAX_PER_CPU_PGD + for (cpu = 0; cpu < nr_cpu_ids; ++cpu) { + pgd_t *pgd = pgd_offset_cpu(cpu, address); +#else list_for_each_entry(page, &pgd_list, lru) { pgd_t *pgd; spinlock_t *pgt_lock; @@ -122,6 +132,7 @@ void sync_global_pgds(unsigned long start, unsigned long end) /* the pgt_lock only for Xen */ pgt_lock = &pgd_page_get_mm(page)->page_table_lock; spin_lock(pgt_lock); +#endif if (pgd_none(*pgd)) set_pgd(pgd, *pgd_ref); @@ -129,7 +140,10 @@ void sync_global_pgds(unsigned long start, unsigned long end) BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); +#ifndef CONFIG_PAX_PER_CPU_PGD spin_unlock(pgt_lock); +#endif + } spin_unlock(&pgd_lock); } @@ -162,7 +176,7 @@ static pud_t *fill_pud(pgd_t *pgd, unsigned long vaddr) { if (pgd_none(*pgd)) { pud_t *pud = (pud_t *)spp_getpage(); - pgd_populate(&init_mm, pgd, pud); + pgd_populate_kernel(&init_mm, pgd, pud); if (pud != pud_offset(pgd, 0)) printk(KERN_ERR "PAGETABLE BUG #00! 
%p <-> %p\n", pud, pud_offset(pgd, 0)); @@ -174,7 +188,7 @@ static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr) { if (pud_none(*pud)) { pmd_t *pmd = (pmd_t *) spp_getpage(); - pud_populate(&init_mm, pud, pmd); + pud_populate_kernel(&init_mm, pud, pmd); if (pmd != pmd_offset(pud, 0)) printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud, 0)); @@ -203,7 +217,9 @@ void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) pmd = fill_pmd(pud, vaddr); pte = fill_pte(pmd, vaddr); + pax_open_kernel(); set_pte(pte, new_pte); + pax_close_kernel(); /* * It's enough to flush this one mapping. @@ -262,14 +278,12 @@ static void __init __init_extra_mapping(unsigned long phys, unsigned long size, pgd = pgd_offset_k((unsigned long)__va(phys)); if (pgd_none(*pgd)) { pud = (pud_t *) spp_getpage(); - set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE | - _PAGE_USER)); + set_pgd(pgd, __pgd(__pa(pud) | _PAGE_TABLE)); } pud = pud_offset(pgd, (unsigned long)__va(phys)); if (pud_none(*pud)) { pmd = (pmd_t *) spp_getpage(); - set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | - _PAGE_USER)); + set_pud(pud, __pud(__pa(pmd) | _PAGE_TABLE)); } pmd = pmd_offset(pud, phys); BUG_ON(!pmd_none(*pmd)); @@ -330,7 +344,7 @@ static __ref void *alloc_low_page(unsigned long *phys) if (pfn >= pgt_buf_top) panic("alloc_low_page: ran out of memory"); - adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); + adr = (void __force_kernel *)early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); clear_page(adr); *phys = pfn * PAGE_SIZE; return adr; @@ -346,7 +360,7 @@ static __ref void *map_low_page(void *virt) phys = __pa(virt); left = phys & (PAGE_SIZE - 1); - adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE); + adr = (void __force_kernel *)early_memremap(phys & PAGE_MASK, PAGE_SIZE); adr = (void *)(((unsigned long)adr) | left); return adr; @@ -546,7 +560,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, unmap_low_page(pmd); spin_lock(&init_mm.page_table_lock); - pud_populate(&init_mm, pud, __va(pmd_phys)); + pud_populate_kernel(&init_mm, pud, __va(pmd_phys)); spin_unlock(&init_mm.page_table_lock); } __flush_tlb_all(); @@ -592,7 +606,7 @@ kernel_physical_mapping_init(unsigned long start, unmap_low_page(pud); spin_lock(&init_mm.page_table_lock); - pgd_populate(&init_mm, pgd, __va(pud_phys)); + pgd_populate_kernel(&init_mm, pgd, __va(pud_phys)); spin_unlock(&init_mm.page_table_lock); pgd_changed = true; } @@ -856,8 +870,8 @@ int kern_addr_valid(unsigned long addr) static struct vm_area_struct gate_vma = { .vm_start = VSYSCALL_START, .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES * PAGE_SIZE), - .vm_page_prot = PAGE_READONLY_EXEC, - .vm_flags = VM_READ | VM_EXEC + .vm_page_prot = PAGE_READONLY, + .vm_flags = VM_READ }; struct vm_area_struct *get_gate_vma(struct mm_struct *mm) @@ -891,7 +905,7 @@ int in_gate_area_no_mm(unsigned long addr) const char *arch_vma_name(struct vm_area_struct *vma) { - if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) + if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso) return "[vdso]"; if (vma == &gate_vma) return "[vsyscall]"; diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 7b179b49..6bd17777 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -64,7 +64,11 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) type = kmap_atomic_idx_push(); idx = type + KM_TYPE_NR * smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + + pax_open_kernel(); set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); + 
pax_close_kernel(); + arch_flush_lazy_mmu_mode(); return (void *)vaddr; diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index dec49d3..1943563 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -56,8 +56,8 @@ static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, unsigned long i; for (i = 0; i < nr_pages; ++i) - if (pfn_valid(start_pfn + i) && - !PageReserved(pfn_to_page(start_pfn + i))) + if (pfn_valid(start_pfn + i) && (start_pfn + i >= 0x100 || + !PageReserved(pfn_to_page(start_pfn + i)))) return 1; WARN_ONCE(1, "ioremap on RAM pfn 0x%lx\n", start_pfn); @@ -268,7 +268,7 @@ EXPORT_SYMBOL(ioremap_prot); * * Caller must ensure there is only one unmapping for the same pointer. */ -void iounmap(volatile void __iomem *addr) +void iounmap(const volatile void __iomem *addr) { struct vm_struct *p, *o; @@ -322,23 +322,22 @@ EXPORT_SYMBOL(iounmap); */ void *xlate_dev_mem_ptr(unsigned long phys) { - void *addr; - unsigned long start = phys & PAGE_MASK; - /* If page is RAM, we can use __va. Otherwise ioremap and unmap. */ - if (page_is_ram(start >> PAGE_SHIFT)) + if (page_is_ram(phys >> PAGE_SHIFT)) +#ifdef CONFIG_HIGHMEM + if ((phys >> PAGE_SHIFT) < max_low_pfn) +#endif return __va(phys); - addr = (void __force *)ioremap_cache(start, PAGE_SIZE); - if (addr) - addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); - - return addr; + return (void __force *)ioremap_cache(phys, PAGE_SIZE); } void unxlate_dev_mem_ptr(unsigned long phys, void *addr) { if (page_is_ram(phys >> PAGE_SHIFT)) +#ifdef CONFIG_HIGHMEM + if ((phys >> PAGE_SHIFT) < max_low_pfn) +#endif return; iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK)); @@ -356,7 +355,7 @@ static int __init early_ioremap_debug_setup(char *str) early_param("early_ioremap_debug", early_ioremap_debug_setup); static __initdata int after_paging_init; -static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; +static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __read_only __aligned(PAGE_SIZE); static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) { @@ -393,8 +392,7 @@ void __init early_ioremap_init(void) slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); - memset(bm_pte, 0, sizeof(bm_pte)); - pmd_populate_kernel(&init_mm, pmd, bm_pte); + pmd_populate_user(&init_mm, pmd, bm_pte); /* * The boot-ioremap range spans multiple pmds, for which diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c index d87dd6d..bf3fa66 100644 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ b/arch/x86/mm/kmemcheck/kmemcheck.c @@ -622,9 +622,9 @@ bool kmemcheck_fault(struct pt_regs *regs, unsigned long address, * memory (e.g. tracked pages)? For now, we need this to avoid * invoking kmemcheck for PnP BIOS calls. */ - if (regs->flags & X86_VM_MASK) + if (v8086_mode(regs)) return false; - if (regs->cs != __KERNEL_CS) + if (regs->cs != __KERNEL_CS && regs->cs != __KERNEXEC_KERNEL_CS) return false; pte = kmemcheck_pte_lookup(address); diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 75f9e5d..77b1e62 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -52,7 +52,7 @@ static unsigned long stack_maxrandom_size(void) * Leave an at least ~128 MB hole with possible stack randomization. 
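
/*
 * Editorial aside, not part of the patch: mmap_base() below clamps the stack
 * gap between MIN_GAP and MAX_GAP and places the top-down mmap base that far
 * under the end of the task's address range (SEGMEXEC halves that range).
 * Worked example with illustrative numbers, randomization omitted:
 */
#include <stdio.h>

#define DEMO_PAGE_SIZE 4096UL
#define DEMO_PAGE_ALIGN(x) (((x) + DEMO_PAGE_SIZE - 1) & ~(DEMO_PAGE_SIZE - 1))

int main(void)
{
        unsigned long task_size = 0xc0000000UL;         /* assumed 3 GB task size */
        unsigned long min_gap = 128UL * 1024 * 1024;    /* ~128 MB */
        unsigned long max_gap = task_size / 6 * 5;
        unsigned long gap = 8UL * 1024 * 1024;          /* e.g. an 8 MB RLIMIT_STACK */

        if (gap < min_gap)
                gap = min_gap;
        else if (gap > max_gap)
                gap = max_gap;

        printf("mmap_base = %#lx\n", DEMO_PAGE_ALIGN(task_size - gap));
        return 0;
}
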
*/ #define MIN_GAP (128*1024*1024UL + stack_maxrandom_size()) -#define MAX_GAP (TASK_SIZE/6*5) +#define MAX_GAP (pax_task_size/6*5) static int mmap_is_legacy(void) { @@ -82,27 +82,40 @@ static unsigned long mmap_rnd(void) return rnd << PAGE_SHIFT; } -static unsigned long mmap_base(void) +static unsigned long mmap_base(struct mm_struct *mm) { unsigned long gap = rlimit(RLIMIT_STACK); + unsigned long pax_task_size = TASK_SIZE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + pax_task_size = SEGMEXEC_TASK_SIZE; +#endif if (gap < MIN_GAP) gap = MIN_GAP; else if (gap > MAX_GAP) gap = MAX_GAP; - return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd()); + return PAGE_ALIGN(pax_task_size - gap - mmap_rnd()); } /* * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 * does, but not when emulating X86_32 */ -static unsigned long mmap_legacy_base(void) +static unsigned long mmap_legacy_base(struct mm_struct *mm) { - if (mmap_is_ia32()) + if (mmap_is_ia32()) { + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + return SEGMEXEC_TASK_UNMAPPED_BASE; + else +#endif + return TASK_UNMAPPED_BASE; - else + } else return TASK_UNMAPPED_BASE + mmap_rnd(); } @@ -112,8 +125,15 @@ static unsigned long mmap_legacy_base(void) */ void arch_pick_mmap_layout(struct mm_struct *mm) { - mm->mmap_legacy_base = mmap_legacy_base(); - mm->mmap_base = mmap_base(); + mm->mmap_legacy_base = mmap_legacy_base(mm); + mm->mmap_base = mmap_base(mm); + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) { + mm->mmap_legacy_base += mm->delta_mmap; + mm->mmap_base -= mm->delta_mmap + mm->delta_stack; + } +#endif if (mmap_is_legacy()) { mm->mmap_base = mm->mmap_legacy_base; diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index de54b9b..935281f 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -194,7 +194,7 @@ static void pre(struct kmmio_probe *p, struct pt_regs *regs, break; default: { - unsigned char *ip = (unsigned char *)instptr; + unsigned char *ip = (unsigned char *)ktla_ktva(instptr); my_trace->opcode = MMIO_UNKNOWN_OP; my_trace->width = 0; my_trace->value = (*ip) << 16 | *(ip + 1) << 8 | @@ -234,7 +234,7 @@ static void post(struct kmmio_probe *p, unsigned long condition, static void ioremap_trace_core(resource_size_t offset, unsigned long size, void __iomem *addr) { - static atomic_t next_id; + static atomic_unchecked_t next_id; struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL); /* These are page-unaligned. 
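
/*
 * Editorial aside, not part of the patch: next_id is switched to
 * atomic_unchecked_t in this hunk because PaX's refcount protection reports
 * overflows of ordinary atomic_t, while a trace id is allowed to wrap.
 * Minimal userspace model of the "unchecked" variant using C11 atomics; the
 * type and helper names are illustrative.
 */
#include <stdatomic.h>

typedef struct {
        atomic_long counter;
} demo_atomic_unchecked_t;

long demo_atomic_inc_return_unchecked(demo_atomic_unchecked_t *v)
{
        /* Plain fetch-and-add: wrap-around is tolerated, no overflow report. */
        return atomic_fetch_add_explicit(&v->counter, 1, memory_order_relaxed) + 1;
}
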
*/ struct mmiotrace_map map = { @@ -258,7 +258,7 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size, .private = trace }, .phys = offset, - .id = atomic_inc_return(&next_id) + .id = atomic_inc_return_unchecked(&next_id) }; map.map_id = trace->id; @@ -290,7 +290,7 @@ void mmiotrace_ioremap(resource_size_t offset, unsigned long size, ioremap_trace_core(offset, size, addr); } -static void iounmap_trace_core(volatile void __iomem *addr) +static void iounmap_trace_core(const volatile void __iomem *addr) { struct mmiotrace_map map = { .phys = 0, @@ -328,7 +328,7 @@ not_enabled: } } -void mmiotrace_iounmap(volatile void __iomem *addr) +void mmiotrace_iounmap(const volatile void __iomem *addr) { might_sleep(); if (is_enabled()) /* recheck and proper locking in *_core() */ diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index fbeaaf4..559063f 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -494,7 +494,7 @@ static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) return true; } -static int __init numa_register_memblks(struct numa_meminfo *mi) +static int __init __intentional_overflow(-1) numa_register_memblks(struct numa_meminfo *mi) { unsigned long uninitialized_var(pfn_align); int i, nid; diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index b008656..773eac2 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c @@ -36,7 +36,7 @@ enum { static int pte_testbit(pte_t pte) { - return pte_flags(pte) & _PAGE_UNUSED1; + return pte_flags(pte) & _PAGE_CPA_TEST; } struct split_state { diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index f9e5267..5c194c9 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -261,7 +261,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, */ #ifdef CONFIG_PCI_BIOS if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) - pgprot_val(forbidden) |= _PAGE_NX; + pgprot_val(forbidden) |= _PAGE_NX & __supported_pte_mask; #endif /* @@ -269,9 +269,10 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, * Does not cover __inittext since that is gone later on. On * 64bit we do not enforce !NX on the low mapping */ - if (within(address, (unsigned long)_text, (unsigned long)_etext)) - pgprot_val(forbidden) |= _PAGE_NX; + if (within(address, ktla_ktva((unsigned long)_text), ktla_ktva((unsigned long)_etext))) + pgprot_val(forbidden) |= _PAGE_NX & __supported_pte_mask; +#ifdef CONFIG_DEBUG_RODATA /* * The .rodata section needs to be read-only. Using the pfn * catches all aliases. 
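
/*
 * Editorial aside, not part of the patch: static_protections() accumulates
 * "forbidden" protection bits for special ranges and strips them from the
 * requested pgprot; the KERNEXEC hunk below extends this so kernel text can
 * be neither writable nor non-executable.  Simplified model; the flag values
 * and names are illustrative, not the real x86 PTE bits.
 */
#define DEMO_PAGE_RW 0x002UL
#define DEMO_PAGE_NX 0x100UL

static int demo_within(unsigned long x, unsigned long lo, unsigned long hi)
{
        return lo <= x && x < hi;
}

unsigned long demo_static_protections(unsigned long prot, unsigned long addr,
                                      unsigned long text_start, unsigned long text_end)
{
        unsigned long forbidden = 0;

        if (demo_within(addr, text_start, text_end))
                forbidden |= DEMO_PAGE_RW | DEMO_PAGE_NX;       /* kernel text stays RX */

        return prot & ~forbidden;
}
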
@@ -279,6 +280,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) pgprot_val(forbidden) |= _PAGE_RW; +#endif #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) /* @@ -317,6 +319,13 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, } #endif +#ifdef CONFIG_PAX_KERNEXEC + if (within(pfn, __pa(ktla_ktva((unsigned long)&_text)), __pa((unsigned long)&_sdata))) { + pgprot_val(forbidden) |= _PAGE_RW; + pgprot_val(forbidden) |= _PAGE_NX & __supported_pte_mask; + } +#endif + prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); return prot; @@ -369,23 +378,37 @@ EXPORT_SYMBOL_GPL(lookup_address); static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) { /* change init_mm */ + pax_open_kernel(); set_pte_atomic(kpte, pte); + #ifdef CONFIG_X86_32 if (!SHARED_KERNEL_PMD) { + +#ifdef CONFIG_PAX_PER_CPU_PGD + unsigned long cpu; +#else struct page *page; +#endif +#ifdef CONFIG_PAX_PER_CPU_PGD + for (cpu = 0; cpu < nr_cpu_ids; ++cpu) { + pgd_t *pgd = get_cpu_pgd(cpu); +#else list_for_each_entry(page, &pgd_list, lru) { - pgd_t *pgd; + pgd_t *pgd = (pgd_t *)page_address(page); +#endif + pud_t *pud; pmd_t *pmd; - pgd = (pgd_t *)page_address(page) + pgd_index(address); + pgd += pgd_index(address); pud = pud_offset(pgd, address); pmd = pmd_offset(pud, address); set_pte_atomic((pte_t *)pmd, pte); } } #endif + pax_close_kernel(); } static int diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index f6ff57b..481690f 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -361,7 +361,7 @@ int free_memtype(u64 start, u64 end) if (!entry) { printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n", - current->comm, current->pid, start, end); + current->comm, task_pid_nr(current), start, end); return -EINVAL; } @@ -492,8 +492,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) while (cursor < to) { if (!devmem_is_allowed(pfn)) { printk(KERN_INFO - "Program %s tried to access /dev/mem between %Lx->%Lx.\n", - current->comm, from, to); + "Program %s tried to access /dev/mem between %Lx->%Lx (%Lx).\n", + current->comm, from, to, cursor); return 0; } cursor += PAGE_SIZE; @@ -557,7 +557,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags) printk(KERN_INFO "%s:%d ioremap_change_attr failed %s " "for %Lx-%Lx\n", - current->comm, current->pid, + current->comm, task_pid_nr(current), cattr_name(flags), base, (unsigned long long)(base + size)); return -EINVAL; @@ -593,7 +593,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, if (want_flags != flags) { printk(KERN_WARNING "%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n", - current->comm, current->pid, + current->comm, task_pid_nr(current), cattr_name(want_flags), (unsigned long long)paddr, (unsigned long long)(paddr + size), @@ -615,7 +615,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, free_memtype(paddr, paddr + size); printk(KERN_ERR "%s:%d map pfn expected mapping type %s" " for %Lx-%Lx, got %s\n", - current->comm, current->pid, + current->comm, task_pid_nr(current), cattr_name(want_flags), (unsigned long long)paddr, (unsigned long long)(paddr + size), diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c index 8acaddd..4eaa657 100644 --- a/arch/x86/mm/pat_rbtree.c +++ b/arch/x86/mm/pat_rbtree.c @@ -165,7 +165,7 @@ 
success: failure: printk(KERN_INFO "%s:%d conflicting memory types " - "%Lx-%Lx %s<->%s\n", current->comm, current->pid, start, + "%Lx-%Lx %s<->%s\n", current->comm, task_pid_nr(current), start, end, cattr_name(found_type), cattr_name(match->type)); return -EBUSY; } diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c index 9f0614d..92ae64a 100644 --- a/arch/x86/mm/pf_in.c +++ b/arch/x86/mm/pf_in.c @@ -148,7 +148,7 @@ enum reason_type get_ins_type(unsigned long ins_addr) int i; enum reason_type rv = OTHERS; - p = (unsigned char *)ins_addr; + p = (unsigned char *)ktla_ktva(ins_addr); p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); @@ -168,7 +168,7 @@ static unsigned int get_ins_reg_width(unsigned long ins_addr) struct prefix_bits prf; int i; - p = (unsigned char *)ins_addr; + p = (unsigned char *)ktla_ktva(ins_addr); p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); @@ -191,7 +191,7 @@ unsigned int get_ins_mem_width(unsigned long ins_addr) struct prefix_bits prf; int i; - p = (unsigned char *)ins_addr; + p = (unsigned char *)ktla_ktva(ins_addr); p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); @@ -415,7 +415,7 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) struct prefix_bits prf; int i; - p = (unsigned char *)ins_addr; + p = (unsigned char *)ktla_ktva(ins_addr); p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); for (i = 0; i < ARRAY_SIZE(reg_rop); i++) @@ -470,7 +470,7 @@ unsigned long get_ins_imm_val(unsigned long ins_addr) struct prefix_bits prf; int i; - p = (unsigned char *)ins_addr; + p = (unsigned char *)ktla_ktva(ins_addr); p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); for (i = 0; i < ARRAY_SIZE(imm_wop); i++) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 8573b83..4f3ed7e 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -84,10 +84,64 @@ static inline void pgd_list_del(pgd_t *pgd) list_del(&page->lru); } -#define UNSHARED_PTRS_PER_PGD \ - (SHARED_KERNEL_PMD ? 
KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) +pgdval_t clone_pgd_mask __read_only = ~_PAGE_PRESENT; +void __shadow_user_pgds(pgd_t *dst, const pgd_t *src) +{ + unsigned int count = USER_PGD_PTRS; + while (count--) + *dst++ = __pgd((pgd_val(*src++) | (_PAGE_NX & __supported_pte_mask)) & ~_PAGE_USER); +} +#endif + +#ifdef CONFIG_PAX_PER_CPU_PGD +void __clone_user_pgds(pgd_t *dst, const pgd_t *src) +{ + unsigned int count = USER_PGD_PTRS; + + while (count--) { + pgd_t pgd; + +#ifdef CONFIG_X86_64 + pgd = __pgd(pgd_val(*src++) | _PAGE_USER); +#else + pgd = *src++; +#endif + +#if defined(CONFIG_X86_64) && defined(CONFIG_PAX_MEMORY_UDEREF) + pgd = __pgd(pgd_val(pgd) & clone_pgd_mask); +#endif + + *dst++ = pgd; + } + +} +#endif + +#ifdef CONFIG_X86_64 +#define pxd_t pud_t +#define pyd_t pgd_t +#define paravirt_release_pxd(pfn) paravirt_release_pud(pfn) +#define pxd_free(mm, pud) pud_free((mm), (pud)) +#define pyd_populate(mm, pgd, pud) pgd_populate((mm), (pgd), (pud)) +#define pyd_offset(mm, address) pgd_offset((mm), (address)) +#define PYD_SIZE PGDIR_SIZE +#else +#define pxd_t pmd_t +#define pyd_t pud_t +#define paravirt_release_pxd(pfn) paravirt_release_pmd(pfn) +#define pxd_free(mm, pud) pmd_free((mm), (pud)) +#define pyd_populate(mm, pgd, pud) pud_populate((mm), (pgd), (pud)) +#define pyd_offset(mm, address) pud_offset((mm), (address)) +#define PYD_SIZE PUD_SIZE +#endif + +#ifdef CONFIG_PAX_PER_CPU_PGD +static inline void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) {} +static inline void pgd_dtor(pgd_t *pgd) {} +#else static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) { BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm)); @@ -128,6 +182,7 @@ static void pgd_dtor(pgd_t *pgd) pgd_list_del(pgd); spin_unlock(&pgd_lock); } +#endif /* * List of all pgd's needed for non-PAE so it can invalidate entries @@ -140,7 +195,7 @@ static void pgd_dtor(pgd_t *pgd) * -- wli */ -#ifdef CONFIG_X86_PAE +#if defined(CONFIG_X86_32) && defined(CONFIG_X86_PAE) /* * In PAE mode, we need to do a cr3 reload (=tlb flush) when * updating the top-level pagetable entries to guarantee the @@ -152,7 +207,7 @@ static void pgd_dtor(pgd_t *pgd) * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate * and initialize the kernel pmds here. */ -#define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD +#define PREALLOCATED_PXDS (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) { @@ -170,36 +225,38 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) */ flush_tlb_mm(mm); } +#elif defined(CONFIG_X86_64) && defined(CONFIG_PAX_PER_CPU_PGD) +#define PREALLOCATED_PXDS USER_PGD_PTRS #else /* !CONFIG_X86_PAE */ /* No need to prepopulate any pagetable entries in non-PAE modes. 
*/ -#define PREALLOCATED_PMDS 0 +#define PREALLOCATED_PXDS 0 #endif /* CONFIG_X86_PAE */ -static void free_pmds(pmd_t *pmds[]) +static void free_pxds(pxd_t *pxds[]) { int i; - for(i = 0; i < PREALLOCATED_PMDS; i++) - if (pmds[i]) - free_page((unsigned long)pmds[i]); + for(i = 0; i < PREALLOCATED_PXDS; i++) + if (pxds[i]) + free_page((unsigned long)pxds[i]); } -static int preallocate_pmds(pmd_t *pmds[]) +static int preallocate_pxds(pxd_t *pxds[]) { int i; bool failed = false; - for(i = 0; i < PREALLOCATED_PMDS; i++) { - pmd_t *pmd = (pmd_t *)__get_free_page(PGALLOC_GFP); - if (pmd == NULL) + for(i = 0; i < PREALLOCATED_PXDS; i++) { + pxd_t *pxd = (pxd_t *)__get_free_page(PGALLOC_GFP); + if (pxd == NULL) failed = true; - pmds[i] = pmd; + pxds[i] = pxd; } if (failed) { - free_pmds(pmds); + free_pxds(pxds); return -ENOMEM; } @@ -212,51 +269,55 @@ static int preallocate_pmds(pmd_t *pmds[]) * preallocate which never got a corresponding vma will need to be * freed manually. */ -static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) +static void pgd_mop_up_pxds(struct mm_struct *mm, pgd_t *pgdp) { int i; - for(i = 0; i < PREALLOCATED_PMDS; i++) { + for(i = 0; i < PREALLOCATED_PXDS; i++) { pgd_t pgd = pgdp[i]; if (pgd_val(pgd) != 0) { - pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd); + pxd_t *pxd = (pxd_t *)pgd_page_vaddr(pgd); - pgdp[i] = native_make_pgd(0); + set_pgd(pgdp + i, native_make_pgd(0)); - paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT); - pmd_free(mm, pmd); + paravirt_release_pxd(pgd_val(pgd) >> PAGE_SHIFT); + pxd_free(mm, pxd); } } } -static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) +static void pgd_prepopulate_pxd(struct mm_struct *mm, pgd_t *pgd, pxd_t *pxds[]) { - pud_t *pud; + pyd_t *pyd; unsigned long addr; int i; - if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */ + if (PREALLOCATED_PXDS == 0) /* Work around gcc-3.4.x bug */ return; - pud = pud_offset(pgd, 0); +#ifdef CONFIG_X86_64 + pyd = pyd_offset(mm, 0L); +#else + pyd = pyd_offset(pgd, 0L); +#endif - for (addr = i = 0; i < PREALLOCATED_PMDS; - i++, pud++, addr += PUD_SIZE) { - pmd_t *pmd = pmds[i]; + for (addr = i = 0; i < PREALLOCATED_PXDS; + i++, pyd++, addr += PYD_SIZE) { + pxd_t *pxd = pxds[i]; if (i >= KERNEL_PGD_BOUNDARY) - memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]), - sizeof(pmd_t) * PTRS_PER_PMD); + memcpy(pxd, (pxd_t *)pgd_page_vaddr(swapper_pg_dir[i]), + sizeof(pxd_t) * PTRS_PER_PMD); - pud_populate(mm, pud, pmd); + pyd_populate(mm, pyd, pxd); } } pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; - pmd_t *pmds[PREALLOCATED_PMDS]; + pxd_t *pxds[PREALLOCATED_PXDS]; pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); @@ -265,11 +326,11 @@ pgd_t *pgd_alloc(struct mm_struct *mm) mm->pgd = pgd; - if (preallocate_pmds(pmds) != 0) + if (preallocate_pxds(pxds) != 0) goto out_free_pgd; if (paravirt_pgd_alloc(mm) != 0) - goto out_free_pmds; + goto out_free_pxds; /* * Make sure that pre-populating the pmds is atomic with @@ -279,14 +340,14 @@ pgd_t *pgd_alloc(struct mm_struct *mm) spin_lock(&pgd_lock); pgd_ctor(mm, pgd); - pgd_prepopulate_pmd(mm, pgd, pmds); + pgd_prepopulate_pxd(mm, pgd, pxds); spin_unlock(&pgd_lock); return pgd; -out_free_pmds: - free_pmds(pmds); +out_free_pxds: + free_pxds(pxds); out_free_pgd: free_page((unsigned long)pgd); out: @@ -295,7 +356,7 @@ out: void pgd_free(struct mm_struct *mm, pgd_t *pgd) { - pgd_mop_up_pmds(mm, pgd); + pgd_mop_up_pxds(mm, pgd); pgd_dtor(pgd); paravirt_pgd_free(mm, pgd); free_page((unsigned long)pgd); diff --git 
a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index cac7184..09a39fa 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -48,10 +48,13 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval) return; } pte = pte_offset_kernel(pmd, vaddr); + + pax_open_kernel(); if (pte_val(pteval)) set_pte_at(&init_mm, vaddr, pte, pteval); else pte_clear(&init_mm, vaddr, pte); + pax_close_kernel(); /* * It's enough to flush this one mapping. diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c index d2e2735..5c6586f 100644 --- a/arch/x86/mm/physaddr.c +++ b/arch/x86/mm/physaddr.c @@ -8,7 +8,7 @@ #ifdef CONFIG_X86_64 -unsigned long __phys_addr(unsigned long x) +unsigned long __intentional_overflow(-1) __phys_addr(unsigned long x) { if (x >= __START_KERNEL_map) { x -= __START_KERNEL_map; @@ -45,7 +45,7 @@ EXPORT_SYMBOL(__virt_addr_valid); #else #ifdef CONFIG_DEBUG_VIRTUAL -unsigned long __phys_addr(unsigned long x) +unsigned long __intentional_overflow(-1) __phys_addr(unsigned long x) { /* VMALLOC_* aren't constants */ VIRTUAL_BUG_ON(x < PAGE_OFFSET); diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c index 410531d..0f16030 100644 --- a/arch/x86/mm/setup_nx.c +++ b/arch/x86/mm/setup_nx.c @@ -5,8 +5,10 @@ #include #include +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) static int disable_nx __cpuinitdata; +#ifndef CONFIG_PAX_PAGEEXEC /* * noexec = on|off * @@ -28,12 +30,17 @@ static int __init noexec_setup(char *str) return 0; } early_param("noexec", noexec_setup); +#endif + +#endif void __cpuinit x86_configure_nx(void) { +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) if (cpu_has_nx && !disable_nx) __supported_pte_mask |= _PAGE_NX; else +#endif __supported_pte_mask &= ~_PAGE_NX; } diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index d6c0418..06a0ad5 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -65,7 +65,11 @@ void leave_mm(int cpu) BUG(); cpumask_clear_cpu(cpu, mm_cpumask(percpu_read(cpu_tlbstate.active_mm))); + +#ifndef CONFIG_PAX_PER_CPU_PGD load_cr3(swapper_pg_dir); +#endif + } EXPORT_SYMBOL_GPL(leave_mm); diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S index 6687022..ceabcfa 100644 --- a/arch/x86/net/bpf_jit.S +++ b/arch/x86/net/bpf_jit.S @@ -9,6 +9,7 @@ */ #include #include +#include /* * Calling convention : @@ -35,6 +36,7 @@ sk_load_word: jle bpf_slow_path_word mov (SKBDATA,%rsi),%eax bswap %eax /* ntohl() */ + pax_force_retaddr ret @@ -53,6 +55,7 @@ sk_load_half: jle bpf_slow_path_half movzwl (SKBDATA,%rsi),%eax rol $8,%ax # ntohs() + pax_force_retaddr ret sk_load_byte_ind: @@ -66,6 +69,7 @@ sk_load_byte: cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */ jle bpf_slow_path_byte movzbl (SKBDATA,%rsi),%eax + pax_force_retaddr ret /** @@ -82,6 +86,7 @@ ENTRY(sk_load_byte_msh) movzbl (SKBDATA,%rsi),%ebx and $15,%bl shl $2,%bl + pax_force_retaddr ret CFI_ENDPROC ENDPROC(sk_load_byte_msh) @@ -91,6 +96,7 @@ bpf_error: xor %eax,%eax mov -8(%rbp),%rbx leaveq + pax_force_retaddr ret /* rsi contains offset and can be scratched */ @@ -113,6 +119,7 @@ bpf_slow_path_word: js bpf_error mov -12(%rbp),%eax bswap %eax + pax_force_retaddr ret bpf_slow_path_half: @@ -121,12 +128,14 @@ bpf_slow_path_half: mov -12(%rbp),%ax rol $8,%ax movzwl %ax,%eax + pax_force_retaddr ret bpf_slow_path_byte: bpf_slow_path_common(1) js bpf_error movzbl -12(%rbp),%eax + pax_force_retaddr ret bpf_slow_path_byte_msh: @@ -137,4 +146,5 @@ bpf_slow_path_byte_msh: and $15,%al shl $2,%al xchg %eax,%ebx + pax_force_retaddr ret diff --git 
a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 5a5b6e4..07b4acb 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -11,6 +11,7 @@ #include #include #include +#include /* * Conventions : @@ -45,13 +46,96 @@ static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) return ptr + len; } +#ifdef CONFIG_GRKERNSEC_JIT_HARDEN +#define MAX_INSTR_CODE_SIZE 96 +#else +#define MAX_INSTR_CODE_SIZE 64 +#endif + #define EMIT(bytes, len) do { prog = emit_code(prog, bytes, len); } while (0) #define EMIT1(b1) EMIT(b1, 1) #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) + +#ifdef CONFIG_GRKERNSEC_JIT_HARDEN +/* original constant will appear in ecx */ +#define DILUTE_CONST_SEQUENCE(_off, _key) \ +do { \ + /* mov ecx, randkey */ \ + EMIT1(0xb9); \ + EMIT(_key, 4); \ + /* xor ecx, randkey ^ off */ \ + EMIT2(0x81, 0xf1); \ + EMIT((_key) ^ (_off), 4); \ +} while (0) +#define SHORT_JMP_LENGTH 2 +#define NEAR_JMP_LENGTH (5 + 8) +#define EMIT1_off32(b1, _off) \ +do { \ + switch (b1) { \ + case 0x05: /* add eax, imm32 */ \ + case 0x2d: /* sub eax, imm32 */ \ + case 0x25: /* and eax, imm32 */ \ + case 0x0d: /* or eax, imm32 */ \ + case 0x3d: /* cmp eax, imm32 */ \ + DILUTE_CONST_SEQUENCE(_off, randkey); \ + EMIT2((b1) - 4, 0xc8); /* convert imm instruction to eax, ecx */\ + break; \ + case 0xb8: /* mov eax, imm32 */ \ + DILUTE_CONST_SEQUENCE(_off, randkey); \ + /* mov eax, ecx */ \ + EMIT2(0x89, 0xc8); \ + break; \ + case 0xa9: /* test eax, imm32 */ \ + DILUTE_CONST_SEQUENCE(_off, randkey); \ + /* test eax, ecx */ \ + EMIT2(0x85, 0xc8); \ + break; \ + case 0xbb: /* mov ebx, imm32 */ \ + DILUTE_CONST_SEQUENCE(_off, randkey); \ + /* mov ebx, ecx */ \ + EMIT2(0x89, 0xcb); \ + break; \ + case 0xbe: /* mov esi, imm32 */ \ + DILUTE_CONST_SEQUENCE(_off, randkey); \ + /* mov esi, ecx */ \ + EMIT2(0x89, 0xce); \ + break; \ + case 0xe8: /* call rel imm32, always to known funcs */ \ + EMIT1(b1); \ + EMIT(_off, 4); \ + break; \ + case 0xe9: /* jmp rel imm32 */ \ + BUG_ON((int)(_off) < 0); \ + EMIT1(b1); \ + EMIT(_off + 8, 4); \ + /* prevent fall-through, we're not called if off = 0 */ \ + EMIT(0xcccccccc, 4); \ + EMIT(0xcccccccc, 4); \ + break; \ + default: \ + BUILD_BUG_ON(1); \ + } \ +} while (0) + +#define EMIT2_off32(b1, b2, _off) \ +do { \ + if ((b1) == 0x69 && (b2) == 0xc0) { /* imul eax, imm32 */ \ + DILUTE_CONST_SEQUENCE(_off, randkey); \ + /* imul eax, ecx */ \ + EMIT3(0x0f, 0xaf, 0xc1); \ + } else { \ + BUILD_BUG_ON(1); \ + } \ +} while (0) +#else #define EMIT1_off32(b1, off) do { EMIT1(b1); EMIT(off, 4);} while (0) +#define EMIT2_off32(b1, b2, off) do { EMIT2(b1, b2); EMIT(off, 4);} while (0) +#define SHORT_JMP_LENGTH 2 +#define NEAR_JMP_LENGTH 5 +#endif #define CLEAR_A() EMIT2(0x31, 0xc0) /* xor %eax,%eax */ #define CLEAR_X() EMIT2(0x31, 0xdb) /* xor %ebx,%ebx */ @@ -68,6 +152,7 @@ static inline bool is_near(int offset) #define EMIT_JMP(offset) \ do { \ + BUG_ON((int)(offset) < 0); \ if (offset) { \ if (is_near(offset)) \ EMIT2(0xeb, offset); /* jmp .+off8 */ \ @@ -86,13 +171,33 @@ do { \ #define X86_JBE 0x76 #define X86_JA 0x77 +#ifdef CONFIG_GRKERNSEC_JIT_HARDEN +#define APPEND_FLOW_VERIFY() \ +do { \ + /* mov ecx, randkey */ \ + EMIT1(0xb9); \ + EMIT(randkey, 4); \ + /* cmp ecx, randkey */ \ + EMIT2(0x81, 0xf9); \ + EMIT(randkey, 4); \ + /* jz after 8 int 3s */ \ + EMIT2(0x74, 0x08); \ + EMIT(0xcccccccc, 
4); \ + EMIT(0xcccccccc, 4); \ +} while (0) +#else +#define APPEND_FLOW_VERIFY() do { } while (0) +#endif + #define EMIT_COND_JMP(op, offset) \ do { \ + BUG_ON((int)(offset) < 0); \ if (is_near(offset)) \ EMIT2(op, offset); /* jxx .+off8 */ \ else { \ EMIT2(0x0f, op + 0x10); \ - EMIT(offset, 4); /* jxx .+off32 */ \ + EMIT((offset) + 21, 4); /* jxx .+off32 */ \ + APPEND_FLOW_VERIFY(); \ } \ } while (0) @@ -117,10 +222,14 @@ static inline void bpf_flush_icache(void *start, void *end) set_fs(old_fs); } +struct bpf_jit_work { + struct work_struct work; + void *image; +}; void bpf_jit_compile(struct sk_filter *fp) { - u8 temp[64]; + u8 temp[MAX_INSTR_CODE_SIZE]; u8 *prog; unsigned int proglen, oldproglen = 0; int ilen, i; @@ -133,6 +242,9 @@ void bpf_jit_compile(struct sk_filter *fp) unsigned int *addrs; const struct sock_filter *filter = fp->insns; int flen = fp->len; +#ifdef CONFIG_GRKERNSEC_JIT_HARDEN + unsigned int randkey; +#endif if (!bpf_jit_enable) return; @@ -141,11 +253,15 @@ void bpf_jit_compile(struct sk_filter *fp) if (addrs == NULL) return; + fp->work = kmalloc(sizeof(*fp->work), GFP_KERNEL); + if (!fp->work) + goto out; + /* Before first pass, make a rough estimation of addrs[] - * each bpf instruction is translated to less than 64 bytes + * each bpf instruction is translated to less than MAX_INSTR_CODE_SIZE bytes */ for (proglen = 0, i = 0; i < flen; i++) { - proglen += 64; + proglen += MAX_INSTR_CODE_SIZE; addrs[i] = proglen; } cleanup_addr = proglen; /* epilogue address */ @@ -221,6 +337,10 @@ void bpf_jit_compile(struct sk_filter *fp) for (i = 0; i < flen; i++) { unsigned int K = filter[i].k; +#ifdef CONFIG_GRKERNSEC_JIT_HARDEN + randkey = prandom_u32(); +#endif + switch (filter[i].code) { case BPF_S_ALU_ADD_X: /* A += X; */ seen |= SEEN_XREG; @@ -253,10 +373,8 @@ void bpf_jit_compile(struct sk_filter *fp) case BPF_S_ALU_MUL_K: /* A *= K */ if (is_imm8(K)) EMIT3(0x6b, 0xc0, K); /* imul imm8,%eax,%eax */ - else { - EMIT2(0x69, 0xc0); /* imul imm32,%eax */ - EMIT(K, 4); - } + else + EMIT2_off32(0x69, 0xc0, K); /* imul imm32,%eax */ break; case BPF_S_ALU_DIV_X: /* A /= X; */ seen |= SEEN_XREG; @@ -269,15 +387,21 @@ void bpf_jit_compile(struct sk_filter *fp) EMIT_COND_JMP(X86_JE, addrs[pc_ret0 - 1] - (addrs[i] - 4)); } else { - EMIT_COND_JMP(X86_JNE, 2 + 5); + EMIT_COND_JMP(X86_JNE, 2 + NEAR_JMP_LENGTH); CLEAR_A(); EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */ } EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */ break; case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */ +#ifdef CONFIG_GRKERNSEC_JIT_HARDEN + DILUTE_CONST_SEQUENCE(K, randkey); + // imul rax, rcx + EMIT4(0x48, 0x0f, 0xaf, 0xc1); +#else EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */ EMIT(K, 4); +#endif EMIT4(0x48, 0xc1, 0xe8, 0x20); /* shr $0x20,%rax */ break; case BPF_S_ALU_AND_X: @@ -477,7 +601,7 @@ void bpf_jit_compile(struct sk_filter *fp) common_load: seen |= SEEN_DATAREF; if ((int)K < 0) { /* Abort the JIT because __load_pointer() is needed. */ - goto out; + goto error; } t_offset = func - (image + addrs[i]); EMIT1_off32(0xbe, K); /* mov imm32,%esi */ @@ -492,7 +616,7 @@ common_load: seen |= SEEN_DATAREF; case BPF_S_LDX_B_MSH: if ((int)K < 0) { /* Abort the JIT because __load_pointer() is needed. 
*/ - goto out; + goto error; } seen |= SEEN_DATAREF | SEEN_XREG; t_offset = sk_load_byte_msh - (image + addrs[i]); @@ -572,7 +696,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; } if (filter[i].jt != 0) { if (filter[i].jf && f_offset) - t_offset += is_near(f_offset) ? 2 : 5; + t_offset += is_near(f_offset) ? SHORT_JMP_LENGTH : NEAR_JMP_LENGTH; EMIT_COND_JMP(t_op, t_offset); if (filter[i].jf) EMIT_JMP(f_offset); @@ -582,17 +706,18 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; break; default: /* hmm, too complex filter, give up with jit compiler */ - goto out; + goto error; } ilen = prog - temp; if (image) { if (unlikely(proglen + ilen > oldproglen)) { pr_err("bpb_jit_compile fatal error\n"); - kfree(addrs); - module_free(NULL, image); - return; + module_free_exec(NULL, image); + goto error; } + pax_open_kernel(); memcpy(image + proglen, temp, ilen); + pax_close_kernel(); } proglen += ilen; addrs[i] = proglen; @@ -613,11 +738,9 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; break; } if (proglen == oldproglen) { - image = module_alloc(max_t(unsigned int, - proglen, - sizeof(struct work_struct))); + image = module_alloc_exec(proglen); if (!image) - goto out; + goto error; } oldproglen = proglen; } @@ -633,7 +756,10 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; bpf_flush_icache(image, image + proglen); fp->bpf_func = (void *)image; - } + } else +error: + kfree(fp->work); + out: kfree(addrs); return; @@ -641,18 +767,20 @@ out: static void jit_free_defer(struct work_struct *arg) { - module_free(NULL, arg); + module_free_exec(NULL, ((struct bpf_jit_work *)arg)->image); + kfree(arg); } /* run from softirq, we must use a work_struct to call - * module_free() from process context + * module_free_exec() from process context */ void bpf_jit_free(struct sk_filter *fp) { if (fp->bpf_func != sk_run_filter) { - struct work_struct *work = (struct work_struct *)fp->bpf_func; + struct work_struct *work = &fp->work->work; INIT_WORK(work, jit_free_defer); + fp->work->image = fp->bpf_func; schedule_work(work); } } diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index bff89df..377758a 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -46,11 +46,11 @@ dump_user_backtrace_32(struct stack_frame_ia32 *head) struct stack_frame_ia32 *fp; unsigned long bytes; - bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead)); + bytes = copy_from_user_nmi(bufhead, (const char __force_user *)head, sizeof(bufhead)); if (bytes != sizeof(bufhead)) return NULL; - fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame); + fp = (struct stack_frame_ia32 __force_kernel *) compat_ptr(bufhead[0].next_frame); oprofile_add_trace(bufhead[0].return_address); @@ -92,7 +92,7 @@ static struct stack_frame *dump_user_backtrace(struct stack_frame *head) struct stack_frame bufhead[2]; unsigned long bytes; - bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead)); + bytes = copy_from_user_nmi(bufhead, (const char __force_user *)head, sizeof(bufhead)); if (bytes != sizeof(bufhead)) return NULL; @@ -111,7 +111,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) { struct stack_frame *head = (struct stack_frame *)frame_pointer(regs); - if (!user_mode_vm(regs)) { + if (!user_mode(regs)) { unsigned long stack = kernel_stack_pointer(regs); if (depth) dump_trace(NULL, regs, (unsigned long *)stack, 0, diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 6bc0899..13d2579 100644 --- 
a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "op_counter.h" #include "op_x86_model.h" @@ -759,8 +760,11 @@ int __init op_nmi_init(struct oprofile_operations *ops) if (ret) return ret; - if (!model->num_virt_counters) - model->num_virt_counters = model->num_counters; + if (!model->num_virt_counters) { + pax_open_kernel(); + *(unsigned int *)&model->num_virt_counters = model->num_counters; + pax_close_kernel(); + } mux_init(ops); diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 303f086..d020916 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -519,9 +519,11 @@ static int op_amd_init(struct oprofile_operations *ops) num_counters = AMD64_NUM_COUNTERS; } - op_amd_spec.num_counters = num_counters; - op_amd_spec.num_controls = num_counters; - op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS); + pax_open_kernel(); + *(unsigned int *)&op_amd_spec.num_counters = num_counters; + *(unsigned int *)&op_amd_spec.num_controls = num_counters; + *(unsigned int *)&op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS); + pax_close_kernel(); return 0; } diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index d90528e..0127e2b 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "op_x86_model.h" #include "op_counter.h" @@ -221,8 +222,10 @@ static void arch_perfmon_setup_counters(void) num_counters = min((int)eax.split.num_counters, OP_MAX_COUNTER); - op_arch_perfmon_spec.num_counters = num_counters; - op_arch_perfmon_spec.num_controls = num_counters; + pax_open_kernel(); + *(unsigned int *)&op_arch_perfmon_spec.num_counters = num_counters; + *(unsigned int *)&op_arch_perfmon_spec.num_controls = num_counters; + pax_close_kernel(); } static int arch_perfmon_init(struct oprofile_operations *ignore) diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 71e8a67..6a313bb 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -52,7 +52,7 @@ struct op_x86_model_spec { void (*switch_ctrl)(struct op_x86_model_spec const *model, struct op_msrs const * const msrs); #endif -}; +} __do_const; struct op_counter_config; diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 385a940..b11662d 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -355,7 +355,7 @@ static int __cpuinit amd_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata amd_cpu_notifier = { +static struct notifier_block amd_cpu_notifier = { .notifier_call = amd_cpu_notify, }; diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 372e9b8..e775a6c 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -50,7 +50,7 @@ struct irq_router { struct irq_router_handler { u16 vendor; int (*probe)(struct irq_router *r, struct pci_dev *router, u16 device); -}; +} __do_const; int (*pcibios_enable_irq)(struct pci_dev *dev) = pirq_enable_irq; void (*pcibios_disable_irq)(struct pci_dev *dev) = NULL; @@ -794,7 +794,7 @@ static __init int pico_router_probe(struct irq_router *r, struct pci_dev *router return 0; } -static __initdata struct irq_router_handler pirq_routers[] = { +static __initconst const struct irq_router_handler pirq_routers[] = { { PCI_VENDOR_ID_INTEL, intel_router_probe }, { PCI_VENDOR_ID_AL, 
ali_router_probe }, { PCI_VENDOR_ID_ITE, ite_router_probe }, @@ -821,7 +821,7 @@ static struct pci_dev *pirq_router_dev; static void __init pirq_find_router(struct irq_router *r) { struct irq_routing_table *rt = pirq_table; - struct irq_router_handler *h; + const struct irq_router_handler *h; #ifdef CONFIG_PCI_BIOS if (!rt->signature) { @@ -1094,7 +1094,7 @@ static int __init fix_acer_tm360_irqrouting(const struct dmi_system_id *d) return 0; } -static struct dmi_system_id __initdata pciirq_dmi_table[] = { +static const struct dmi_system_id __initconst pciirq_dmi_table[] = { { .callback = fix_broken_hp_bios_irq9, .ident = "HP Pavilion N5400 Series Laptop", diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c index cb29191..036766d 100644 --- a/arch/x86/pci/mrst.c +++ b/arch/x86/pci/mrst.c @@ -234,7 +234,9 @@ int __init pci_mrst_init(void) printk(KERN_INFO "Moorestown platform detected, using MRST PCI ops\n"); pci_mmcfg_late_init(); pcibios_enable_irq = mrst_pci_irq_enable; - pci_root_ops = pci_mrst_ops; + pax_open_kernel(); + memcpy((void *)&pci_root_ops, &pci_mrst_ops, sizeof(pci_mrst_ops)); + pax_close_kernel(); /* Continue with standard init */ return 1; } diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index db0e9a5..0372c14 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c @@ -79,50 +79,93 @@ union bios32 { static struct { unsigned long address; unsigned short segment; -} bios32_indirect = { 0, __KERNEL_CS }; +} bios32_indirect __read_only = { 0, __PCIBIOS_CS }; /* * Returns the entry point for the given service, NULL on error */ -static unsigned long bios32_service(unsigned long service) +static unsigned long __devinit bios32_service(unsigned long service) { unsigned char return_code; /* %al */ unsigned long address; /* %ebx */ unsigned long length; /* %ecx */ unsigned long entry; /* %edx */ unsigned long flags; + struct desc_struct d, *gdt; local_irq_save(flags); - __asm__("lcall *(%%edi); cld" + + gdt = get_cpu_gdt_table(smp_processor_id()); + + pack_descriptor(&d, 0UL, 0xFFFFFUL, 0x9B, 0xC); + write_gdt_entry(gdt, GDT_ENTRY_PCIBIOS_CS, &d, DESCTYPE_S); + pack_descriptor(&d, 0UL, 0xFFFFFUL, 0x93, 0xC); + write_gdt_entry(gdt, GDT_ENTRY_PCIBIOS_DS, &d, DESCTYPE_S); + + __asm__("movw %w7, %%ds; lcall *(%%edi); push %%ss; pop %%ds; cld" : "=a" (return_code), "=b" (address), "=c" (length), "=d" (entry) : "0" (service), "1" (0), - "D" (&bios32_indirect)); + "D" (&bios32_indirect), + "r"(__PCIBIOS_DS) + : "memory"); + + pax_open_kernel(); + gdt[GDT_ENTRY_PCIBIOS_CS].a = 0; + gdt[GDT_ENTRY_PCIBIOS_CS].b = 0; + gdt[GDT_ENTRY_PCIBIOS_DS].a = 0; + gdt[GDT_ENTRY_PCIBIOS_DS].b = 0; + pax_close_kernel(); + local_irq_restore(flags); switch (return_code) { - case 0: - return address + entry; - case 0x80: /* Not present */ - printk(KERN_WARNING "bios32_service(0x%lx): not present\n", service); - return 0; - default: /* Shouldn't happen */ - printk(KERN_WARNING "bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n", - service, return_code); + case 0: { + int cpu; + unsigned char flags; + + printk(KERN_INFO "bios32_service: base:%08lx length:%08lx entry:%08lx\n", address, length, entry); + if (address >= 0xFFFF0 || length > 0x100000 - address || length <= entry) { + printk(KERN_WARNING "bios32_service: not valid\n"); return 0; + } + address = address + PAGE_OFFSET; + length += 16UL; /* some BIOSs underreport this... 
*/ + flags = 4; + if (length >= 64*1024*1024) { + length >>= PAGE_SHIFT; + flags |= 8; + } + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) { + gdt = get_cpu_gdt_table(cpu); + pack_descriptor(&d, address, length, 0x9b, flags); + write_gdt_entry(gdt, GDT_ENTRY_PCIBIOS_CS, &d, DESCTYPE_S); + pack_descriptor(&d, address, length, 0x93, flags); + write_gdt_entry(gdt, GDT_ENTRY_PCIBIOS_DS, &d, DESCTYPE_S); + } + return entry; + } + case 0x80: /* Not present */ + printk(KERN_WARNING "bios32_service(0x%lx): not present\n", service); + return 0; + default: /* Shouldn't happen */ + printk(KERN_WARNING "bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n", + service, return_code); + return 0; } } static struct { unsigned long address; unsigned short segment; -} pci_indirect = { 0, __KERNEL_CS }; +} pci_indirect __read_only = { 0, __PCIBIOS_CS }; -static int pci_bios_present; +static int pci_bios_present __read_only; static int __devinit check_pcibios(void) { @@ -131,11 +174,13 @@ static int __devinit check_pcibios(void) unsigned long flags, pcibios_entry; if ((pcibios_entry = bios32_service(PCI_SERVICE))) { - pci_indirect.address = pcibios_entry + PAGE_OFFSET; + pci_indirect.address = pcibios_entry; local_irq_save(flags); - __asm__( - "lcall *(%%edi); cld\n\t" + __asm__("movw %w6, %%ds\n\t" + "lcall *%%ss:(%%edi); cld\n\t" + "push %%ss\n\t" + "pop %%ds\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" "1:" @@ -144,7 +189,8 @@ static int __devinit check_pcibios(void) "=b" (ebx), "=c" (ecx) : "1" (PCIBIOS_PCI_BIOS_PRESENT), - "D" (&pci_indirect) + "D" (&pci_indirect), + "r" (__PCIBIOS_DS) : "memory"); local_irq_restore(flags); @@ -189,7 +235,10 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, switch (len) { case 1: - __asm__("lcall *(%%esi); cld\n\t" + __asm__("movw %w6, %%ds\n\t" + "lcall *%%ss:(%%esi); cld\n\t" + "push %%ss\n\t" + "pop %%ds\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" "1:" @@ -198,7 +247,8 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, : "1" (PCIBIOS_READ_CONFIG_BYTE), "b" (bx), "D" ((long)reg), - "S" (&pci_indirect)); + "S" (&pci_indirect), + "r" (__PCIBIOS_DS)); /* * Zero-extend the result beyond 8 bits, do not trust the * BIOS having done it: @@ -206,7 +256,10 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, *value &= 0xff; break; case 2: - __asm__("lcall *(%%esi); cld\n\t" + __asm__("movw %w6, %%ds\n\t" + "lcall *%%ss:(%%esi); cld\n\t" + "push %%ss\n\t" + "pop %%ds\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" "1:" @@ -215,7 +268,8 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, : "1" (PCIBIOS_READ_CONFIG_WORD), "b" (bx), "D" ((long)reg), - "S" (&pci_indirect)); + "S" (&pci_indirect), + "r" (__PCIBIOS_DS)); /* * Zero-extend the result beyond 16 bits, do not trust the * BIOS having done it: @@ -223,7 +277,10 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, *value &= 0xffff; break; case 4: - __asm__("lcall *(%%esi); cld\n\t" + __asm__("movw %w6, %%ds\n\t" + "lcall *%%ss:(%%esi); cld\n\t" + "push %%ss\n\t" + "pop %%ds\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" "1:" @@ -232,7 +289,8 @@ static int pci_bios_read(unsigned int seg, unsigned int bus, : "1" (PCIBIOS_READ_CONFIG_DWORD), "b" (bx), "D" ((long)reg), - "S" (&pci_indirect)); + "S" (&pci_indirect), + "r" (__PCIBIOS_DS)); break; } @@ -256,7 +314,10 @@ static int pci_bios_write(unsigned int seg, unsigned int bus, switch (len) { case 1: - __asm__("lcall *(%%esi); cld\n\t" + __asm__("movw %w6, %%ds\n\t" + "lcall *%%ss:(%%esi); cld\n\t" + "push %%ss\n\t" + "pop %%ds\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" 
"1:" @@ -265,10 +326,14 @@ static int pci_bios_write(unsigned int seg, unsigned int bus, "c" (value), "b" (bx), "D" ((long)reg), - "S" (&pci_indirect)); + "S" (&pci_indirect), + "r" (__PCIBIOS_DS)); break; case 2: - __asm__("lcall *(%%esi); cld\n\t" + __asm__("movw %w6, %%ds\n\t" + "lcall *%%ss:(%%esi); cld\n\t" + "push %%ss\n\t" + "pop %%ds\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" "1:" @@ -277,10 +342,14 @@ static int pci_bios_write(unsigned int seg, unsigned int bus, "c" (value), "b" (bx), "D" ((long)reg), - "S" (&pci_indirect)); + "S" (&pci_indirect), + "r" (__PCIBIOS_DS)); break; case 4: - __asm__("lcall *(%%esi); cld\n\t" + __asm__("movw %w6, %%ds\n\t" + "lcall *%%ss:(%%esi); cld\n\t" + "push %%ss\n\t" + "pop %%ds\n\t" "jc 1f\n\t" "xor %%ah, %%ah\n" "1:" @@ -289,7 +358,8 @@ static int pci_bios_write(unsigned int seg, unsigned int bus, "c" (value), "b" (bx), "D" ((long)reg), - "S" (&pci_indirect)); + "S" (&pci_indirect), + "r" (__PCIBIOS_DS)); break; } @@ -394,10 +464,13 @@ struct irq_routing_table * pcibios_get_irq_routing_table(void) DBG("PCI: Fetching IRQ routing table... "); __asm__("push %%es\n\t" + "movw %w8, %%ds\n\t" "push %%ds\n\t" "pop %%es\n\t" - "lcall *(%%esi); cld\n\t" + "lcall *%%ss:(%%esi); cld\n\t" "pop %%es\n\t" + "push %%ss\n\t" + "pop %%ds\n" "jc 1f\n\t" "xor %%ah, %%ah\n" "1:" @@ -408,7 +481,8 @@ struct irq_routing_table * pcibios_get_irq_routing_table(void) "1" (0), "D" ((long) &opt), "S" (&pci_indirect), - "m" (opt) + "m" (opt), + "r" (__PCIBIOS_DS) : "memory"); DBG("OK ret=%d, size=%d, map=%x\n", ret, opt.size, map); if (ret & 0xff00) @@ -432,7 +506,10 @@ int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq) { int ret; - __asm__("lcall *(%%esi); cld\n\t" + __asm__("movw %w5, %%ds\n\t" + "lcall *%%ss:(%%esi); cld\n\t" + "push %%ss\n\t" + "pop %%ds\n" "jc 1f\n\t" "xor %%ah, %%ah\n" "1:" @@ -440,7 +517,8 @@ int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq) : "0" (PCIBIOS_SET_PCI_HW_INT), "b" ((dev->bus->number << 8) | dev->devfn), "c" ((irq << 8) | (pin + 10)), - "S" (&pci_indirect)); + "S" (&pci_indirect), + "r" (__PCIBIOS_DS)); return !(ret & 0xff00); } EXPORT_SYMBOL(pcibios_set_irq_routing); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index e56da77..cba13dc 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -747,6 +747,8 @@ void __init efi_enter_virtual_mode(void) BUG_ON(!efi.systab); + efi_setup_pgd(); + status = phys_efi_set_virtual_address_map( memmap.desc_size * count, memmap.desc_size, diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 40e4469..94d16d7 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -44,11 +44,22 @@ void efi_call_phys_prelog(void) { struct desc_ptr gdt_descr; +#ifdef CONFIG_PAX_KERNEXEC + struct desc_struct d; +#endif + local_irq_save(efi_rt_eflags); load_cr3(initial_page_table); __flush_tlb_all(); +#ifdef CONFIG_PAX_KERNEXEC + pack_descriptor(&d, 0, 0xFFFFF, 0x9B, 0xC); + write_gdt_entry(get_cpu_gdt_table(0), GDT_ENTRY_KERNEXEC_EFI_CS, &d, DESCTYPE_S); + pack_descriptor(&d, 0, 0xFFFFF, 0x93, 0xC); + write_gdt_entry(get_cpu_gdt_table(0), GDT_ENTRY_KERNEXEC_EFI_DS, &d, DESCTYPE_S); +#endif + gdt_descr.address = __pa(get_cpu_gdt_table(0)); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); @@ -58,12 +69,29 @@ void efi_call_phys_epilog(void) { struct desc_ptr gdt_descr; +#ifdef CONFIG_PAX_KERNEXEC + struct desc_struct d; + + memset(&d, 0, sizeof d); + write_gdt_entry(get_cpu_gdt_table(0), 
GDT_ENTRY_KERNEXEC_EFI_CS, &d, DESCTYPE_S); + write_gdt_entry(get_cpu_gdt_table(0), GDT_ENTRY_KERNEXEC_EFI_DS, &d, DESCTYPE_S); +#endif + gdt_descr.address = (unsigned long)get_cpu_gdt_table(0); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); +#ifdef CONFIG_PAX_PER_CPU_PGD + load_cr3(get_cpu_pgd(smp_processor_id())); +#else load_cr3(swapper_pg_dir); +#endif + __flush_tlb_all(); local_irq_restore(efi_rt_eflags); } + +void __init efi_setup_pgd(void) +{ +} diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 0fba86d..2363f0b 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -75,6 +75,11 @@ void __init efi_call_phys_prelog(void) vaddress = (unsigned long)__va(pgd * PGDIR_SIZE); set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress)); } + +#ifdef CONFIG_PAX_PER_CPU_PGD + load_cr3(swapper_pg_dir); +#endif + __flush_tlb_all(); } @@ -88,6 +93,11 @@ void __init efi_call_phys_epilog(void) for (pgd = 0; pgd < n_pgds; pgd++) set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]); kfree(save_pgd); + +#ifdef CONFIG_PAX_PER_CPU_PGD + load_cr3(get_cpu_pgd(smp_processor_id())); +#endif + __flush_tlb_all(); local_irq_restore(efi_flags); early_code_mapping_set_exec(0); @@ -109,3 +119,19 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, return (void __iomem *)__va(phys_addr); } + +void __init efi_setup_pgd(void) +{ + /* PaX: We need to disable the NX bit in the PGD, otherwise we won't be + * able to execute the EFI services. + */ + if (__supported_pte_mask & _PAGE_NX) { + unsigned long addr = (unsigned long) __va(0); + pgd_t pe = __pgd(pgd_val(*pgd_offset_k(addr)) & ~_PAGE_NX); + pr_info("PAX: Disabling NX protection for low memory map.\n"); +#ifdef CONFIG_PAX_PER_CPU_PGD + set_pgd(pgd_offset_cpu(0, addr), pe); +#endif + set_pgd(pgd_offset_k(addr), pe); + } +} diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S index fbe66e6..eae5e38 100644 --- a/arch/x86/platform/efi/efi_stub_32.S +++ b/arch/x86/platform/efi/efi_stub_32.S @@ -6,7 +6,9 @@ */ #include +#include #include +#include /* * efi_call_phys(void *, ...) is a function with variable parameters. @@ -20,7 +22,7 @@ * service functions will comply with gcc calling convention, too. */ -.text +__INIT ENTRY(efi_call_phys) /* * 0. The function can only be called in Linux kernel. So CS has been @@ -36,10 +38,24 @@ ENTRY(efi_call_phys) * The mapping of lower virtual memory has been created in prelog and * epilog. */ - movl $1f, %edx - subl $__PAGE_OFFSET, %edx - jmp *%edx +#ifdef CONFIG_PAX_KERNEXEC + movl $(__KERNEXEC_EFI_DS), %edx + mov %edx, %ds + mov %edx, %es + mov %edx, %ss + addl $2f,(1f) + ljmp *(1f) + +__INITDATA +1: .long __LOAD_PHYSICAL_ADDR, __KERNEXEC_EFI_CS +.previous + +2: + subl $2b,(1b) +#else + jmp 1f-__PAGE_OFFSET 1: +#endif /* * 2. Now on the top of stack is the return @@ -47,14 +63,8 @@ ENTRY(efi_call_phys) * parameter 2, ..., param n. To make things easy, we save the return * address of efi_call_phys in a global variable. */ - popl %edx - movl %edx, saved_return_addr - /* get the function pointer into ECX*/ - popl %ecx - movl %ecx, efi_rt_function_ptr - movl $2f, %edx - subl $__PAGE_OFFSET, %edx - pushl %edx + popl (saved_return_addr) + popl (efi_rt_function_ptr) /* * 3. Clear PG bit in %CR0. @@ -73,9 +83,8 @@ ENTRY(efi_call_phys) /* * 5. Call the physical function. */ - jmp *%ecx + call *(efi_rt_function_ptr-__PAGE_OFFSET) -2: /* * 6. 
After EFI runtime service returns, control will return to * following instruction. We'd better readjust stack pointer first. @@ -88,35 +97,36 @@ ENTRY(efi_call_phys) movl %cr0, %edx orl $0x80000000, %edx movl %edx, %cr0 - jmp 1f -1: + /* * 8. Now restore the virtual mode from flat mode by * adding EIP with PAGE_OFFSET. */ - movl $1f, %edx - jmp *%edx +#ifdef CONFIG_PAX_KERNEXEC + movl $(__KERNEL_DS), %edx + mov %edx, %ds + mov %edx, %es + mov %edx, %ss + ljmp $(__KERNEL_CS),$1f +#else + jmp 1f+__PAGE_OFFSET +#endif 1: /* * 9. Balance the stack. And because EAX contain the return value, * we'd better not clobber it. */ - leal efi_rt_function_ptr, %edx - movl (%edx), %ecx - pushl %ecx + pushl (efi_rt_function_ptr) /* - * 10. Push the saved return address onto the stack and return. + * 10. Return to the saved return address. */ - leal saved_return_addr, %edx - movl (%edx), %ecx - pushl %ecx - ret + jmpl *(saved_return_addr) ENDPROC(efi_call_phys) .previous -.data +__INITDATA saved_return_addr: .long 0 efi_rt_function_ptr: diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 4c07cca..2c8427d 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -7,6 +7,7 @@ */ #include +#include #define SAVE_XMM \ mov %rsp, %rax; \ @@ -40,6 +41,7 @@ ENTRY(efi_call0) call *%rdi addq $32, %rsp RESTORE_XMM + pax_force_retaddr 0, 1 ret ENDPROC(efi_call0) @@ -50,6 +52,7 @@ ENTRY(efi_call1) call *%rdi addq $32, %rsp RESTORE_XMM + pax_force_retaddr 0, 1 ret ENDPROC(efi_call1) @@ -60,6 +63,7 @@ ENTRY(efi_call2) call *%rdi addq $32, %rsp RESTORE_XMM + pax_force_retaddr 0, 1 ret ENDPROC(efi_call2) @@ -71,6 +75,7 @@ ENTRY(efi_call3) call *%rdi addq $32, %rsp RESTORE_XMM + pax_force_retaddr 0, 1 ret ENDPROC(efi_call3) @@ -83,6 +88,7 @@ ENTRY(efi_call4) call *%rdi addq $32, %rsp RESTORE_XMM + pax_force_retaddr 0, 1 ret ENDPROC(efi_call4) @@ -96,6 +102,7 @@ ENTRY(efi_call5) call *%rdi addq $48, %rsp RESTORE_XMM + pax_force_retaddr 0, 1 ret ENDPROC(efi_call5) @@ -112,5 +119,6 @@ ENTRY(efi_call6) call *%rdi addq $48, %rsp RESTORE_XMM + pax_force_retaddr 0, 1 ret ENDPROC(efi_call6) diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index ad4ec1c..686479e 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c @@ -76,18 +76,20 @@ struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX]; EXPORT_SYMBOL_GPL(sfi_mrtc_array); int sfi_mrtc_num; -static void mrst_power_off(void) +static __noreturn void mrst_power_off(void) { if (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT) intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 1); + BUG(); } -static void mrst_reboot(void) +static __noreturn void mrst_reboot(void) { if (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT) intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0); else intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0); + BUG(); } /* parse all the mtimer info to a static mtimer array */ diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c index d6ee929..3637cb5 100644 --- a/arch/x86/platform/olpc/olpc_dt.c +++ b/arch/x86/platform/olpc/olpc_dt.c @@ -156,7 +156,7 @@ void * __init prom_early_alloc(unsigned long size) return res; } -static struct of_pdt_ops prom_olpc_ops __initdata = { +static struct of_pdt_ops prom_olpc_ops __initconst = { .nextprop = olpc_dt_nextprop, .getproplen = olpc_dt_getproplen, .getproperty = olpc_dt_getproperty, diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 43c9f6a..2b63a0b 100644 --- 
a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -132,7 +132,7 @@ static void do_fpu_end(void) static void fix_processor_context(void) { int cpu = smp_processor_id(); - struct tss_struct *t = &per_cpu(init_tss, cpu); + struct tss_struct *t = init_tss + cpu; set_tss_desc(cpu, t); /* * This just modifies memory; should not be @@ -142,8 +142,6 @@ static void fix_processor_context(void) */ #ifdef CONFIG_X86_64 - get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9; - syscall_init(); /* This sets MSR_*STAR and related */ #endif load_TR_desc(); /* This does ltr */ diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index e529730..8d08690 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -11,10 +11,13 @@ #include #include +#include "../../../include/generated/autoconf.h" + static void die(char *fmt, ...); #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) static Elf32_Ehdr ehdr; +static Elf32_Phdr *phdr; static unsigned long reloc_count, reloc_idx; static unsigned long *relocs; static unsigned long reloc16_count, reloc16_idx; @@ -322,9 +325,39 @@ static void read_ehdr(FILE *fp) } } +static void read_phdrs(FILE *fp) +{ + unsigned int i; + + phdr = calloc(ehdr.e_phnum, sizeof(Elf32_Phdr)); + if (!phdr) { + die("Unable to allocate %d program headers\n", + ehdr.e_phnum); + } + if (fseek(fp, ehdr.e_phoff, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + ehdr.e_phoff, strerror(errno)); + } + if (fread(phdr, sizeof(*phdr), ehdr.e_phnum, fp) != ehdr.e_phnum) { + die("Cannot read ELF program headers: %s\n", + strerror(errno)); + } + for(i = 0; i < ehdr.e_phnum; i++) { + phdr[i].p_type = elf32_to_cpu(phdr[i].p_type); + phdr[i].p_offset = elf32_to_cpu(phdr[i].p_offset); + phdr[i].p_vaddr = elf32_to_cpu(phdr[i].p_vaddr); + phdr[i].p_paddr = elf32_to_cpu(phdr[i].p_paddr); + phdr[i].p_filesz = elf32_to_cpu(phdr[i].p_filesz); + phdr[i].p_memsz = elf32_to_cpu(phdr[i].p_memsz); + phdr[i].p_flags = elf32_to_cpu(phdr[i].p_flags); + phdr[i].p_align = elf32_to_cpu(phdr[i].p_align); + } + +} + static void read_shdrs(FILE *fp) { - int i; + unsigned int i; Elf32_Shdr shdr; secs = calloc(ehdr.e_shnum, sizeof(struct section)); @@ -359,7 +392,7 @@ static void read_shdrs(FILE *fp) static void read_strtabs(FILE *fp) { - int i; + unsigned int i; for (i = 0; i < ehdr.e_shnum; i++) { struct section *sec = &secs[i]; if (sec->shdr.sh_type != SHT_STRTAB) { @@ -384,7 +417,7 @@ static void read_strtabs(FILE *fp) static void read_symtabs(FILE *fp) { - int i,j; + unsigned int i,j; for (i = 0; i < ehdr.e_shnum; i++) { struct section *sec = &secs[i]; if (sec->shdr.sh_type != SHT_SYMTAB) { @@ -417,7 +450,9 @@ static void read_symtabs(FILE *fp) static void read_relocs(FILE *fp) { - int i,j; + unsigned int i,j; + uint32_t base; + for (i = 0; i < ehdr.e_shnum; i++) { struct section *sec = &secs[i]; if (sec->shdr.sh_type != SHT_REL) { @@ -437,9 +472,22 @@ static void read_relocs(FILE *fp) die("Cannot read symbol table: %s\n", strerror(errno)); } + base = 0; + +#ifdef CONFIG_X86_32 + for (j = 0; j < ehdr.e_phnum; j++) { + if (phdr[j].p_type != PT_LOAD ) + continue; + if (secs[sec->shdr.sh_info].shdr.sh_offset < phdr[j].p_offset || secs[sec->shdr.sh_info].shdr.sh_offset >= phdr[j].p_offset + phdr[j].p_filesz) + continue; + base = CONFIG_PAGE_OFFSET + phdr[j].p_paddr - phdr[j].p_vaddr; + break; + } +#endif + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { Elf32_Rel *rel = &sec->reltab[j]; - rel->r_offset = elf32_to_cpu(rel->r_offset); + rel->r_offset = elf32_to_cpu(rel->r_offset) + base; rel->r_info = 
elf32_to_cpu(rel->r_info); } } @@ -448,13 +496,13 @@ static void read_relocs(FILE *fp) static void print_absolute_symbols(void) { - int i; + unsigned int i; printf("Absolute symbols\n"); printf(" Num: Value Size Type Bind Visibility Name\n"); for (i = 0; i < ehdr.e_shnum; i++) { struct section *sec = &secs[i]; char *sym_strtab; - int j; + unsigned int j; if (sec->shdr.sh_type != SHT_SYMTAB) { continue; @@ -481,14 +529,14 @@ static void print_absolute_symbols(void) static void print_absolute_relocs(void) { - int i, printed = 0; + unsigned int i, printed = 0; for (i = 0; i < ehdr.e_shnum; i++) { struct section *sec = &secs[i]; struct section *sec_applies, *sec_symtab; char *sym_strtab; Elf32_Sym *sh_symtab; - int j; + unsigned int j; if (sec->shdr.sh_type != SHT_REL) { continue; } @@ -550,13 +598,13 @@ static void print_absolute_relocs(void) static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym), int use_real_mode) { - int i; + unsigned int i; /* Walk through the relocations */ for (i = 0; i < ehdr.e_shnum; i++) { char *sym_strtab; Elf32_Sym *sh_symtab; struct section *sec_applies, *sec_symtab; - int j; + unsigned int j; struct section *sec = &secs[i]; if (sec->shdr.sh_type != SHT_REL) { @@ -580,6 +628,22 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym), sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; r_type = ELF32_R_TYPE(rel->r_info); + /* Don't relocate actual per-cpu variables, they are absolute indices, not addresses */ + if (!strcmp(sec_name(sym->st_shndx), ".data..percpu") && strcmp(sym_name(sym_strtab, sym), "__per_cpu_load")) + continue; + +#if defined(CONFIG_PAX_KERNEXEC) && defined(CONFIG_X86_32) + /* Don't relocate actual code, they are relocated implicitly by the base address of KERNEL_CS */ + if (!strcmp(sec_name(sym->st_shndx), ".text.end") && !strcmp(sym_name(sym_strtab, sym), "_etext")) + continue; + if (!strcmp(sec_name(sym->st_shndx), ".init.text")) + continue; + if (!strcmp(sec_name(sym->st_shndx), ".exit.text")) + continue; + if (!strcmp(sec_name(sym->st_shndx), ".text") && strcmp(sym_name(sym_strtab, sym), "__LOAD_PHYSICAL_ADDR")) + continue; +#endif + shn_abs = sym->st_shndx == SHN_ABS; switch (r_type) { @@ -676,7 +740,7 @@ static int write32(unsigned int v, FILE *f) static void emit_relocs(int as_text, int use_real_mode) { - int i; + unsigned int i; /* Count how many relocations I have and allocate space for them. 
*/ reloc_count = 0; walk_relocs(count_reloc, use_real_mode); @@ -803,6 +867,7 @@ int main(int argc, char **argv) fname, strerror(errno)); } read_ehdr(fp); + read_phdrs(fp); read_shdrs(fp); read_strtabs(fp); read_symtabs(fp); diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c index c6c7131..2851e03 100644 --- a/arch/x86/um/tls_32.c +++ b/arch/x86/um/tls_32.c @@ -259,7 +259,7 @@ out: if (unlikely(task == current && !t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].flushed)) { printk(KERN_ERR "get_tls_entry: task with pid %d got here " - "without flushed TLS.", current->pid); + "without flushed TLS.", task_pid_nr(current)); } return 0; diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 5d17950..2253fc9 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -137,7 +137,7 @@ quiet_cmd_vdso = VDSO $@ -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \ sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' -VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +VDSO_LDFLAGS = -fPIC -shared -Wl,--no-undefined $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) GCOV_PROFILE := n # diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 468d591..8e80a0a 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -25,6 +25,7 @@ #include #include #include +#include enum { VDSO_DISABLED = 0, @@ -226,7 +227,7 @@ static inline void map_compat_vdso(int map) void enable_sep_cpu(void) { int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = init_tss + cpu; if (!boot_cpu_has(X86_FEATURE_SEP)) { put_cpu(); @@ -249,7 +250,7 @@ static int __init gate_vma_init(void) gate_vma.vm_start = FIXADDR_USER_START; gate_vma.vm_end = FIXADDR_USER_END; gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; - gate_vma.vm_page_prot = __P101; + gate_vma.vm_page_prot = vm_get_page_prot(gate_vma.vm_flags); /* * Make sure the vDSO gets into every core dump. * Dumping its contents makes post-mortem fully interpretable later @@ -331,14 +332,14 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (compat) addr = VDSO_HIGH_BASE; else { - addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); + addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, MAP_EXECUTABLE); if (IS_ERR_VALUE(addr)) { ret = addr; goto up_fail; } } - current->mm->context.vdso = (void *)addr; + current->mm->context.vdso = addr; if (compat_uses_vma || !compat) { /* @@ -361,11 +362,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) } current_thread_info()->sysenter_return = - VDSO32_SYMBOL(addr, SYSENTER_RETURN); + (__force void __user *)VDSO32_SYMBOL(addr, SYSENTER_RETURN); up_fail: if (ret) - current->mm->context.vdso = NULL; + current->mm->context.vdso = 0; up_write(&mm->mmap_sem); @@ -412,8 +413,14 @@ __initcall(ia32_binfmt_init); const char *arch_vma_name(struct vm_area_struct *vma) { - if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) + if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso) return "[vdso]"; + +#ifdef CONFIG_PAX_SEGMEXEC + if (vma->vm_mm && vma->vm_mirror && vma->vm_mirror->vm_start == vma->vm_mm->context.vdso) + return "[vdso]"; +#endif + return NULL; } @@ -423,7 +430,7 @@ struct vm_area_struct *get_gate_vma(struct mm_struct *mm) * Check to see if the corresponding task was created in compat vdso * mode. 
*/ - if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE) + if (mm && mm->context.vdso == VDSO_HIGH_BASE) return &gate_vma; return NULL; } diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 0ff8815..7abe843 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -16,8 +16,6 @@ #include #include -unsigned int __read_mostly vdso_enabled = 1; - extern char vdso_start[], vdso_end[]; extern unsigned short vdso_sync_cpuid; @@ -119,13 +117,15 @@ static unsigned long vdso_addr(unsigned long start, unsigned len) int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; - unsigned long addr; + unsigned long addr = 0; int ret; - if (!vdso_enabled) - return 0; - down_write(&mm->mmap_sem); + +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP)) +#endif + addr = vdso_addr(mm->start_stack, vdso_size); addr = get_unmapped_area(NULL, addr, vdso_size, 0, 0); if (IS_ERR_VALUE(addr)) { @@ -133,26 +133,18 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) goto up_fail; } - current->mm->context.vdso = (void *)addr; + mm->context.vdso = addr; ret = install_special_mapping(mm, addr, vdso_size, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| VM_ALWAYSDUMP, vdso_pages); - if (ret) { - current->mm->context.vdso = NULL; - goto up_fail; - } + + if (ret) + mm->context.vdso = 0; up_fail: up_write(&mm->mmap_sem); return ret; } - -static __init int vdso_setup(char *s) -{ - vdso_enabled = simple_strtoul(s, NULL, 0); - return 0; -} -__setup("vdso=", vdso_setup); diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 26c731a..fb510c7 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -8,6 +8,7 @@ config XEN select PARAVIRT_CLOCK depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS) depends on X86_CMPXCHG && X86_TSC + depends on !GRKERNSEC_CONFIG_AUTO || GRKERNSEC_CONFIG_VIRT_XEN help This is the Linux Xen port. Enabling this will allow the kernel to boot in a paravirtualized environment under the diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 5189fe8..1bf8944 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -86,8 +86,6 @@ EXPORT_SYMBOL_GPL(xen_start_info); struct shared_info xen_dummy_shared_info; -void *xen_initial_gdt; - RESERVE_BRK(shared_info_page_brk, PAGE_SIZE); __read_mostly int xen_have_vector_callback; EXPORT_SYMBOL_GPL(xen_have_vector_callback); @@ -382,8 +380,7 @@ static void xen_load_gdt(const struct desc_ptr *dtr) { unsigned long va = dtr->address; unsigned int size = dtr->size + 1; - unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; - unsigned long frames[pages]; + unsigned long frames[65536 / PAGE_SIZE]; int f; /* @@ -431,8 +428,7 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr) { unsigned long va = dtr->address; unsigned int size = dtr->size + 1; - unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; - unsigned long frames[pages]; + unsigned long frames[(GDT_SIZE + PAGE_SIZE - 1) / PAGE_SIZE]; int f; /* @@ -440,7 +436,7 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr) * 8-byte entries, or 16 4k pages.. 
 	 */
-	BUG_ON(size > 65536);
+	BUG_ON(size > GDT_SIZE);
 	BUG_ON(va & ~PAGE_MASK);
 	for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
@@ -1072,30 +1068,30 @@ static const struct pv_apic_ops xen_apic_ops __initconst = {
 #endif
 };
-static void xen_reboot(int reason)
+static __noreturn void xen_reboot(int reason)
 {
 	struct sched_shutdown r = { .reason = reason };
-	if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
-		BUG();
+	HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
+	BUG();
 }
-static void xen_restart(char *msg)
+static __noreturn void xen_restart(char *msg)
 {
 	xen_reboot(SHUTDOWN_reboot);
 }
-static void xen_emergency_restart(void)
+static __noreturn void xen_emergency_restart(void)
 {
 	xen_reboot(SHUTDOWN_reboot);
 }
-static void xen_machine_halt(void)
+static __noreturn void xen_machine_halt(void)
 {
 	xen_reboot(SHUTDOWN_poweroff);
 }
-static void xen_machine_power_off(void)
+static void __noreturn xen_machine_power_off(void)
 {
 	if (pm_power_off)
 		pm_power_off();
@@ -1144,6 +1140,9 @@ static void __init xen_setup_stackprotector(void)
 	pv_cpu_ops.load_gdt = xen_load_gdt_boot;
 	setup_stack_canary_segment(0);
+#ifdef CONFIG_X86_64
+	load_percpu_segment(0);
+#endif
 	switch_to_new_gdt(0);
 	pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry;
@@ -1196,7 +1195,17 @@ asmlinkage void __init xen_start_kernel(void)
 	__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
 	/* Work out if we support NX */
-	x86_configure_nx();
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+	if ((cpuid_eax(0x80000000) & 0xffff0000) == 0x80000000 &&
+	    (cpuid_edx(0x80000001) & (1U << (X86_FEATURE_NX & 31)))) {
+		unsigned l, h;
+
+		__supported_pte_mask |= _PAGE_NX;
+		rdmsr(MSR_EFER, l, h);
+		l |= EFER_NX;
+		wrmsr(MSR_EFER, l, h);
+	}
+#endif
 	xen_setup_features();
@@ -1227,13 +1236,6 @@ asmlinkage void __init xen_start_kernel(void)
 	machine_ops = xen_machine_ops;
-	/*
-	 * The only reliable way to retain the initial address of the
-	 * percpu gdt_page is to remember it here, so we can go and
-	 * mark it RW later, when the initial percpu area is freed.
-	 */
-	xen_initial_gdt = &per_cpu(gdt_page, 0);
-
 	xen_smp_init();
 #ifdef CONFIG_ACPI_NUMA
@@ -1418,7 +1420,7 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
-static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = {
+static struct notifier_block xen_hvm_cpu_notifier = {
 	.notifier_call	= xen_hvm_cpu_notify,
 };
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index fe00be69..c51170f 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -365,7 +365,7 @@ static pteval_t pte_mfn_to_pfn(pteval_t val)
 	return val;
 }
-static pteval_t pte_pfn_to_mfn(pteval_t val)
+static pteval_t __intentional_overflow(-1) pte_pfn_to_mfn(pteval_t val)
 {
 	if (val & _PAGE_PRESENT) {
 		unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
@@ -1757,6 +1757,9 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 	convert_pfn_mfn(init_level4_pgt);
 	convert_pfn_mfn(level3_ident_pgt);
 	convert_pfn_mfn(level3_kernel_pgt);
+	convert_pfn_mfn(level3_vmalloc_start_pgt);
+	convert_pfn_mfn(level3_vmalloc_end_pgt);
+	convert_pfn_mfn(level3_vmemmap_pgt);
 	l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
 	l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
@@ -1775,7 +1778,11 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 	set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level3_vmalloc_start_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level3_vmalloc_end_pgt, PAGE_KERNEL_RO);
+	set_page_prot(level3_vmemmap_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
+	set_page_prot(level2_vmemmap_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
@@ -1986,6 +1993,7 @@ static void __init xen_post_allocator_init(void)
 	pv_mmu_ops.set_pud = xen_set_pud;
 #if PAGETABLE_LEVELS == 4
 	pv_mmu_ops.set_pgd = xen_set_pgd;
+	pv_mmu_ops.set_pgd_batched = xen_set_pgd;
 #endif
 	/* This will work as long as patching hasn't happened yet
@@ -2067,6 +2075,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 	.pud_val = PV_CALLEE_SAVE(xen_pud_val),
 	.make_pud = PV_CALLEE_SAVE(xen_make_pud),
 	.set_pgd = xen_set_pgd_hyper,
+	.set_pgd_batched = xen_set_pgd_hyper,
 	.alloc_pud = xen_alloc_pmd_init,
 	.release_pud = xen_release_pmd_init,
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 6e4d5dc..78c131b 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -209,11 +209,6 @@ static void __init xen_smp_prepare_boot_cpu(void)
 {
 	BUG_ON(smp_processor_id() != 0);
 	native_smp_prepare_boot_cpu();
-
-	/* We've switched to the "real" per-cpu gdt, so make sure the
	   old memory can be recycled */
-	make_lowmem_page_readwrite(xen_initial_gdt);
-
 	xen_filter_cpu_maps();
 	xen_setup_vcpu_info_placement();
 }
@@ -290,12 +285,12 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	gdt = get_cpu_gdt_table(cpu);
 	ctxt->flags = VGCF_IN_KERNEL;
-	ctxt->user_regs.ds = __USER_DS;
-	ctxt->user_regs.es = __USER_DS;
+	ctxt->user_regs.ds = __KERNEL_DS;
+	ctxt->user_regs.es = __KERNEL_DS;
 	ctxt->user_regs.ss = __KERNEL_DS;
 #ifdef CONFIG_X86_32
 	ctxt->user_regs.fs = __KERNEL_PERCPU;
-	ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
+	savesegment(gs, ctxt->user_regs.gs);
 #else
 	ctxt->gs_base_kernel = per_cpu_offset(cpu);
 #endif
@@ -346,13 +341,12 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
 	int rc;
 	per_cpu(current_task, cpu) = idle;
+	per_cpu(current_tinfo, cpu) = &idle->tinfo;
 #ifdef CONFIG_X86_32
 	irq_ctx_init(cpu);
 #else
 	clear_tsk_thread_flag(idle, TIF_FORK);
-	per_cpu(kernel_stack, cpu) =
-		(unsigned long)task_stack_page(idle) -
-		KERNEL_STACK_OFFSET + THREAD_SIZE;
+	per_cpu(kernel_stack, cpu) = (unsigned long)task_stack_page(idle) - 16 + THREAD_SIZE;
 #endif
 	xen_setup_runstate_info(cpu);
 	xen_setup_timer(cpu);
@@ -536,7 +530,7 @@ static const struct smp_ops xen_smp_ops __initconst = {
 void __init xen_smp_init(void)
 {
-	smp_ops = xen_smp_ops;
+	memcpy((void *)&smp_ops, &xen_smp_ops, sizeof smp_ops);
 	xen_fill_possible_map();
 	xen_init_spinlocks();
 }
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index 7328f71..c457aa7 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -83,14 +83,14 @@ ENTRY(xen_iret)
 	ESP_OFFSET=4	# bytes pushed onto stack
 	/*
-	 * Store vcpu_info pointer for easy access.  Do it this way to
-	 * avoid having to reload %fs
+	 * Store vcpu_info pointer for easy access.
 	 */
 #ifdef CONFIG_SMP
-	GET_THREAD_INFO(%eax)
-	movl %ss:TI_cpu(%eax), %eax
-	movl %ss:__per_cpu_offset(,%eax,4), %eax
-	mov %ss:xen_vcpu(%eax), %eax
+	push %fs
+	mov $(__KERNEL_PERCPU), %eax
+	mov %eax, %fs
+	mov PER_CPU_VAR(xen_vcpu), %eax
+	pop %fs
 #else
 	movl %ss:xen_vcpu, %eax
 #endif
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index aaa7291..3f77960 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -19,6 +19,17 @@ ENTRY(startup_xen)
 #ifdef CONFIG_X86_32
 	mov %esi,xen_start_info
 	mov $init_thread_union+THREAD_SIZE,%esp
+#ifdef CONFIG_SMP
+	movl $cpu_gdt_table,%edi
+	movl $__per_cpu_load,%eax
+	movw %ax,__KERNEL_PERCPU + 2(%edi)
+	rorl $16,%eax
+	movb %al,__KERNEL_PERCPU + 4(%edi)
+	movb %ah,__KERNEL_PERCPU + 7(%edi)
+	movl $__per_cpu_end - 1,%eax
+	subl $__per_cpu_start,%eax
+	movw %ax,__KERNEL_PERCPU + 0(%edi)
+#endif
 #else
 	mov %rsi,xen_start_info
 	mov $init_thread_union+THREAD_SIZE,%rsp
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index b095739..8c17bcd 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -10,8 +10,6 @@ extern const char xen_hypervisor_callback[];
 extern const char xen_failsafe_callback[];
-extern void *xen_initial_gdt;
-
 struct trap_info;
 void xen_copy_trap_info(struct trap_info *traps);
diff --git a/arch/xtensa/variants/dc232b/include/variant/core.h b/arch/xtensa/variants/dc232b/include/variant/core.h
index 525bd3d..ef888b1 100644
--- a/arch/xtensa/variants/dc232b/include/variant/core.h
+++ b/arch/xtensa/variants/dc232b/include/variant/core.h
@@ -119,9 +119,9 @@
 ----------------------------------------------------------------------*/
 #define XCHAL_ICACHE_LINESIZE		32	/* I-cache line size in bytes */
-#define XCHAL_DCACHE_LINESIZE		32	/* D-cache line size in bytes */
 #define XCHAL_ICACHE_LINEWIDTH		5	/* log2(I line size in bytes) */
 #define XCHAL_DCACHE_LINEWIDTH		5	/* log2(D line size in bytes) */
+#define XCHAL_DCACHE_LINESIZE		(_AC(1,UL) << XCHAL_DCACHE_LINEWIDTH)	/* D-cache line size in bytes */
 #define XCHAL_ICACHE_SIZE		16384	/* I-cache size in bytes or 0 */
 #define XCHAL_DCACHE_SIZE		16384	/* D-cache size in bytes or 0 */
diff --git a/arch/xtensa/variants/fsf/include/variant/core.h b/arch/xtensa/variants/fsf/include/variant/core.h
index 2f33760..835e50a 100644
--- a/arch/xtensa/variants/fsf/include/variant/core.h
+++ b/arch/xtensa/variants/fsf/include/variant/core.h
@@ -11,6 +11,7 @@
 #ifndef _XTENSA_CORE_H
 #define _XTENSA_CORE_H
+#include
 /****************************************************************************
 	    Parameters Useful for Any Code, USER or PRIVILEGED
@@ -112,9 +113,9 @@
 ----------------------------------------------------------------------*/
 #define XCHAL_ICACHE_LINESIZE		16	/* I-cache line size in bytes */
-#define XCHAL_DCACHE_LINESIZE		16	/* D-cache line size in bytes */
 #define XCHAL_ICACHE_LINEWIDTH		4	/* log2(I line size in bytes) */
 #define XCHAL_DCACHE_LINEWIDTH		4	/* log2(D line size in bytes) */
+#define XCHAL_DCACHE_LINESIZE		(_AC(1,UL) << XCHAL_DCACHE_LINEWIDTH)	/* D-cache line size in bytes */
 #define XCHAL_ICACHE_SIZE		8192	/* I-cache size in bytes or 0 */
 #define XCHAL_DCACHE_SIZE		8192	/* D-cache size in bytes or 0 */
diff --git a/arch/xtensa/variants/s6000/include/variant/core.h b/arch/xtensa/variants/s6000/include/variant/core.h
index af00795..2bb8105 100644
--- a/arch/xtensa/variants/s6000/include/variant/core.h
+++ b/arch/xtensa/variants/s6000/include/variant/core.h
@@ -11,6 +11,7 @@
 #ifndef _XTENSA_CORE_CONFIGURATION_H
 #define _XTENSA_CORE_CONFIGURATION_H
+#include
 /****************************************************************************
 	    Parameters Useful for Any Code, USER or PRIVILEGED
@@ -118,9 +119,9 @@
 ----------------------------------------------------------------------*/
 #define XCHAL_ICACHE_LINESIZE		16	/* I-cache line size in bytes */
-#define XCHAL_DCACHE_LINESIZE		16	/* D-cache line size in bytes */
 #define XCHAL_ICACHE_LINEWIDTH		4	/* log2(I line size in bytes) */
 #define XCHAL_DCACHE_LINEWIDTH		4	/* log2(D line size in bytes) */
+#define XCHAL_DCACHE_LINESIZE		(_AC(1,UL) << XCHAL_DCACHE_LINEWIDTH)	/* D-cache line size in bytes */
 #define XCHAL_ICACHE_SIZE		32768	/* I-cache size in bytes or 0 */
 #define XCHAL_DCACHE_SIZE		32768	/* D-cache size in bytes or 0 */
diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c
index 58916af..9b538a6 100644
--- a/block/blk-iopoll.c
+++ b/block/blk-iopoll.c
@@ -77,7 +77,7 @@ void blk_iopoll_complete(struct blk_iopoll *iopoll)
 }
 EXPORT_SYMBOL(blk_iopoll_complete);
-static void blk_iopoll_softirq(struct softirq_action *h)
+static __latent_entropy void blk_iopoll_softirq(void)
 {
 	struct list_head *list = &__get_cpu_var(blk_cpu_iopoll);
 	int rearm = 0, budget = blk_iopoll_budget;
@@ -209,7 +209,7 @@ static int __cpuinit blk_iopoll_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
-static struct notifier_block __cpuinitdata blk_iopoll_cpu_notifier = {
+static struct notifier_block blk_iopoll_cpu_notifier = {
 	.notifier_call	= blk_iopoll_cpu_notify,
 };
diff --git a/block/blk-map.c b/block/blk-map.c
index 623e1cd..ca1e109 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -302,7 +302,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 	if (!len || !kbuf)
 		return -EINVAL;
-	do_copy = !blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf);
+	do_copy = !blk_rq_aligned(q, addr, len) || object_starts_on_stack(kbuf);
 	if (do_copy)
 		bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
 	else
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 1366a89..88178fe 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -17,7 +17,7 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
  * Softirq action handler - move entries to local list and loop over them
  * while passing them to the queue registered handler.
  */
-static void blk_done_softirq(struct softirq_action *h)
+static __latent_entropy void blk_done_softirq(void)
 {
 	struct list_head *cpu_list, local_list;
@@ -97,7 +97,7 @@ static int __cpuinit blk_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
-static struct notifier_block __cpuinitdata blk_cpu_notifier = {
+static struct notifier_block blk_cpu_notifier = {
 	.notifier_call	= blk_cpu_notify,
 };
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 5eed6a7..0e8abe9 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -361,6 +361,7 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td)
 	/* Group allocation failed. Account the IO to root group */
 	if (!tg) {
 		tg = td->root_tg;
+		rcu_read_unlock();
 		return tg;
 	}
diff --git a/block/bsg.c b/block/bsg.c
index c0ab25c..9d49f8f 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -176,16 +176,24 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
 				struct sg_io_v4 *hdr, struct bsg_device *bd,
 				fmode_t has_write_perm)
 {
+	unsigned char tmpcmd[sizeof(rq->__cmd)];
+	unsigned char *cmdptr;
+
 	if (hdr->request_len > BLK_MAX_CDB) {
 		rq->cmd = kzalloc(hdr->request_len, GFP_KERNEL);
 		if (!rq->cmd)
 			return -ENOMEM;
-	}
+		cmdptr = rq->cmd;
+	} else
+		cmdptr = tmpcmd;
-	if (copy_from_user(rq->cmd, (void __user *)(unsigned long)hdr->request,
+	if (copy_from_user(cmdptr, (void __user *)(unsigned long)hdr->request,
 			   hdr->request_len))
 		return -EFAULT;
+	if (cmdptr != rq->cmd)
+		memcpy(rq->cmd, cmdptr, hdr->request_len);
+
 	if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
 		if (blk_verify_command(rq->cmd, has_write_perm))
 			return -EPERM;
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 7b72502..3d7b647 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -155,7 +155,7 @@ static int compat_cdrom_generic_command(struct block_device *bdev, fmode_t mode,
 	cgc = compat_alloc_user_space(sizeof(*cgc));
 	cgc32 = compat_ptr(arg);
-	if (copy_in_user(&cgc->cmd, &cgc32->cmd, sizeof(cgc->cmd)) ||
+	if (copy_in_user(cgc->cmd, cgc32->cmd, sizeof(cgc->cmd)) ||
 	    get_user(data, &cgc32->buffer) ||
 	    put_user(compat_ptr(data), &cgc->buffer) ||
 	    copy_in_user(&cgc->buflen, &cgc32->buflen,
@@ -340,7 +340,7 @@ static int compat_fd_ioctl(struct block_device *bdev, fmode_t mode,
 		err |= __get_user(f->spec1, &uf->spec1);
 		err |= __get_user(f->fmt_gap, &uf->fmt_gap);
 		err |= __get_user(name, &uf->name);
-		f->name = compat_ptr(name);
+		f->name = (void __force_kernel *)compat_ptr(name);
 		if (err) {
 			err = -EFAULT;
 			goto out;
diff --git a/block/genhd.c b/block/genhd.c
index 424d1fa..8e99b22 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -472,21 +472,24 @@ static char *bdevt_str(dev_t devt, char *buf)
 /*
  * Register device numbers dev..(dev+range-1)
- * range must be nonzero
+ * Noop if @range is zero.
  * The hash chain is sorted on range, so that subranges can override.
  */
 void blk_register_region(dev_t devt, unsigned long range, struct module *module,
 			 struct kobject *(*probe)(dev_t, int *, void *),
 			 int (*lock)(dev_t, void *), void *data)
 {
-	kobj_map(bdev_map, devt, range, module, probe, lock, data);
+	if (range)
+		kobj_map(bdev_map, devt, range, module, probe, lock, data);
 }
 EXPORT_SYMBOL(blk_register_region);
+/* undo blk_register_region(), noop if @range is zero */
 void blk_unregister_region(dev_t devt, unsigned long range)
 {
-	kobj_unmap(bdev_map, devt, range);
+	if (range)
+		kobj_unmap(bdev_map, devt, range);
 }
 EXPORT_SYMBOL(blk_unregister_region);
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index f124268..e5bfd12 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -66,7 +66,7 @@ static int scsi_get_bus(struct request_queue *q, int __user *p)
 	return put_user(0, p);
 }
-static int sg_get_timeout(struct request_queue *q)
+static int __intentional_overflow(-1) sg_get_timeout(struct request_queue *q)
 {
 	return jiffies_to_clock_t(q->sg_timeout);
 }
@@ -223,8 +223,20 @@ EXPORT_SYMBOL(blk_verify_command);
 static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
 			     struct sg_io_hdr *hdr, fmode_t mode)
 {
-	if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len))
+	unsigned char tmpcmd[sizeof(rq->__cmd)];
+	unsigned char *cmdptr;
+
+	if (rq->cmd != rq->__cmd)
+		cmdptr = rq->cmd;
+	else
+		cmdptr = tmpcmd;
+
+	if (copy_from_user(cmdptr, hdr->cmdp, hdr->cmd_len))
 		return -EFAULT;
+
+	if (cmdptr != rq->cmd)
+		memcpy(rq->cmd, cmdptr, hdr->cmd_len);
+
 	if (blk_verify_command(rq->cmd, mode & FMODE_WRITE))
 		return -EPERM;
@@ -433,6 +445,8 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 	int err;
 	unsigned int in_len, out_len, bytes, opcode, cmdlen;
 	char *buffer = NULL, sense[SCSI_SENSE_BUFFERSIZE];
+	unsigned char tmpcmd[sizeof(rq->__cmd)];
+	unsigned char *cmdptr;
 	if (!sic)
 		return -EINVAL;
@@ -466,9 +480,18 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 	 */
 	err = -EFAULT;
 	rq->cmd_len = cmdlen;
-	if (copy_from_user(rq->cmd, sic->data, cmdlen))
+
+	if (rq->cmd != rq->__cmd)
+		cmdptr = rq->cmd;
+	else
+		cmdptr = tmpcmd;
+
+	if (copy_from_user(cmdptr, sic->data, cmdlen))
 		goto error;
+	if (rq->cmd != cmdptr)
+		memcpy(rq->cmd, cmdptr, cmdlen);
+
 	if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len))
 		goto error;
diff --git a/crypto/api.c b/crypto/api.c
index ac80794..dd053f8 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -42,6 +42,8 @@ static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg)
 	return alg;
 }
+static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg);
+
 struct crypto_alg *crypto_mod_get(struct crypto_alg *alg)
 {
 	return try_module_get(alg->cra_module) ? crypto_alg_get(alg) : NULL;
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index 75c415d..0b21cd8 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -63,7 +63,7 @@ struct cryptd_blkcipher_ctx {
 struct cryptd_blkcipher_request_ctx {
 	crypto_completion_t complete;
-};
+} __no_const;
 struct cryptd_hash_ctx {
 	struct crypto_shash *child;
@@ -80,7 +80,7 @@ struct cryptd_aead_ctx {
 struct cryptd_aead_request_ctx {
 	crypto_completion_t complete;
-};
+} __no_const;
 static void cryptd_queue_worker(struct work_struct *work);
diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c
index 5b63b8d..6f46ba0 100644
--- a/crypto/crypto_user.c
+++ b/crypto/crypto_user.c
@@ -26,6 +26,8 @@
 #include
 #include "internal.h"
+#define null_terminated(x)	(strnlen(x, sizeof(x)) < sizeof(x))
+
 DEFINE_MUTEX(crypto_cfg_mutex);
 /* The crypto netlink socket */
@@ -192,7 +194,10 @@ static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
 	struct crypto_dump_info info;
 	int err;
-	if (!p->cru_driver_name)
+	if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
+		return -EINVAL;
+
+	if (!p->cru_driver_name[0])
 		return -EINVAL;
 	alg = crypto_alg_match(p, 1);
@@ -256,6 +261,9 @@ static int crypto_update_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct nlattr *priority = attrs[CRYPTOCFGA_PRIORITY_VAL];
 	LIST_HEAD(list);
+	if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
+		return -EINVAL;
+
 	if (priority && !strlen(p->cru_driver_name))
 		return -EINVAL;
@@ -283,6 +291,9 @@ static int crypto_del_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct crypto_alg *alg;
 	struct crypto_user_alg *p = nlmsg_data(nlh);
+	if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
+		return -EINVAL;
+
 	alg = crypto_alg_match(p, 1);
 	if (!alg)
 		return -ENOENT;
@@ -310,6 +321,9 @@ static int crypto_add_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct crypto_user_alg *p = nlmsg_data(nlh);
 	struct nlattr *priority = attrs[CRYPTOCFGA_PRIORITY_VAL];
+	if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
+		return -EINVAL;
+
 	if (strlen(p->cru_driver_name))
 		exact = 1;
diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index ba92046..2d5921a 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -440,7 +440,7 @@ static int pcrypt_sysfs_add(struct padata_instance *pinst, const char *name)
 	int ret;
 	pinst->kobj.kset = pcrypt_kset;
-	ret = kobject_add(&pinst->kobj, NULL, name);
+	ret = kobject_add(&pinst->kobj, NULL, "%s", name);
 	if (!ret)
 		kobject_uevent(&pinst->kobj, KOBJ_ADD);
diff --git a/crypto/zlib.c b/crypto/zlib.c
index d980788..2422b3d 100644
--- a/crypto/zlib.c
+++ b/crypto/zlib.c
@@ -95,10 +95,10 @@ static int zlib_compress_setup(struct crypto_pcomp *tfm, void *params,
 	zlib_comp_exit(ctx);
 	window_bits = tb[ZLIB_COMP_WINDOWBITS]
-					? nla_get_u32(tb[ZLIB_COMP_WINDOWBITS])
+					? nla_get_s32(tb[ZLIB_COMP_WINDOWBITS])
 					: MAX_WBITS;
 	mem_level = tb[ZLIB_COMP_MEMLEVEL]
-					? nla_get_u32(tb[ZLIB_COMP_MEMLEVEL])
+					? nla_get_s32(tb[ZLIB_COMP_MEMLEVEL])
 					: DEF_MEM_LEVEL;
 	workspacesize = zlib_deflate_workspacesize(window_bits, mem_level);
diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
index f57050e..7ccfc74 100644
--- a/drivers/acpi/apei/apei-internal.h
+++ b/drivers/acpi/apei/apei-internal.h
@@ -18,7 +18,7 @@ typedef int (*apei_exec_ins_func_t)(struct apei_exec_context *ctx,
 struct apei_exec_ins_type {
 	u32 flags;
 	apei_exec_ins_func_t run;
-};
+} __do_const;
 struct apei_exec_context {
 	u32 ip;
diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
index 5d41894..22021e4 100644
--- a/drivers/acpi/apei/cper.c
+++ b/drivers/acpi/apei/cper.c
@@ -38,12 +38,12 @@
 */
 u64 cper_next_record_id(void)
 {
-	static atomic64_t seq;
+	static atomic64_unchecked_t seq;
-	if (!atomic64_read(&seq))
-		atomic64_set(&seq, ((u64)get_seconds()) << 32);
+	if (!atomic64_read_unchecked(&seq))
+		atomic64_set_unchecked(&seq, ((u64)get_seconds()) << 32);
-	return atomic64_inc_return(&seq);
+	return atomic64_inc_return_unchecked(&seq);
 }
 EXPORT_SYMBOL_GPL(cper_next_record_id);
diff --git a/drivers/acpi/atomicio.c b/drivers/acpi/atomicio.c
index cfc0cc1..61fdbaa 100644
--- a/drivers/acpi/atomicio.c
+++ b/drivers/acpi/atomicio.c
@@ -286,6 +286,7 @@ static int acpi_atomic_read_mem(u64 paddr, u64 *val, u32 width)
 		break;
 #endif
 	default:
+		rcu_read_unlock();
 		return -EINVAL;
 	}
 	rcu_read_unlock();
@@ -315,6 +316,7 @@ static int acpi_atomic_write_mem(u64 paddr, u64 val, u32 width)
 		break;
 #endif
 	default:
+		rcu_read_unlock();
 		return -EINVAL;
 	}
 	rcu_read_unlock();
diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c
index cb96296..b81293b 100644
--- a/drivers/acpi/blacklist.c
+++ b/drivers/acpi/blacklist.c
@@ -52,7 +52,7 @@ struct acpi_blacklist_item {
 	u32 is_critical_error;
 };
-static struct dmi_system_id acpi_osi_dmi_table[] __initdata;
+static const struct dmi_system_id acpi_osi_dmi_table[] __initconst;
 /*
  * POLICY: If *anything* doesn't work, put it on the blacklist.
@@ -193,7 +193,7 @@ static int __init dmi_disable_osi_win7(const struct dmi_system_id *d)
 	return 0;
 }
-static struct dmi_system_id acpi_osi_dmi_table[] __initdata = {
+static const struct dmi_system_id acpi_osi_dmi_table[] __initconst = {
 	{
 	.callback = dmi_disable_osi_vista,
 	.ident = "Fujitsu Siemens",
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 3a0e92a..4348f2a 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -72,7 +72,7 @@ static int set_copy_dsdt(const struct dmi_system_id *id)
 }
 #endif
-static struct dmi_system_id dsdt_dmi_table[] __initdata = {
+static const struct dmi_system_id dsdt_dmi_table[] __initconst = {
 	/*
 	 * Invoke DSDT corruption work-around on all Toshiba Satellite.
 	 * https://bugzilla.kernel.org/show_bug.cgi?id=14679
@@ -88,7 +88,7 @@ static struct dmi_system_id dsdt_dmi_table[] __initdata = {
 	{}
 };
 #else
-static struct dmi_system_id dsdt_dmi_table[] __initdata = {
+static const struct dmi_system_id dsdt_dmi_table[] __initconst = {
 	{}
 };
 #endif
diff --git a/drivers/acpi/custom_method.c b/drivers/acpi/custom_method.c
index 5d42c24..4964b94 100644
--- a/drivers/acpi/custom_method.c
+++ b/drivers/acpi/custom_method.c
@@ -29,6 +29,10 @@ static ssize_t cm_write(struct file *file, const char __user * user_buf,
 	struct acpi_table_header table;
 	acpi_status status;
+#ifdef CONFIG_GRKERNSEC_KMEM
+	return -EPERM;
+#endif
+
 	if (!(*ppos)) {
 		/* parse the table header to get the table length */
 		if (count <= sizeof(struct acpi_table_header))
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 30229af..eaddfaf 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -1018,7 +1018,7 @@ static int ec_clear_on_resume(const struct dmi_system_id *id)
 	return 0;
 }
-static struct dmi_system_id __initdata ec_dmi_table[] = {
+static const struct dmi_system_id __initconst ec_dmi_table[] = {
 	{
 	ec_skip_dsdt_scan, "Compal JFL92", {
 	DMI_MATCH(DMI_BIOS_VENDOR, "COMPAL"),
diff --git a/drivers/acpi/ec_sys.c b/drivers/acpi/ec_sys.c
index 6c47ae9..abfdd63 100644
--- a/drivers/acpi/ec_sys.c
+++ b/drivers/acpi/ec_sys.c
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include
 #include "internal.h"
 MODULE_AUTHOR("Thomas Renninger ");
@@ -40,7 +41,7 @@ static ssize_t acpi_ec_read_io(struct file *f, char __user *buf,
 	 * struct acpi_ec *ec = ((struct seq_file *)f->private_data)->private;
 	 */
 	unsigned int size = EC_SPACE_SIZE;
-	u8 *data = (u8 *) buf;
+	u8 data;
 	loff_t init_off = *off;
 	int err = 0;
@@ -53,9 +54,11 @@ static ssize_t acpi_ec_read_io(struct file *f, char __user *buf,
 		size = count;
 	while (size) {
-		err = ec_read(*off, &data[*off - init_off]);
+		err = ec_read(*off, &data);
 		if (err)
 			return err;
+		if (put_user(data, &buf[*off - init_off]))
+			return -EFAULT;
 		*off += 1;
 		size--;
 	}
@@ -71,7 +74,6 @@ static ssize_t acpi_ec_write_io(struct file *f, const char __user *buf,
 	unsigned int size = count;
 	loff_t init_off = *off;
-	u8 *data = (u8 *) buf;
 	int err = 0;
 	if (*off >= EC_SPACE_SIZE)
@@ -82,7 +84,9 @@
 	}
 	while (size) {
-		u8 byte_write = data[*off - init_off];
+		u8 byte_write;
+		if (get_user(byte_write, &buf[*off - init_off]))
+			return -EFAULT;
 		err = ec_write(*off, byte_write);
 		if (err)
 			return err;
diff --git a/drivers/acpi/pci_slot.c b/drivers/acpi/pci_slot.c
index 07f7fea..118388f 100644
--- a/drivers/acpi/pci_slot.c
+++ b/drivers/acpi/pci_slot.c
@@ -336,7 +336,7 @@ static int do_sta_before_sun(const struct dmi_system_id *d)
 	return 0;
 }
-static struct dmi_system_id acpi_pci_slot_dmi_table[] __initdata = {
+static const struct dmi_system_id acpi_pci_slot_dmi_table[] __initconst = {
 	/*
 	 * Fujitsu Primequest machines will return 1023 to indicate an
 	 * error if the _SUN method is evaluated on SxFy objects that
diff --git a/drivers/acpi/proc.c b/drivers/acpi/proc.c
index 251c7b62..feab1d6 100644
--- a/drivers/acpi/proc.c
+++ b/drivers/acpi/proc.c
@@ -345,16 +345,13 @@ acpi_system_write_wakeup_device(struct file *file,
 	struct list_head *node, *next;
 	char strbuf[5];
 	char str[5] = "";
-	unsigned int len = count;
-	if (len > 4)
-		len = 4;
-	if (len < 0)
-		return -EFAULT;
+	if (count > 4)
+		count = 4;
-	if (copy_from_user(strbuf, buffer, len))
+	if (copy_from_user(strbuf, buffer, count))
 		return -EFAULT;
-	strbuf[len] = '\0';
+	strbuf[count] = '\0';
 	sscanf(strbuf, "%s", str);
 	mutex_lock(&acpi_device_lock);
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index eff7222..fea8ae4 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -28,7 +28,7 @@ static int __init set_no_mwait(const struct dmi_system_id *id)
 	return 0;
 }
-static struct dmi_system_id __initdata processor_idle_dmi_table[] = {
+static const struct dmi_system_id __initconst processor_idle_dmi_table[] = {
 	{
 	set_no_mwait, "Extensa 5220", {
 	DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c
index ac28db3..0848b37 100644
--- a/drivers/acpi/processor_driver.c
+++ b/drivers/acpi/processor_driver.c
@@ -474,7 +474,7 @@ static int __cpuinit acpi_processor_add(struct acpi_device *device)
 	return 0;
 #endif
-	BUG_ON((pr->id >= nr_cpu_ids) || (pr->id < 0));
+	BUG_ON(pr->id >= nr_cpu_ids);
 	/*
 	 * Buggy BIOS check
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 388ba10..d509dbb 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -1036,7 +1036,7 @@ static int acpi_processor_setup_cpuidle_states(struct acpi_processor *pr)
 {
 	int i, count = CPUIDLE_DRIVER_STATE_START;
 	struct acpi_processor_cx *cx;
-	struct cpuidle_state *state;
+	cpuidle_state_no_const *state;
 	struct cpuidle_driver *drv = &acpi_idle_driver;
 	if (!pr->flags.power_setup_done)
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index cc9d020..b72fc03 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -120,7 +120,7 @@ static int __init init_nvs_nosave(const struct dmi_system_id *d)
 	return 0;
 }
-static struct dmi_system_id __initdata acpisleep_dmi_table[] = {
+static const struct dmi_system_id __initconst acpisleep_dmi_table[] = {
 	{
 	.callback = init_old_suspend_ordering,
 	.ident = "Abit KN9 (nForce4 variant)",
diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 240a244..bc6239e 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -420,11 +420,11 @@ static u32 num_counters;
 static struct attribute **all_attrs;
 static u32 acpi_gpe_count;
-static struct attribute_group interrupt_stats_attr_group = {
+static attribute_group_no_const interrupt_stats_attr_group = {
 	.name = "interrupts",
 };
-static struct kobj_attribute *counter_attrs;
+static kobj_attribute_no_const *counter_attrs;
 static void delete_gpe_attr_array(void)
 {
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 48fbc64..d0a31d4 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -1110,7 +1110,7 @@ static int thermal_psv(const struct dmi_system_id *d) {
 	return 0;
 }
-static struct dmi_system_id thermal_dmi_table[] __initdata = {
+static const struct dmi_system_id thermal_dmi_table[] __initconst = {
 	/*
 	 * Award BIOS on this AOpen makes thermal control almost worthless.
 	 * http://bugzilla.kernel.org/show_bug.cgi?id=8842
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index cb842a8..6688e24 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -395,7 +395,7 @@ static int video_ignore_initial_backlight(const struct dmi_system_id *d)
 	return 0;
 }
-static struct dmi_system_id video_dmi_table[] __initdata = {
+static const struct dmi_system_id video_dmi_table[] __initconst = {
 	/*
 	 * Broken _BQC workaround http://bugzilla.kernel.org/show_bug.cgi?id=13121
 	 */
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index de2802c..2260da9 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -1211,7 +1211,7 @@ int ahci_kick_engine(struct ata_port *ap)
 }
 EXPORT_SYMBOL_GPL(ahci_kick_engine);
-static int ahci_exec_polled_cmd(struct ata_port *ap, int pmp,
+static int __intentional_overflow(-1) ahci_exec_polled_cmd(struct ata_port *ap, int pmp,
 				struct ata_taskfile *tf, int is_cmd, u16 flags,
 				unsigned long timeout_msec)
 {
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 5d8fc3d..d537f03 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4790,7 +4790,7 @@ void ata_qc_free(struct ata_queued_cmd *qc)
 	struct ata_port *ap;
 	unsigned int tag;
-	WARN_ON_ONCE(qc == NULL); /* ata_qc_from_tag _might_ return NULL */
+	BUG_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */
 	ap = qc->ap;
 	qc->flags = 0;
@@ -4806,7 +4806,7 @@ void __ata_qc_complete(struct ata_queued_cmd *qc)
 	struct ata_port *ap;
 	struct ata_link *link;
-	WARN_ON_ONCE(qc == NULL); /* ata_qc_from_tag _might_ return NULL */
+	BUG_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */
 	WARN_ON_ONCE(!(qc->flags & ATA_QCFLAG_ACTIVE));
 	ap = qc->ap;
 	link = qc->dev->link;
@@ -5811,6 +5811,7 @@ static void ata_finalize_port_ops(struct ata_port_operations *ops)
 		return;
 	spin_lock(&lock);
+	pax_open_kernel();
 	for (cur = ops->inherits; cur; cur = cur->inherits) {
 		void **inherit = (void **)cur;
@@ -5824,8 +5825,9 @@ static void ata_finalize_port_ops(struct ata_port_operations *ops)
 		if (IS_ERR(*pp))
 			*pp = NULL;
-	ops->inherits = NULL;
+	*(struct ata_port_operations **)&ops->inherits = NULL;
+	pax_close_kernel();
 	spin_unlock(&lock);
 }
diff --git a/drivers/ata/pata_arasan_cf.c b/drivers/ata/pata_arasan_cf.c
index e8574bb..f9f6a72 100644
--- a/drivers/ata/pata_arasan_cf.c
+++ b/drivers/ata/pata_arasan_cf.c
@@ -862,7 +862,9 @@ static int __devinit arasan_cf_probe(struct platform_device *pdev)
 	/* Handle platform specific quirks */
 	if (pdata->quirk) {
 		if (pdata->quirk & CF_BROKEN_PIO) {
-			ap->ops->set_piomode = NULL;
+			pax_open_kernel();
+			*(void **)&ap->ops->set_piomode = NULL;
+			pax_close_kernel();
 			ap->pio_mask = 0;
 		}
 		if (pdata->quirk & CF_BROKEN_MWDMA)
diff --git a/drivers/atm/adummy.c b/drivers/atm/adummy.c
index f9b983a..887b9d8 100644
--- a/drivers/atm/adummy.c
+++ b/drivers/atm/adummy.c
@@ -114,7 +114,7 @@ adummy_send(struct atm_vcc *vcc, struct sk_buff *skb)
 		vcc->pop(vcc, skb);
 	else
 		dev_kfree_skb_any(skb);
-	atomic_inc(&vcc->stats->tx);
+	atomic_inc_unchecked(&vcc->stats->tx);
 	return 0;
 }
diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c
index 89b30f3..7964211d4 100644
--- a/drivers/atm/ambassador.c
+++ b/drivers/atm/ambassador.c
@@ -454,7 +454,7 @@ static void tx_complete (amb_dev * dev, tx_out * tx) {
   PRINTD (DBG_FLOW|DBG_TX, "tx_complete %p %p", dev, tx);
   // VC layer stats
-  atomic_inc(&ATM_SKB(skb)->vcc->stats->tx);
+  atomic_inc_unchecked(&ATM_SKB(skb)->vcc->stats->tx);
   // free the descriptor
   kfree (tx_descr);
@@ -495,7 +495,7 @@ static void rx_complete (amb_dev * dev, rx_out * rx) {
 	  dump_skb ("<<<", vc, skb);
 	  // VC layer stats
-	  atomic_inc(&atm_vcc->stats->rx);
+	  atomic_inc_unchecked(&atm_vcc->stats->rx);
 	  __net_timestamp(skb);
 	  // end of our responsibility
 	  atm_vcc->push (atm_vcc, skb);
@@ -510,7 +510,7 @@ static void rx_complete (amb_dev * dev, rx_out * rx) {
       } else {
       	PRINTK (KERN_INFO, "dropped over-size frame");
 	// should we count this?
-	atomic_inc(&atm_vcc->stats->rx_drop);
+	atomic_inc_unchecked(&atm_vcc->stats->rx_drop);
       }
     } else {
@@ -1338,7 +1338,7 @@ static int amb_send (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
   }
   if (check_area (skb->data, skb->len)) {
-    atomic_inc(&atm_vcc->stats->tx_err);
+    atomic_inc_unchecked(&atm_vcc->stats->tx_err);
     return -ENOMEM; // ?
   }
diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c
index b22d71c..d6e1049 100644
--- a/drivers/atm/atmtcp.c
+++ b/drivers/atm/atmtcp.c
@@ -207,7 +207,7 @@ static int atmtcp_v_send(struct atm_vcc *vcc,struct sk_buff *skb)
 		if (vcc->pop) vcc->pop(vcc,skb);
 		else dev_kfree_skb(skb);
 		if (dev_data) return 0;
-		atomic_inc(&vcc->stats->tx_err);
+		atomic_inc_unchecked(&vcc->stats->tx_err);
 		return -ENOLINK;
 	}
 	size = skb->len+sizeof(struct atmtcp_hdr);
@@ -215,7 +215,7 @@ static int atmtcp_v_send(struct atm_vcc *vcc,struct sk_buff *skb)
 	if (!new_skb) {
 		if (vcc->pop) vcc->pop(vcc,skb);
 		else dev_kfree_skb(skb);
-		atomic_inc(&vcc->stats->tx_err);
+		atomic_inc_unchecked(&vcc->stats->tx_err);
 		return -ENOBUFS;
 	}
 	hdr = (void *) skb_put(new_skb,sizeof(struct atmtcp_hdr));
@@ -226,8 +226,8 @@ static int atmtcp_v_send(struct atm_vcc *vcc,struct sk_buff *skb)
 	if (vcc->pop) vcc->pop(vcc,skb);
 	else dev_kfree_skb(skb);
 	out_vcc->push(out_vcc,new_skb);
-	atomic_inc(&vcc->stats->tx);
-	atomic_inc(&out_vcc->stats->rx);
+	atomic_inc_unchecked(&vcc->stats->tx);
+	atomic_inc_unchecked(&out_vcc->stats->rx);
 	return 0;
 }
@@ -301,7 +301,7 @@ static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb)
 	out_vcc = find_vcc(dev, ntohs(hdr->vpi), ntohs(hdr->vci));
 	read_unlock(&vcc_sklist_lock);
 	if (!out_vcc) {
-		atomic_inc(&vcc->stats->tx_err);
+		atomic_inc_unchecked(&vcc->stats->tx_err);
 		goto done;
 	}
 	skb_pull(skb,sizeof(struct atmtcp_hdr));
@@ -313,8 +313,8 @@ static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb)
 	__net_timestamp(new_skb);
 	skb_copy_from_linear_data(skb, skb_put(new_skb, skb->len), skb->len);
 	out_vcc->push(out_vcc,new_skb);
-	atomic_inc(&vcc->stats->tx);
-	atomic_inc(&out_vcc->stats->rx);
+	atomic_inc_unchecked(&vcc->stats->tx);
+	atomic_inc_unchecked(&out_vcc->stats->rx);
 done:
 	if (vcc->pop) vcc->pop(vcc,skb);
 	else dev_kfree_skb(skb);
diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index 956e9ac..133516d 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -526,7 +526,7 @@ static int rx_aal0(struct atm_vcc *vcc)
 		DPRINTK(DEV_LABEL "(itf %d): trashing empty cell\n",
 		    vcc->dev->number);
 		length = 0;
-		atomic_inc(&vcc->stats->rx_err);
+		atomic_inc_unchecked(&vcc->stats->rx_err);
 	}
 	else {
 		length = ATM_CELL_SIZE-1; /* no HEC */
@@ -581,7 +581,7 @@ static int rx_aal5(struct atm_vcc *vcc)
 			    size);
 		}
 		eff = length = 0;
-		atomic_inc(&vcc-