[PATCH 1/1] aarch64: support PAC and BTI
Bill Roberts
bill.roberts at arm.com
Mon Aug 12 20:54:35 CEST 2024
Enable Pointer Authentication Codes (PAC) and Branch Target
Identification (BTI) support for ARM 64 targets.
PAC works by signing the LR with either an A key or B key and verifying
the return address. There are quite a few instructions capable of doing
this, however, the Linux ARM ABI is to use hint compatible instructions
that can be safely NOP'd on older hardware and can be assembled and
linked with older binutils. This limits the instruction set to paciasp,
pacibsp, autiasp and autibsp. Instructions prefixed with pac are for
signing and instructions prefixed with aut are for verifying. Both
instructions are then followed with an a or b to indicate which signing
key they are using. The keys can be controlled using
-mbranch-protection=pac-ret for the A key and
-mbranch-protection=pac-ret+b-key for the B key.
BTI works by marking all call and jump positions with bti c and bti
j instructions. If execution control transfers to an instruction other
than a BTI instruction, the execution is killed via SIGILL. Note that
to remove one instruction, the aforementioned pac instructions will
also work as a BTI landing pad for bti c usages.
For BTI to work, all object files linked for a unit of execution,
whether an executable or a library must have the GNU Notes section of
the ELF file marked to indicate BTI support. This is so loader/linkers
can apply the proper permission bits (PROT_BTI) on the memory region.
PAC can also be annotated in the GNU ELF notes section, but it's not
required for enablement, as interleaved PAC and non-pac code works as
expected since it's the callee that performs all the checking. The
linker follows the same rules as BTI for discarding the PAC flag from
the GNU Notes section.
Testing was done under the following CFLAGS and CXXFLAGS for all
combinations:
1. -mbranch-protection=none
2. -mbranch-protection=standard
3. -mbranch-protection=pac-ret
4. -mbranch-protection=pac-ret+b-key
5. -mbranch-protection=bti
Additional Notes:
MPN was handled differently than the standard approach of all PROLOGUES
getting a SIGN_LR macro. This is because MPN does not make use of
saving the x30, aka the link register (LR), to the stack in almost all
instances. However, some functions do, and they were explicitly handled.
This not only avoids the cost of the operations to sign and verify the
LR but also handles instances where branches are taken to labels where
indirect branches are used over branch and link to optimize the assembly.
Also, within the configure.ac are a myriad of options for different
architectures, chipsets, ABIs, etc. To compound that, additional
architecture specific features could be enabled within CFLAGS that
need to be respected in order to get a correct output. For instance in
aarch64, the PAC and BTI instructions need to be output in the generated
assembly as well as the GNU notes section added to the ELF output to get
those security features. Hacking it into the configure options seems
baroque, especially considering that distro packaging will often just
set a set of CFLAGS to be respected and move on, and that's what most users
would expect. Taking this all into consideration, allowing for a per
architecture script that can be executed to generate additional m4
allows for internal definitions, like in the PAC case, to be exposed, or
any multitude of options if other archs need something like this. This
introduces the variable gen_path_m4 that archs can set to the script of
their choosing to generate whatever m4 they need that is prepended to
the m4 generation command after the defines.
Signed-off-by: Bill Roberts <bill.roberts at arm.com>
---
configure.ac | 12 +++++
mpn/Makeasm.am | 3 +-
mpn/arm64/aors_n.asm | 4 ++
mpn/arm64/aorsmul_1.asm | 3 ++
mpn/arm64/aorsorrlsh1_n.asm | 2 +
mpn/arm64/aorsorrlsh2_n.asm | 2 +
mpn/arm64/aorsorrlshC_n.asm | 1 +
mpn/arm64/arm64-defs.m4 | 67 ++++++++++++++++++++++++++++
mpn/arm64/bdiv_dbm1c.asm | 2 +
mpn/arm64/bdiv_q_1.asm | 3 ++
mpn/arm64/cnd_aors_n.asm | 3 ++
mpn/arm64/com.asm | 2 +
mpn/arm64/copyd.asm | 2 +
mpn/arm64/copyi.asm | 2 +
mpn/arm64/divrem_1.asm | 9 ++++
mpn/arm64/gcd_11.asm | 2 +
mpn/arm64/gcd_22.asm | 2 +
mpn/arm64/gen-extra-m4.sh | 81 ++++++++++++++++++++++++++++++++++
mpn/arm64/hamdist.asm | 7 ++-
mpn/arm64/invert_limb.asm | 2 +
mpn/arm64/logops_n.asm | 3 ++
mpn/arm64/lshift.asm | 2 +
mpn/arm64/lshiftc.asm | 2 +
mpn/arm64/mod_34lsub1.asm | 2 +
mpn/arm64/mul_1.asm | 3 ++
mpn/arm64/popcount.asm | 8 +++-
mpn/arm64/rsh1aors_n.asm | 3 ++
mpn/arm64/rshift.asm | 2 +
mpn/arm64/sec_tabselect.asm | 2 +
mpn/arm64/sqr_diag_addlsh1.asm | 2 +
mpn/m4-ccas | 23 ++++++++--
31 files changed, 255 insertions(+), 8 deletions(-)
create mode 100755 mpn/arm64/gen-extra-m4.sh
diff --git a/configure.ac b/configure.ac
index c3a4a9bf8..83a73f3a0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -473,6 +473,11 @@ cc_64_cflags="-O"
SPEED_CYCLECOUNTER_OBJ=
cyclecounter_size=2
+# architectures can set this to add defines dynamically to m4 generation.
+# For example, in arm64 it is used to determine if PAC and BTI are enabled
+# and enable generation of those instructions in m4 asm.
+gen_path_m4=
+
AC_SUBST(HAVE_HOST_CPU_FAMILY_power, 0)
AC_SUBST(HAVE_HOST_CPU_FAMILY_powerpc,0)
@@ -781,6 +786,7 @@ case $host in
gcc_cflags_arch="-march=armv8-a"
gcc_cflags_neon="-mfpu=neon"
gcc_cflags_tune=""
+ gen_path_m4="arm64/gen-extra-m4.sh"
;;
[applem[1-9]*])
abilist="64"
@@ -4051,6 +4057,12 @@ fi
AC_PROG_YACC
AM_PROG_LEX
+# This may appear odd, however prefixing with m4 is
+# reserved in m4/autoconf but not in automake and
+# beyond. The prefixed version matches things like
+# gcc_c_flags.
+AC_SUBST([M4_GEN_PATH], [$gen_path_m4])
+
# Create config.m4.
GMP_FINISH
diff --git a/mpn/Makeasm.am b/mpn/Makeasm.am
index 5d7306c22..bfdc632fe 100644
--- a/mpn/Makeasm.am
+++ b/mpn/Makeasm.am
@@ -115,4 +115,5 @@ RM_TMP = rm -f
$(CCAS) $(COMPILE_FLAGS) tmp-$*.s -o $@
$(RM_TMP) tmp-$*.s
.asm.lo:
- $(LIBTOOL) --mode=compile --tag=CC $(top_srcdir)/mpn/m4-ccas --m4="$(M4)" $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+ $(LIBTOOL) --mode=compile --tag=CC $(top_srcdir)/mpn/m4-ccas --m4-gen-path=$(top_srcdir)/mpn/$(M4_GEN_PATH) --m4="$(M4)" \
+ $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
diff --git a/mpn/arm64/aors_n.asm b/mpn/arm64/aors_n.asm
index b4a6da6ff..a5b542d4d 100644
--- a/mpn/arm64/aors_n.asm
+++ b/mpn/arm64/aors_n.asm
@@ -60,13 +60,16 @@ ifdef(`OPERATION_sub_n', `
define(`func_nc', mpn_sub_nc)')
MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+ BTI_C
ASM_START()
PROLOGUE(func_nc)
+ BTI_C
SETCY( x4)
b L(ent)
EPILOGUE()
PROLOGUE(func_n)
+ BTI_C
CLRCY
L(ent): lsr x17, n, #2
tbz n, #0, L(bx0)
@@ -123,3 +126,4 @@ L(end): ADDSUBC x12, x6, x10
L(ret): RETVAL
ret
EPILOGUE()
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/aorsmul_1.asm b/mpn/arm64/aorsmul_1.asm
index 81ec1dabb..05091330d 100644
--- a/mpn/arm64/aorsmul_1.asm
+++ b/mpn/arm64/aorsmul_1.asm
@@ -68,8 +68,10 @@ ifdef(`OPERATION_submul_1', `
define(`func', mpn_submul_1)')
MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+ BTI_C
PROLOGUE(func)
+ BTI_C
adds x15, xzr, xzr
tbz n, #0, L(1)
@@ -143,3 +145,4 @@ L(mid): sub n, n, #1
csinc x0, x15, x15, COND
ret
EPILOGUE()
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/aorsorrlsh1_n.asm b/mpn/arm64/aorsorrlsh1_n.asm
index c617a67a9..2de3ff992 100644
--- a/mpn/arm64/aorsorrlsh1_n.asm
+++ b/mpn/arm64/aorsorrlsh1_n.asm
@@ -39,5 +39,7 @@ ifdef(`OPERATION_sublsh1_n',`define(`DO_sub')')
ifdef(`OPERATION_rsblsh1_n',`define(`DO_rsb')')
MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n mpn_rsblsh1_n)
+ BTI_C
include_mpn(`arm64/aorsorrlshC_n.asm')
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/aorsorrlsh2_n.asm b/mpn/arm64/aorsorrlsh2_n.asm
index 852d11720..2161ae2a9 100644
--- a/mpn/arm64/aorsorrlsh2_n.asm
+++ b/mpn/arm64/aorsorrlsh2_n.asm
@@ -39,5 +39,7 @@ ifdef(`OPERATION_sublsh2_n',`define(`DO_sub')')
ifdef(`OPERATION_rsblsh2_n',`define(`DO_rsb')')
MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n mpn_rsblsh2_n)
+ BTI_C
include_mpn(`arm64/aorsorrlshC_n.asm')
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/aorsorrlshC_n.asm b/mpn/arm64/aorsorrlshC_n.asm
index 1718b7757..97df8c6f0 100644
--- a/mpn/arm64/aorsorrlshC_n.asm
+++ b/mpn/arm64/aorsorrlshC_n.asm
@@ -65,6 +65,7 @@ ifdef(`DO_rsb', `
ASM_START()
PROLOGUE(func_n)
+ BTI_C
lsr x6, n, #2
tbz n, #0, L(bx0)
diff --git a/mpn/arm64/arm64-defs.m4 b/mpn/arm64/arm64-defs.m4
index 46149f7bf..d0ad4b63c 100644
--- a/mpn/arm64/arm64-defs.m4
+++ b/mpn/arm64/arm64-defs.m4
@@ -36,6 +36,73 @@ dnl don't want to disable macro expansions in or after them.
changecom
+dnl use the hint instructions so they NOP on older machines.
+dnl Add comments so the assembly is notated with the instruction
+
+
+define(`BTI_C', `hint #34 /* bti c */')
+define(`PACIASP', `hint #25 /* paciasp */')
+define(`AUTIASP', `hint #29 /* autiasp */')
+define(`PACIBSP', `hint #27 /* pacibsp */')
+define(`AUTIBSP', `hint #31 /* autibsp */')
+
+dnl if BTI is enabled we want the SIGN_LR to be a valid
+dnl landing pad, we don't need VERIFY_LR and we need to
+dnl indicate the valid BTI support for gnu notes.
+
+
+ifelse(ARM64_FEATURE_BTI_DEFAULT, `1',
+ `define(`SIGN_LR', `BTI_C')
+  define(`GNU_PROPERTY_AARCH64_BTI', `1')
+  define(`PAC_OR_BTI')',
+ `define(`GNU_PROPERTY_AARCH64_BTI', `0')'
+)
+
+dnl define instructions for PAC, which can use the A
+dnl or the B key. PAC instructions are also valid BTI
+dnl landing pads, so we re-define SIGN_LR if BTI is
+dnl enabled.
+
+
+ifelse(ARM64_FEATURE_PAC_DEFAULT, `1',
+ `define(`SIGN_LR', `PACIASP')
+  define(`VERIFY_LR', `AUTIASP')
+  define(`GNU_PROPERTY_AARCH64_POINTER_AUTH', `2')
+  define(`PAC_OR_BTI')',
+ ARM64_FEATURE_PAC_DEFAULT, `2',
+ `define(`SIGN_LR', `PACIBSP')
+  define(`VERIFY_LR', `AUTIBSP')
+  define(`GNU_PROPERTY_AARCH64_POINTER_AUTH', `2')
+  define(`PAC_OR_BTI')',
+ `ifdef(`SIGN_LR', , `define(`SIGN_LR', `')')
+  define(`VERIFY_LR', `')
+  define(`GNU_PROPERTY_AARCH64_POINTER_AUTH', `0')'
+)
+
+dnl ADD_GNU_NOTES_IF_NEEDED
+dnl
+dnl Conditionally add into ELF assembly files the GNU notes indicating if
+dnl BTI or PAC is supported. BTI is required by the linkers and loaders, however
+dnl PAC is a nice to have for auditing. Use readelf -n to display.
+
+
+define(`ADD_GNU_NOTES_IF_NEEDED', `
+  ifdef(`ARM64_ELF', `
+    ifdef(`PAC_OR_BTI', `
+	.pushsection .note.gnu.property, "a";
+	.balign 8;
+	.long 4;
+	.long 0x10;
+	.long 0x5;
+	.asciz "GNU";
+	.long 0xc0000000; /* GNU_PROPERTY_AARCH64_FEATURE_1_AND */
+	.long 4;
+	.long eval(indir(`GNU_PROPERTY_AARCH64_POINTER_AUTH') + indir(`GNU_PROPERTY_AARCH64_BTI'));
+	.long 0;
+	.popsection;
+    ')
+  ')
+')
dnl LEA_HI(reg,gmp_symbol), LEA_LO(reg,gmp_symbol)
dnl
diff --git a/mpn/arm64/bdiv_dbm1c.asm b/mpn/arm64/bdiv_dbm1c.asm
index 78984b426..9f15f8e59 100644
--- a/mpn/arm64/bdiv_dbm1c.asm
+++ b/mpn/arm64/bdiv_dbm1c.asm
@@ -45,6 +45,7 @@ ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_bdiv_dbm1c)
+ BTI_C
ldr x5, [up], #8
ands x6, n, #3
b.eq L(fi0)
@@ -109,3 +110,4 @@ L(wd1): subs x4, x4, x12
sbc x0, x4, x13
ret
EPILOGUE()
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/bdiv_q_1.asm b/mpn/arm64/bdiv_q_1.asm
index 7fffc9369..401227a83 100644
--- a/mpn/arm64/bdiv_q_1.asm
+++ b/mpn/arm64/bdiv_q_1.asm
@@ -56,6 +56,7 @@ define(`tnc', `x8')
ASM_START()
PROLOGUE(mpn_bdiv_q_1)
+ BTI_C
rbit x6, d
clz cnt, x6
@@ -79,6 +80,7 @@ PROLOGUE(mpn_bdiv_q_1)
EPILOGUE()
PROLOGUE(mpn_pi1_bdiv_q_1)
+ BTI_C
sub n, n, #1
subs x6, x6, x6 C clear r6 and C flag
ldr x9, [up],#8
@@ -120,3 +122,4 @@ L(tpn): ldr x9, [up],#8
L(en1): ret
EPILOGUE()
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/cnd_aors_n.asm b/mpn/arm64/cnd_aors_n.asm
index 397aa5100..916708885 100644
--- a/mpn/arm64/cnd_aors_n.asm
+++ b/mpn/arm64/cnd_aors_n.asm
@@ -57,9 +57,11 @@ ifdef(`OPERATION_cnd_sub_n', `
define(`func', mpn_cnd_sub_n)')
MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
+ BTI_C
ASM_START()
PROLOGUE(func)
+ BTI_C
cmp cnd, #1
sbc cnd, cnd, cnd
@@ -127,3 +129,4 @@ L(end): bic x6, x12, cnd
L(rt): RETVAL
ret
EPILOGUE()
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/com.asm b/mpn/arm64/com.asm
index d59494380..82b6787bf 100644
--- a/mpn/arm64/com.asm
+++ b/mpn/arm64/com.asm
@@ -47,6 +47,7 @@ define(`n', `x2')
ASM_START()
PROLOGUE(mpn_com)
+ BTI_C
cmp n, #3
b.le L(bc)
@@ -90,3 +91,4 @@ L(tl1): tbz n, #0, L(tl2)
str x4, [rp]
L(tl2): ret
EPILOGUE()
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/copyd.asm b/mpn/arm64/copyd.asm
index d542970b7..b221d23a8 100644
--- a/mpn/arm64/copyd.asm
+++ b/mpn/arm64/copyd.asm
@@ -47,6 +47,7 @@ define(`n', `x2')
ASM_START()
PROLOGUE(mpn_copyd)
+ BTI_C
add rp, rp, n, lsl #3
add up, up, n, lsl #3
@@ -83,3 +84,4 @@ L(tl1): tbz n, #0, L(tl2)
str x4, [rp,#-8]
L(tl2): ret
EPILOGUE()
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/copyi.asm b/mpn/arm64/copyi.asm
index 0de40c5d7..360266c67 100644
--- a/mpn/arm64/copyi.asm
+++ b/mpn/arm64/copyi.asm
@@ -47,6 +47,7 @@ define(`n', `x2')
ASM_START()
PROLOGUE(mpn_copyi)
+ BTI_C
cmp n, #3
b.le L(bc)
@@ -80,3 +81,4 @@ L(tl1): tbz n, #0, L(tl2)
str x4, [rp]
L(tl2): ret
EPILOGUE()
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/divrem_1.asm b/mpn/arm64/divrem_1.asm
index 9d5bb5959..2bb8850d9 100644
--- a/mpn/arm64/divrem_1.asm
+++ b/mpn/arm64/divrem_1.asm
@@ -66,6 +66,8 @@ dnl mp_limb_t d_unnorm, mp_limb_t dinv, int cnt)
ASM_START()
PROLOGUE(mpn_preinv_divrem_1)
+ BTI_C
+ SIGN_LR
cbz n_arg, L(fz)
stp x29, x30, [sp, #-80]!
mov x29, sp
@@ -86,6 +88,8 @@ PROLOGUE(mpn_preinv_divrem_1)
EPILOGUE()
PROLOGUE(mpn_divrem_1)
+ BTI_C
+ SIGN_LR
cbz n_arg, L(fz)
stp x29, x30, [sp, #-80]!
mov x29, sp
@@ -154,6 +158,7 @@ L(uend):add x2, x11, #1
ldp x21, x22, [sp, #32]
ldp x23, x24, [sp, #48]
ldp x29, x30, [sp], #80
+ VERIFY_LR
ret
L(ufx): add x2, x2, #1
@@ -194,6 +199,7 @@ L(nend):cbnz fn, L(frac)
ldp x21, x22, [sp, #32]
ldp x23, x24, [sp, #48]
ldp x29, x30, [sp], #80
+ VERIFY_LR
ret
L(nfx): add x2, x2, #1
@@ -219,6 +225,7 @@ L(ftop):add x2, x11, #1
ldp x21, x22, [sp, #32]
ldp x23, x24, [sp, #48]
ldp x29, x30, [sp], #80
+ VERIFY_LR
ret
C Block zero. We need this for the degenerated case of n = 0, fn != 0.
@@ -227,5 +234,7 @@ L(ztop):str xzr, [qp_arg], #8
sub fn_arg, fn_arg, #1
cbnz fn_arg, L(ztop)
L(zend):mov x0, #0
+ VERIFY_LR
ret
EPILOGUE()
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/gcd_11.asm b/mpn/arm64/gcd_11.asm
index d8cc3e2cf..5e18fa21b 100644
--- a/mpn/arm64/gcd_11.asm
+++ b/mpn/arm64/gcd_11.asm
@@ -54,6 +54,7 @@ ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_gcd_11)
+ BTI_C
subs x3, u0, v0 C 0
b.eq L(end) C
@@ -68,3 +69,4 @@ L(top): rbit x12, x3 C 1,5
L(end): ret
EPILOGUE()
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/gcd_22.asm b/mpn/arm64/gcd_22.asm
index 5367fea02..4a0b902b7 100644
--- a/mpn/arm64/gcd_22.asm
+++ b/mpn/arm64/gcd_22.asm
@@ -56,6 +56,7 @@ define(`tnc', `x8')
ASM_START()
PROLOGUE(mpn_gcd_22)
+ BTI_C
ALIGN(16)
L(top): subs t0, u0, v0 C 0 6
@@ -110,3 +111,4 @@ L(end): mov x0, v0
mov x1, v1
ret
EPILOGUE()
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/gen-extra-m4.sh b/mpn/arm64/gen-extra-m4.sh
new file mode 100755
index 000000000..09c5b8975
--- /dev/null
+++ b/mpn/arm64/gen-extra-m4.sh
@@ -0,0 +1,87 @@
+#!/bin/sh
+#
+# A script for dynamically generating m4 definitions for aarch64 based on compilation flags.
+#
+# Copyright 2024 ARM Ltd.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of either:
+#
+#   * the GNU Lesser General Public License as published by the Free
+#     Software Foundation; either version 3 of the License, or (at your
+#     option) any later version.
+#
+# or
+#
+#   * the GNU General Public License as published by the Free Software
+#     Foundation; either version 2 of the License, or (at your option) any
+#     later version.
+#
+# or both in parallel, as here.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received copies of the GNU General Public License and the
+# GNU Lesser General Public License along with the GNU MP Library. If not,
+# see https://www.gnu.org/licenses/.
+
+# Usage: ./gen-extra-m4.sh "$CC"
+# Returns: valid M4 to stdout.
+
+if test "$#" -ne 1; then
+  echo "Expected 1 argument, the CC. Got: $#" >&2
+  exit 1
+fi
+
+CC=$1
+
+ARM64_FEATURE_BTI_DEFAULT="0"
+ARM64_FEATURE_PAC_DEFAULT="0"
+ARM64_ELF="0"
+
+# Strip -o from the CC line so the -dM -E macro dump goes to stdout.
+_CC=$(echo "$CC" | sed 's/-o [^ ]*//')
+# NB: the '|| exit' must sit outside the command substitution; placed
+# inside, it would only terminate the subshell, not this script.
+output=$($_CC -dM -E - < /dev/null) || exit $?
+while IFS= read -r line; do
+  # Skip empty lines
+  if test -z "$line"; then
+    continue
+  fi
+  # Match the #define pattern and extract the macro name and value
+  case "$line" in
+    \#define\ *\ *)
+      macro_name=`echo "$line" | awk '{print $2}'`
+      macro_value=`echo "$line" | cut -d ' ' -f 3- | sed 's/^"\(.*\)"$/\1/'`
+      # map's would be nice in POSIX shell, could use eval to simplify, but
+      # I won't do that to others.
+      case "$macro_name" in
+        __ARM_FEATURE_BTI_DEFAULT)
+          ARM64_FEATURE_BTI_DEFAULT="$macro_value"
+          ;;
+        __ARM_FEATURE_PAC_DEFAULT)
+          ARM64_FEATURE_PAC_DEFAULT="$macro_value"
+          ;;
+        __ELF__)
+          ARM64_ELF="$macro_value"
+          ;;
+      esac # end assignments
+      ;;
+  esac # end define
+# '<<<' is a bash-only here-string and this script runs under /bin/sh;
+# a here-document is POSIX and also keeps the loop in the current shell.
+done <<EOF
+$output
+EOF
+
+# Output the M4 define statement. To make m4 simpler always output something so we can
+# use an ifelse without needing to nest it within an ifdef.
+echo "define(\`ARM64_FEATURE_BTI_DEFAULT', \`$ARM64_FEATURE_BTI_DEFAULT')"
+echo "define(\`ARM64_FEATURE_PAC_DEFAULT', \`$ARM64_FEATURE_PAC_DEFAULT')"
+echo "define(\`ARM64_ELF', \`$ARM64_ELF')"
diff --git a/mpn/arm64/hamdist.asm b/mpn/arm64/hamdist.asm
index c72ca55b3..418519458 100644
--- a/mpn/arm64/hamdist.asm
+++ b/mpn/arm64/hamdist.asm
@@ -60,12 +60,13 @@ define(`chunksize',0x1ff0)
ASM_START()
PROLOGUE(mpn_hamdist)
+ BTI_C
mov x11, #maxsize
cmp n, x11
b.hi L(gt8k)
-L(lt8k):
+L(lt8k): BTI_C
movi v4.16b, #0 C clear summation register
movi v5.16b, #0 C clear summation register
@@ -103,7 +104,8 @@ L(gt4): ld1 {v2.2d,v3.2d}, [ap], #32 C load 4 limbs
L(000): subs n, n, #8
b.lo L(e0)
-L(chu): ld1 {v2.2d,v3.2d}, [ap], #32 C load 4 limbs
+L(chu): BTI_C
+ ld1 {v2.2d,v3.2d}, [ap], #32 C load 4 limbs
ld1 {v0.2d,v1.2d}, [ap], #32 C load 4 limbs
ld1 {v18.2d,v19.2d}, [bp], #32 C load 4 limbs
ld1 {v16.2d,v17.2d}, [bp], #32 C load 4 limbs
@@ -179,3 +181,4 @@ L(gt8k):
mov x30, x8
ret
EPILOGUE()
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/invert_limb.asm b/mpn/arm64/invert_limb.asm
index 6a99bf002..a42a3c751 100644
--- a/mpn/arm64/invert_limb.asm
+++ b/mpn/arm64/invert_limb.asm
@@ -40,6 +40,7 @@ C Compiler generated, mildly edited. Could surely be further optimised.
ASM_START()
PROLOGUE(mpn_invert_limb)
+ BTI_C
lsr x2, x0, #54
LEA_HI( x1, approx_tab)
and x2, x2, #0x1fe
@@ -81,3 +82,4 @@ approx_tab:
forloop(i,256,512-1,dnl
` .hword eval(0x7fd00/i)
')dnl
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/logops_n.asm b/mpn/arm64/logops_n.asm
index e959abc71..c3400c760 100644
--- a/mpn/arm64/logops_n.asm
+++ b/mpn/arm64/logops_n.asm
@@ -75,9 +75,11 @@ ifdef(`OPERATION_xnor_n',`
define(`LOGOP', `eon $1, $2, $3')')
MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+ BTI_C
ASM_START()
PROLOGUE(func)
+ BTI_C
lsr x17, n, #2
tbz n, #0, L(bx0)
@@ -137,3 +139,4 @@ L(end): LOGOP( x12, x6, x10)
stp x12, x13, [rp]
L(ret): ret
EPILOGUE()
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/lshift.asm b/mpn/arm64/lshift.asm
index fe8a1aa18..a0cf9a3db 100644
--- a/mpn/arm64/lshift.asm
+++ b/mpn/arm64/lshift.asm
@@ -58,6 +58,7 @@ define(`NSHIFT', lsr)
ASM_START()
PROLOGUE(mpn_lshift)
+ BTI_C
add rp, rp_arg, n, lsl #3
add up, up, n, lsl #3
sub tnc, xzr, cnt
@@ -136,3 +137,4 @@ L(end): orr x10, x10, x13
str x2, [rp,#-24]
ret
EPILOGUE()
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/lshiftc.asm b/mpn/arm64/lshiftc.asm
index 6bf584400..5880912de 100644
--- a/mpn/arm64/lshiftc.asm
+++ b/mpn/arm64/lshiftc.asm
@@ -58,6 +58,7 @@ define(`NSHIFT', lsr)
ASM_START()
PROLOGUE(mpn_lshiftc)
+ BTI_C
add rp, rp_arg, n, lsl #3
add up, up, n, lsl #3
sub tnc, xzr, cnt
@@ -139,3 +140,4 @@ L(end): eon x10, x10, x13
str x2, [rp,#-24]
ret
EPILOGUE()
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/mod_34lsub1.asm b/mpn/arm64/mod_34lsub1.asm
index 7945fe72c..ac84675b7 100644
--- a/mpn/arm64/mod_34lsub1.asm
+++ b/mpn/arm64/mod_34lsub1.asm
@@ -62,6 +62,7 @@ ASM_START()
TEXT
ALIGN(32)
PROLOGUE(mpn_mod_34lsub1)
+ BTI_C
subs n, n, #3
mov x8, #0
b.lt L(le2) C n <= 2
@@ -122,3 +123,4 @@ L(1): ldr x2, [ap]
add x0, x0, x2, lsr #48
ret
EPILOGUE()
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/mul_1.asm b/mpn/arm64/mul_1.asm
index fb965efff..87760191d 100644
--- a/mpn/arm64/mul_1.asm
+++ b/mpn/arm64/mul_1.asm
@@ -51,11 +51,13 @@ define(`v0', `x3')
PROLOGUE(mpn_mul_1c)
+ BTI_C
adds xzr, xzr, xzr C clear cy flag
b L(com)
EPILOGUE()
PROLOGUE(mpn_mul_1)
+ BTI_C
adds x4, xzr, xzr C clear register and cy flag
L(com): lsr x17, n, #2
tbnz n, #0, L(bx1)
@@ -126,3 +128,4 @@ L(2e): adcs x12, x8, x11
L(1): adc x0, x11, xzr
ret
EPILOGUE()
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/popcount.asm b/mpn/arm64/popcount.asm
index 74de3fc01..4ea179faf 100644
--- a/mpn/arm64/popcount.asm
+++ b/mpn/arm64/popcount.asm
@@ -59,12 +59,14 @@ define(`chunksize',0x1ff0)
ASM_START()
PROLOGUE(mpn_popcount)
+ BTI_C
mov x11, #maxsize
cmp n, x11
b.hi L(gt8k)
-L(lt8k):
+L(lt8k): BTI_C
+
movi v4.16b, #0 C clear summation register
movi v5.16b, #0 C clear summation register
@@ -94,7 +96,8 @@ L(gt4): ld1 {v2.2d,v3.2d}, [ap], #32 C load 4 limbs
L(000): subs n, n, #8
b.lo L(e0)
-L(chu): ld1 {v2.2d,v3.2d}, [ap], #32 C load 4 limbs
+L(chu): BTI_C
+ ld1 {v2.2d,v3.2d}, [ap], #32 C load 4 limbs
ld1 {v0.2d,v1.2d}, [ap], #32 C load 4 limbs
cnt v6.16b, v2.16b
cnt v7.16b, v3.16b
@@ -155,3 +158,4 @@ L(gt8k):
mov x30, x8
ret
EPILOGUE()
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/rsh1aors_n.asm b/mpn/arm64/rsh1aors_n.asm
index afd3d5be4..17487d5d7 100644
--- a/mpn/arm64/rsh1aors_n.asm
+++ b/mpn/arm64/rsh1aors_n.asm
@@ -56,9 +56,11 @@ ifdef(`OPERATION_rsh1sub_n', `
define(`func_n', mpn_rsh1sub_n)')
MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)
+ BTI_C
ASM_START()
PROLOGUE(func_n)
+ BTI_C
lsr x6, n, #2
tbz n, #0, L(bx0)
@@ -166,3 +168,4 @@ L(2): cset x14, COND
L(ret): mov x0, x10
ret
EPILOGUE()
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/rshift.asm b/mpn/arm64/rshift.asm
index 90187ad51..d3fc16301 100644
--- a/mpn/arm64/rshift.asm
+++ b/mpn/arm64/rshift.asm
@@ -58,6 +58,7 @@ define(`NSHIFT', lsl)
ASM_START()
PROLOGUE(mpn_rshift)
+ BTI_C
mov rp, rp_arg
sub tnc, xzr, cnt
lsr x17, n, #2
@@ -134,3 +135,4 @@ L(end): orr x10, x10, x13
str x2, [rp,#32]
ret
EPILOGUE()
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/sec_tabselect.asm b/mpn/arm64/sec_tabselect.asm
index 18a268ace..d671b6f74 100644
--- a/mpn/arm64/sec_tabselect.asm
+++ b/mpn/arm64/sec_tabselect.asm
@@ -57,6 +57,7 @@ define(`maskq', `v4')
ASM_START()
PROLOGUE(mpn_sec_tabselect)
+ BTI_C
dup v7.2d, x4 C 2 `which' copies
mov x10, #1
@@ -120,3 +121,4 @@ L(tp1): cmeq maskq.2d, v5.2d, v7.2d
L(b00): ret
EPILOGUE()
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/arm64/sqr_diag_addlsh1.asm b/mpn/arm64/sqr_diag_addlsh1.asm
index 39f1cb1bc..599717d3a 100644
--- a/mpn/arm64/sqr_diag_addlsh1.asm
+++ b/mpn/arm64/sqr_diag_addlsh1.asm
@@ -46,6 +46,7 @@ define(`n', `x3')
ASM_START()
PROLOGUE(mpn_sqr_diag_addlsh1)
+ BTI_C
ldr x15, [up],#8
lsr x14, n, #1
tbz n, #0, L(bx0)
@@ -100,3 +101,4 @@ L(end): extr x9, x6, x5, #63
ret
EPILOGUE()
+ADD_GNU_NOTES_IF_NEEDED
diff --git a/mpn/m4-ccas b/mpn/m4-ccas
index 16d80c6f5..1d68bfe8b 100755
--- a/mpn/m4-ccas
+++ b/mpn/m4-ccas
@@ -49,6 +49,8 @@ CC=
DEFS=
ASM=
SEEN_O=no
+M4_GENPATH=
+M4_GENERATED=
for i in "$@"; do
case $i in
@@ -73,6 +75,9 @@ for i in "$@"; do
SEEN_O=yes
CC="$CC $i"
;;
+ --m4-gen-path=*)
+ M4_GENPATH=`echo "$i" | sed 's/^--m4-gen-path=//'`
+ ;;
*)
CC="$CC $i"
;;
@@ -97,11 +102,23 @@ if test $SEEN_O = no; then
CC="$CC -o $BASENAME.o"
fi
-echo "$M4 $DEFS $ASM >$TMP"
-$M4 $DEFS $ASM >$TMP || exit
+# Does the architecture have any dynamically generated m4?
+# if so execute the generation script
+if test -n "$M4_GENPATH"; then
+ if ! test -f "$M4_GENPATH"; then
+ echo "$M4_GENPATH not found."
+ exit 1
+ fi
+ echo "$M4_GENPATH \"$CC\""
+ M4_GENERATED="${TMP%.*}.m4"
+ "$M4_GENPATH" "$CC" > "$M4_GENERATED" || exit
+fi
+
+echo "$M4 $DEFS $M4_GENERATED $ASM >$TMP"
+$M4 $DEFS "$M4_GENERATED" $ASM >$TMP || exit
echo "$CC"
$CC || exit
# Comment this out to preserve .s intermediates
-rm -f $TMP
+rm -f $TMP "$M4_GENERATED"
--
2.45.2
More information about the gmp-devel
mailing list