[PATCH] Improve and consolidate sparc PIC assembler.

David Miller davem at davemloft.net
Wed Apr 10 19:48:14 CEST 2013


From: Torbjorn Granlund <tg at gmplib.org>
Date: Wed, 10 Apr 2013 14:35:13 +0200

> Please use LEA* instead of LOAD_SYMBOL*, since that's what we use
> elsewhere.  (OK, LEA might be a misnomer, but a well-established one in
> and outside of GMP.)

Ok.

> I assume your broad testing covers every modified file.  Do you have an
> idea of whether that is true?

I rechecked everything and the one case I missed was supersparc-*.

Even the current tree has a build problem with the supersparc target
when using current tools, due to the combination of a bug in gcc specs
handling and new binutils enforcement of setting the cpu ABI correctly.

The issue is that gcc doesn't specify at least "v8" in the assembler
invocations when -mcpu=supersparc is given so binutils complains when
it sees integer multiply and divide instructions since it defaults to
v7.

I'll get those bugs sorted out, but at least gcc-4.6 and gcc-4.7 have
this problem, and have had it for some time, so I think we should
work around it.  A workaround that works is to pass "-mcpu=v8
-mcpu=supersparc" instead of just plain "-mcpu=supersparc".

For the sake of evaluating this LEA patch, I forced this CFLAGS by
hand on the make command line to make sure my LEA patches didn't
introduce any new problems.

> When testing shared libs, I have found that libtool sometimes prefers an
> installed version to the newly compiled version.  That happens more often
> with 32-bit libs on 64-bit systems, since libtool doesn't set
> LD_32_LIBRARY_PATH.  Please make sure the shared builds' libraries have
> actually been tested.

I've verified that this works as intended.  LD_32_LIBRARY_PATH seems
to be a FreeBSD invention.

> That patch looks good to me, apart from the LEA issue.
> 
> Once you have addressed that, I would like to commit this to the main

Here is the new version, thanks:

2013-04-10  David S. Miller  <davem at davemloft.net>

	* acinclude.m4 (GMP_ASM_SPARC_GOTDATA,
	GMP_ASM_SPARC_SHARED_THUNKS): New feature tests.
	* configure.ac: Call GMP_ASM_SPARC_GOTDATA and
	GMP_ASM_SPARC_SHARED_THUNKS on sparc.
	* mpn/sparc32/sparc-defs.m4 (LEA, LEA_LEAF, LEA_THUNK): New
	macros.
	* mpn/sparc32/udiv.asm: Convert over to LEA, LEA_LEAF, and
	LEA_THUNK.
	* mpn/sparc32/v8/addmul_1.asm: Likewise.
	* mpn/sparc32/v8/mul_1.asm: Likewise.
	* mpn/sparc32/v8/supersparc/udiv.asm: Likewise.
	* mpn/sparc32/v8/udiv.asm: Likewise.
	* mpn/sparc64/gcd_1.asm: Likewise.
	* mpn/sparc64/ultrasparct3/dive_1.asm: Likewise.
	* mpn/sparc64/ultrasparct3/invert_limb.asm: Likewise.
	* mpn/sparc64/ultrasparct3/mode1o.asm: Likewise.
	* mpn/sparc32/v9/sqr_diagonal.asm: Likewise and use INT32.

diff -r a51d8e63e08e acinclude.m4
--- a/acinclude.m4	Tue Apr 09 15:05:39 2013 +0200
+++ b/acinclude.m4	Wed Apr 10 10:01:13 2013 -0700
@@ -3090,6 +3090,57 @@
 ])
 
 
+dnl  GMP_ASM_SPARC_GOTDATA
+dnl  ----------------------
+dnl  Determine whether the assembler accepts gotdata relocations.
+dnl
+dnl  See also mpn/sparc32/sparc-defs.m4 which uses the result of this test.
+
+AC_DEFUN([GMP_ASM_SPARC_GOTDATA],
+[AC_REQUIRE([GMP_ASM_TEXT])
+AC_CACHE_CHECK([if the assembler accepts gotdata relocations],
+               gmp_cv_asm_sparc_gotdata,
+[GMP_TRY_ASSEMBLE(
+[	$gmp_cv_asm_text
+	.text
+	sethi	%gdop_hix22(symbol), %g1
+	or	%g1, %gdop_lox10(symbol), %g1
+],
+[gmp_cv_asm_sparc_gotdata=yes],
+[gmp_cv_asm_sparc_gotdata=no])])
+
+GMP_DEFINE_RAW(["define(<HAVE_GOTDATA>,<$gmp_cv_asm_sparc_gotdata>)"])
+])
+
+
+dnl  GMP_ASM_SPARC_SHARED_THUNKS
+dnl  ----------------------
+dnl  Determine whether the assembler supports all of the features
+dnl  necessary in order to emit shared PIC thunks on sparc.
+dnl
+dnl  See also mpn/sparc32/sparc-defs.m4 which uses the result of this test.
+
+AC_DEFUN([GMP_ASM_SPARC_SHARED_THUNKS],
+[AC_REQUIRE([GMP_ASM_TEXT])
+AC_CACHE_CHECK([if the assembler can support shared PIC thunks],
+               gmp_cv_asm_sparc_shared_thunks,
+[GMP_TRY_ASSEMBLE(
+[	$gmp_cv_asm_text
+	.section	.text.__sparc_get_pc_thunk.l7,"axG",@progbits,__sparc_get_pc_thunk.l7,comdat
+	.weak	__sparc_get_pc_thunk.l7
+	.hidden	__sparc_get_pc_thunk.l7
+	.type	__sparc_get_pc_thunk.l7, #function
+__sparc_get_pc_thunk.l7:
+	jmp	%o7+8
+	 add	%o7, %l7, %l7
+],
+[gmp_cv_asm_sparc_shared_thunks=yes],
+[gmp_cv_asm_sparc_shared_thunks=no])])
+
+GMP_DEFINE_RAW(["define(<HAVE_SHARED_THUNKS>,<$gmp_cv_asm_sparc_shared_thunks>)"])
+])
+
+
 dnl  GMP_C_ATTRIBUTE_CONST
 dnl  ---------------------
 
diff -r a51d8e63e08e configure.ac
--- a/configure.ac	Tue Apr 09 15:05:39 2013 +0200
+++ b/configure.ac	Wed Apr 10 10:01:13 2013 -0700
@@ -3483,12 +3483,14 @@
     power*-*-aix*)
       GMP_INCLUDE_MPN(powerpc32/aix.m4)
       ;;
-    sparcv9*-*-* | ultrasparc*-*-* | sparc64-*-*)
+    *sparc*-*-*)
       case $ABI in
         64)
           GMP_ASM_SPARC_REGISTER
           ;;
       esac
+      GMP_ASM_SPARC_GOTDATA
+      GMP_ASM_SPARC_SHARED_THUNKS
       ;;
     X86_PATTERN | X86_64_PATTERN)
       GMP_ASM_ALIGN_FILL_0x90
diff -r a51d8e63e08e mpn/sparc32/sparc-defs.m4
--- a/mpn/sparc32/sparc-defs.m4	Tue Apr 09 15:05:39 2013 +0200
+++ b/mpn/sparc32/sparc-defs.m4	Wed Apr 10 10:01:13 2013 -0700
@@ -35,6 +35,100 @@
 `ifelse(HAVE_REGISTER,yes,
 `.register `$1',`$2'')')
 
+dnl  Usage: LEA(symbol,reg,pic_reg)
+dnl
+dnl  Use whatever code sequence is appropriate to load "symbol" into register
+dnl  "reg", potentially using register "pic_reg" to perform the calculations.
+dnl  This takes into consideration things like PIC, whether we are generating
+dnl  64-bit code, etc.
+
+define(LEA,
+m4_assert_numargs(3)
+m4_assert_defined(`HAVE_GOTDATA')
+`ifdef(`PIC',`
+ifelse(HAVE_GOTDATA,yes,`
+	sethi	%hi(_GLOBAL_OFFSET_TABLE_-4), %`$3'
+	call	__sparc_get_pc_thunk.`$3'
+	 or	%`$3', %lo(_GLOBAL_OFFSET_TABLE_+4), %`$3'
+99:	sethi	%gdop_hix22(`$1'), %`$2'
+	xor	%`$2', %gdop_lox10(`$1'), %`$2'
+ifdef(`HAVE_ABI_64',`
+	ldx	[%`$3' + %`$2'], %`$2', %gdop(`$1')',`
+	ld	[%`$3' + %`$2'], %`$2', %gdop(`$1')')',`
+	sethi	%hi(_GLOBAL_OFFSET_TABLE_-4), %`$3'
+	call	__sparc_get_pc_thunk.`$3'
+	 or	%`$3', %lo(_GLOBAL_OFFSET_TABLE_+4), %`$3'
+99:	sethi	%hi(`$1'), %`$2'
+	or	%`$2', %lo(`$1'), %`$2'
+ifdef(`HAVE_ABI_64',`
+	ldx	[%`$3' + %`$2'], %`$2'',`
+	ld	[%`$3' + %`$2'], %`$2'')')',`
+ifdef(`HAVE_ABI_64',`
+	setx	`$1', %`$3', %`$2'',`
+	set	`$1', %`$2'')')')
+
+dnl  Usage: LEA_LEAF(symbol,reg,pic_reg)
+dnl
+dnl  Exactly the same as LEA except that it works in a leaf function.
+dnl  Specifically, when generating PIC code, it makes sure to preserve the %o7
+dnl  register.
+
+define(LEA_LEAF,
+m4_assert_numargs(3)
+m4_assert_defined(`HAVE_GOTDATA')
+`ifdef(`PIC',`
+ifelse(HAVE_GOTDATA,yes,`
+	sethi	%hi(_GLOBAL_OFFSET_TABLE_-4), %`$3'
+	mov	%o7, %`$2'
+	call	__sparc_get_pc_thunk.`$3'
+	 or	%`$3', %lo(_GLOBAL_OFFSET_TABLE_+4), %`$3'
+99:	mov	%`$2', %o7
+	sethi	%gdop_hix22(`$1'), %`$2'
+	xor	%`$2', %gdop_lox10(`$1'), %`$2'
+ifdef(`HAVE_ABI_64',`
+	ldx	[%`$3' + %`$2'], %`$2', %gdop(`$1')',`
+	ld	[%`$3' + %`$2'], %`$2', %gdop(`$1')')',`
+	sethi	%hi(_GLOBAL_OFFSET_TABLE_-4), %`$3'
+	mov	%o7, %`$2'
+	call	__sparc_get_pc_thunk.`$3'
+	 or	%`$3', %lo(_GLOBAL_OFFSET_TABLE_+4), %`$3'
+99:	mov	%`$2', %o7
+	sethi	%hi(`$1'), %`$2'
+	or	%`$2', %lo(`$1'), %`$2'
+ifdef(`HAVE_ABI_64',`
+	ldx	[%`$3' + %`$2'], %`$2'',`
+	ld	[%`$3' + %`$2'], %`$2'')')',`
+ifdef(`HAVE_ABI_64',`
+	setx	`$1', %`$3', %`$2'',`
+	set	`$1', %`$2'')')')
+
+dnl  Usage: LEA_THUNK(pic_reg)
+dnl
+dnl  Files that make use of LEA and LEA_LEAF must emit a PIC thunk using
+dnl  LEA_THUNK.  The "pic_reg" argument given must be exactly the same as the
+dnl  one given to the LEA and LEA_LEAF invocations.
+dnl
+dnl  If multiple PIC registers are used in invocations of LEA and LEA_LEAF then
+dnl  multiple thunks need to be emitted, one for each PIC register used.
+
+define(LEA_THUNK,
+m4_assert_numargs(1)
+m4_assert_defined(`HAVE_SHARED_THUNKS')
+`ifdef(`PIC',`
+ifelse(HAVE_SHARED_THUNKS,yes,`
+	.section	.text.__sparc_get_pc_thunk.`$1',"axG",@progbits,__sparc_get_pc_thunk.`$1',comdat
+	.weak	__sparc_get_pc_thunk.`$1'
+	.hidden	__sparc_get_pc_thunk.`$1'
+	.type	__sparc_get_pc_thunk.`$1', #function
+__sparc_get_pc_thunk.`$1':
+	jmp	%o7+8
+	 add	%o7, %`$1', %`$1'
+	TEXT
+',`
+	TEXT
+__sparc_get_pc_thunk.`$1':
+	retl
+	 add	%o7, %`$1', %`$1'')')')
 
 C Testing mechanism for running newer code on older processors
 ifdef(`FAKE_T3',`
diff -r a51d8e63e08e mpn/sparc32/udiv.asm
--- a/mpn/sparc32/udiv.asm	Tue Apr 09 15:05:39 2013 +0200
+++ b/mpn/sparc32/udiv.asm	Wed Apr 10 10:01:13 2013 -0700
@@ -28,13 +28,7 @@
 C d		i3
 
 ASM_START()
-
-ifdef(`PIC',
-`	TEXT
-L(getpc):
-	retl
-	nop')
-
+	LEA_THUNK(l7)
 	TEXT
 	ALIGN(8)
 L(C0):	.double	0r4294967296
@@ -45,11 +39,8 @@
 	st	%i1,[%fp-8]
 	ld	[%fp-8],%f10
 
-ifdef(`PIC',
-`L(pc):	call	L(getpc)		C put address of this insn in %o7
-	ldd	[%o7+L(C0)-L(pc)],%f8',
-`	sethi	%hi(L(C0)),%o7
-	ldd	[%o7+%lo(L(C0))],%f8')
+	LEA(L(C0),l0,l7)
+	ldd	[%l0], %f8
 
 	fitod	%f10,%f4
 	cmp	%i1,0
@@ -75,10 +66,8 @@
 L(250):
 	fdivd	%f2,%f4,%f2
 
-ifdef(`PIC',
-`	ldd	[%o7+L(C1)-L(pc)],%f4',
-`	sethi	%hi(L(C1)),%o7
-	ldd	[%o7+%lo(L(C1))],%f4')
+	LEA(L(C1),l0,l7)
+	ldd	[%l0], %f4
 
 	fcmped	%f2,%f4
 	nop
diff -r a51d8e63e08e mpn/sparc32/v8/addmul_1.asm
--- a/mpn/sparc32/v8/addmul_1.asm	Tue Apr 09 15:05:39 2013 +0200
+++ b/mpn/sparc32/v8/addmul_1.asm	Wed Apr 10 10:01:13 2013 -0700
@@ -28,21 +28,15 @@
 C s2_limb	o3
 
 ASM_START()
+	LEA_THUNK(g4)
+	TEXT
 PROLOGUE(mpn_addmul_1)
 	orcc	%g0,%g0,%g2
 	ld	[%o1+0],%o4	C 1
 
 	sll	%o2,4,%g1
 	and	%g1,(4-1)<<4,%g1
-ifdef(`PIC',
-`	mov	%o7,%g4		C Save return address register
-0:	call	1f
-	add	%o7,L(1)-0b,%g3
-1:	mov	%g4,%o7		C Restore return address register
-',
-`	sethi	%hi(L(1)),%g3
-	or	%g3,%lo(L(1)),%g3
-')
+	LEA_LEAF(L(1),g3,g4)
 	jmp	%g3+%g1
 	nop
 L(1):
diff -r a51d8e63e08e mpn/sparc32/v8/mul_1.asm
--- a/mpn/sparc32/v8/mul_1.asm	Tue Apr 09 15:05:39 2013 +0200
+++ b/mpn/sparc32/v8/mul_1.asm	Wed Apr 10 10:01:13 2013 -0700
@@ -28,18 +28,12 @@
 C s2_limb	o3
 
 ASM_START()
+	LEA_THUNK(g4)
+	TEXT
 PROLOGUE(mpn_mul_1)
 	sll	%o2,4,%g1
 	and	%g1,(4-1)<<4,%g1
-ifdef(`PIC',
-`	mov	%o7,%g4		C Save return address register
-0:	call	1f
-	add	%o7,L(1)-0b,%g3
-1:	mov	%g4,%o7		C Restore return address register
-',
-`	sethi	%hi(L(1)),%g3
-	or	%g3,%lo(L(1)),%g3
-')
+	LEA_LEAF(L(1),g3,g4)
 	jmp	%g3+%g1
 	ld	[%o1+0],%o4	C 1
 L(1):
diff -r a51d8e63e08e mpn/sparc32/v8/supersparc/udiv.asm
--- a/mpn/sparc32/v8/supersparc/udiv.asm	Tue Apr 09 15:05:39 2013 +0200
+++ b/mpn/sparc32/v8/supersparc/udiv.asm	Wed Apr 10 10:01:13 2013 -0700
@@ -29,28 +29,18 @@
 C d		i3
 
 ASM_START()
-
-ifdef(`PIC',
-`	TEXT
-L(getpc):
-	retl
-	nop')
-
+	LEA_THUNK(l7)
 	TEXT
 	ALIGN(8)
 L(C0):	.double	0r4294967296
 L(C1):	.double	0r2147483648
-
 PROLOGUE(mpn_udiv_qrnnd)
 	save	%sp,-104,%sp
 	st	%i1,[%fp-8]
 	ld	[%fp-8],%f10
 
-ifdef(`PIC',
-`L(pc):	call	L(getpc)		C put address of this insn in %o7
-	ldd	[%o7+L(C0)-L(pc)],%f8',
-`	sethi	%hi(L(C0)),%o7
-	ldd	[%o7+%lo(L(C0))],%f8')
+	LEA(L(C0),l0,l7)
+	ldd	[%l0], %f8
 
 	fitod	%f10,%f4
 	cmp	%i1,0
@@ -76,10 +66,8 @@
 L(250):
 	fdivd	%f2,%f4,%f2
 
-ifdef(`PIC',
-`	ldd	[%o7+L(C1)-L(pc)],%f4',
-`	sethi	%hi(L(C1)),%o7
-	ldd	[%o7+%lo(L(C1))],%f4')
+	LEA(L(C1),l0,l7)
+	ldd	[%l0], %f4
 
 	fcmped	%f2,%f4
 	nop
diff -r a51d8e63e08e mpn/sparc32/v8/udiv.asm
--- a/mpn/sparc32/v8/udiv.asm	Tue Apr 09 15:05:39 2013 +0200
+++ b/mpn/sparc32/v8/udiv.asm	Wed Apr 10 10:01:13 2013 -0700
@@ -29,28 +29,18 @@
 C d		i3
 
 ASM_START()
-
-ifdef(`PIC',
-`	TEXT
-L(getpc):
-	retl
-	nop')
-
+	LEA_THUNK(l7)
 	TEXT
 	ALIGN(8)
 L(C0):	.double	0r4294967296
 L(C1):	.double	0r2147483648
-
 PROLOGUE(mpn_udiv_qrnnd)
 	save	%sp,-104,%sp
 	st	%i1,[%fp-8]
 	ld	[%fp-8],%f10
 
-ifdef(`PIC',
-`L(pc):	call	L(getpc)		C put address of this insn in %o7
-	ldd	[%o7+L(C0)-L(pc)],%f8',
-`	sethi	%hi(L(C0)),%o7
-	ldd	[%o7+%lo(L(C0))],%f8')
+	LEA(L(C0),l0,l7)
+	ldd	[%l0], %f8
 
 	fitod	%f10,%f4
 	cmp	%i1,0
@@ -76,10 +66,8 @@
 L(250):
 	fdivd	%f2,%f4,%f2
 
-ifdef(`PIC',
-`	ldd	[%o7+L(C1)-L(pc)],%f4',
-`	sethi	%hi(L(C1)),%o7
-	ldd	[%o7+%lo(L(C1))],%f4')
+	LEA(L(C1),l0,l7)
+	ldd	[%l0], %f4
 
 	fcmped	%f2,%f4
 	nop
diff -r a51d8e63e08e mpn/sparc32/v9/sqr_diagonal.asm
--- a/mpn/sparc32/v9/sqr_diagonal.asm	Tue Apr 09 15:05:39 2013 +0200
+++ b/mpn/sparc32/v9/sqr_diagonal.asm	Wed Apr 10 10:01:13 2013 -0700
@@ -73,20 +73,14 @@
 define(`fanop',`fitod %f12,%f10')	dnl  A quasi nop running in the FA pipe
 
 ASM_START()
-
+	LEA_THUNK(l7)
 	TEXT
-	ALIGN(4)
-.Lnoll:
-	.word	0
-
+	INT32(noll, 0)
 PROLOGUE(mpn_sqr_diagonal)
 	save	%sp,-256,%sp
 
-ifdef(`PIC',
-`.Lpc:	rd	%pc,%o7
-	ld	[%o7+.Lnoll-.Lpc],%f8',
-`	sethi	%hi(.Lnoll),%g1
-	ld	[%g1+%lo(.Lnoll)],%f8')
+	LEA(L(noll),l0,l7)
+	ld	[%l0], %f8
 
 	sethi	%hi(0xffff0000),%g5
 	add	%i1,-8,%i1
diff -r a51d8e63e08e mpn/sparc64/gcd_1.asm
--- a/mpn/sparc64/gcd_1.asm	Tue Apr 09 15:05:39 2013 +0200
+++ b/mpn/sparc64/gcd_1.asm	Wed Apr 10 10:01:13 2013 -0700
@@ -58,6 +58,8 @@
 ASM_START()
 	REGISTER(%g2,#scratch)
 	REGISTER(%g3,#scratch)
+	LEA_THUNK(l7)
+	TEXT
 PROLOGUE(mpn_gcd_1)
 	save	%sp, -192, %sp
 	ldx	[up+0], %g1		C U low limb
@@ -93,22 +95,7 @@
 	 mov	0, %o3
 
 L(noreduce):
-
-ifdef(`PIC',`
-	sethi	%hi(_GLOBAL_OFFSET_TABLE_-4), %l7
-	call	L(LGETPC0)
-	add	%l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7
-	sethi	%hi(ctz_table), %g1
-	or	%g1, %lo(ctz_table), %g1
-	ldx	[%l7+%g1], %i5
-',`
-	sethi	%hh(ctz_table), %l7
-	or	%l7, %hm(ctz_table), %l7
-	sllx	%l7, 32, %l7
-	sethi	%lm(ctz_table), %g1
-	add	%l7, %g1, %l7
-	or	%l7, %lo(ctz_table), %i5
-')
+	LEA(ctz_table,i5,l7)
 
 	cmp	%o0, 0
 	bnz	%xcc, L(mid)
@@ -135,10 +122,4 @@
 L(shift_alot):
 	b	L(mid)
 	 and	%o0, MASK, %g3		C
-
-ifdef(`PIC',`
-L(LGETPC0):
-	retl
-	add	%o7, %l7, %l7
-')
 EPILOGUE()
diff -r a51d8e63e08e mpn/sparc64/ultrasparct3/dive_1.asm
--- a/mpn/sparc64/ultrasparct3/dive_1.asm	Tue Apr 09 15:05:39 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/dive_1.asm	Wed Apr 10 10:01:13 2013 -0700
@@ -36,6 +36,8 @@
 ASM_START()
 	REGISTER(%g2,#scratch)
 	REGISTER(%g3,#scratch)
+	LEA_THUNK(l7)
+	TEXT
 PROLOGUE(mpn_divexact_1)
 	save	%sp, -176, %sp
 	cmp	%i2, 1
@@ -54,23 +56,7 @@
 	srlx	d, 1, %g1
 	and	%g1, 127, %g1
 
-ifdef(`PIC',`
-C	save	%sp, -192, %sp
-	sethi	%hi(_GLOBAL_OFFSET_TABLE_-4), %l7
-	call	L(GETPC0)
-	add	%l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7
-	sethi	%hi(binvert_limb_table), %g2
-	or	%g2, %lo(binvert_limb_table), %g2
-	ldx	[%l7+%g2], %g2
-C	restore
-',`
-	sethi	%hh(binvert_limb_table), %g3
-	or	%g3, %hm(binvert_limb_table), %g3
-	sllx	%g3, 32, %g3
-	sethi	%lm(binvert_limb_table), %g2
-	add	%g3, %g2, %g3
-	or	%g3, %lo(binvert_limb_table), %g2
-')
+	LEA(binvert_limb_table,g2,l7)
 	ldub	[%g2+%g1], %g1
 	add	%g1, %g1, %g2
 	mulx	%g1, %g1, %g1
@@ -131,10 +117,4 @@
 
 	return	%i7+8
 	 nop
-
-ifdef(`PIC',`
-L(GETPC0):
-	retl
-	add	%o7, %l7, %l7
-')
 EPILOGUE()
diff -r a51d8e63e08e mpn/sparc64/ultrasparct3/invert_limb.asm
--- a/mpn/sparc64/ultrasparct3/invert_limb.asm	Tue Apr 09 15:05:39 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/invert_limb.asm	Wed Apr 10 10:01:13 2013 -0700
@@ -31,11 +31,13 @@
 ASM_START()
 	REGISTER(%g2,#scratch)
 	REGISTER(%g3,#scratch)
+	LEA_THUNK(g3)
+	TEXT
 PROLOGUE(mpn_invert_limb)
 	srlx	d, 55, %g1
 	add	%g1, %g1, %g1
-	sethi	%hi(approx_tab-512), %g2
-	or	%g2, %lo(approx_tab-512), %g2
+	LEA_LEAF(approx_tab,g2,g3)
+	sub	%g2, 512, %g2
 	lduh	[%g2+%g1], %g3
 	srlx	d, 24, %g4
 	add	%g4, 1, %g4
diff -r a51d8e63e08e mpn/sparc64/ultrasparct3/mode1o.asm
--- a/mpn/sparc64/ultrasparct3/mode1o.asm	Tue Apr 09 15:05:39 2013 +0200
+++ b/mpn/sparc64/ultrasparct3/mode1o.asm	Wed Apr 10 10:01:13 2013 -0700
@@ -37,27 +37,13 @@
 ASM_START()
 	REGISTER(%g2,#scratch)
 	REGISTER(%g3,#scratch)
+	LEA_THUNK(g3)
+	TEXT
 PROLOGUE(mpn_modexact_1c_odd)
 	srlx	d, 1, %g1
 	and	%g1, 127, %g1
 
-ifdef(`PIC',`
-	save	%sp, -192, %sp
-	sethi	%hi(_GLOBAL_OFFSET_TABLE_-4), %l7
-	call	L(GETPC0)
-	add	%l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7
-	sethi	%hi(binvert_limb_table), %g2
-	or	%g2, %lo(binvert_limb_table), %g2
-	ldx	[%l7+%g2], %g2
-	restore
-',`
-	sethi	%hh(binvert_limb_table), %g3
-	or	%g3, %hm(binvert_limb_table), %g3
-	sllx	%g3, 32, %g3
-	sethi	%lm(binvert_limb_table), %g2
-	add	%g3, %g2, %g3
-	or	%g3, %lo(binvert_limb_table), %g2
-')
+	LEA_LEAF(binvert_limb_table,g2,g3)
 	ldub	[%g2+%g1], %g1
 	add	%g1, %g1, %g2
 	mulx	%g1, %g1, %g1
@@ -84,10 +70,4 @@
 
 	retl
 	 mov	cy, %o0
-
-ifdef(`PIC',`
-L(GETPC0):
-	retl
-	add	%o7, %l7, %l7
-')
 EPILOGUE()


More information about the gmp-devel mailing list