[Gmp-commit] /var/hg/gmp: Make ia64 code work for HP-UX.

mercurial at gmplib.org mercurial at gmplib.org
Thu Jul 9 18:27:35 UTC 2015


details:   /var/hg/gmp/rev/d6e0a9147663
changeset: 16741:d6e0a9147663
user:      Torbjorn Granlund <torbjorng at google.com>
date:      Thu Jul 09 20:27:33 2015 +0200
description:
Make ia64 code work for HP-UX.

diffstat:

 ChangeLog                     |    14 +
 mpn/ia64/add_n_sub_n.asm      |     2 -
 mpn/ia64/addmul_2.asm         |   235 ++++---
 mpn/ia64/aors_n.asm           |  1204 ++++++++++++++++++++--------------------
 mpn/ia64/aorsorrlshC_n.asm    |   139 ++--
 mpn/ia64/cnd_aors_n.asm       |    59 +-
 mpn/ia64/gcd_1.asm            |   138 ++--
 mpn/ia64/lshiftc.asm          |     4 +-
 mpn/ia64/mod_34lsub1.asm      |    35 +-
 mpn/ia64/mul_2.asm            |   171 ++--
 mpn/ia64/sec_tabselect.asm    |    52 +-
 mpn/ia64/sqr_diag_addlsh1.asm |    56 +-
 12 files changed, 1082 insertions(+), 1027 deletions(-)

diffs (truncated from 3388 to 300 lines):

diff -r 122bbab78804 -r d6e0a9147663 ChangeLog
--- a/ChangeLog	Mon Jul 06 09:38:38 2015 +0200
+++ b/ChangeLog	Thu Jul 09 20:27:33 2015 +0200
@@ -1,3 +1,17 @@
+2015-07-09  Torbjörn Granlund  <torbjorng at google.com>
+
+	* mpn/ia64/add_n_sub_n.asm: Make it work for HP-UX.
+	* mpn/ia64/addmul_2.asm: Likewise.
+	* mpn/ia64/aors_n.asm: Likewise.
+	* mpn/ia64/aorsorrlshC_n.asm: Likewise.
+	* mpn/ia64/cnd_aors_n.asm: Likewise.
+	* mpn/ia64/gcd_1.asm: Likewise.
+	* mpn/ia64/lshiftc.asm: Likewise.
+	* mpn/ia64/mod_34lsub1.asm: Likewise.
+	* mpn/ia64/mul_2.asm: Likewise.
+	* mpn/ia64/sec_tabselect.asm: Likewise.
+	* mpn/ia64/sqr_diag_addlsh1.asm: Likewise.
+
 2015-07-01 Marco Bodrato <bodrato at mail.dm.unipi.it>
 
 	* gmp-impl.h(MPN_FILL): New macro, generalise MPN_ZERO.
diff -r 122bbab78804 -r d6e0a9147663 mpn/ia64/add_n_sub_n.asm
--- a/mpn/ia64/add_n_sub_n.asm	Mon Jul 06 09:38:38 2015 +0200
+++ b/mpn/ia64/add_n_sub_n.asm	Thu Jul 09 20:27:33 2015 +0200
@@ -53,8 +53,6 @@
 define(`vp0',`vp')
 define(`vp1',`r15')
 
-define(`cmpltu',  `cmp.ltu')
-define(`cmpeqor', `cmp.eq.or')
 
 ASM_START()
 PROLOGUE(mpn_add_n_sub_n)
diff -r 122bbab78804 -r d6e0a9147663 mpn/ia64/addmul_2.asm
--- a/mpn/ia64/addmul_2.asm	Mon Jul 06 09:38:38 2015 +0200
+++ b/mpn/ia64/addmul_2.asm	Thu Jul 09 20:27:33 2015 +0200
@@ -108,36 +108,37 @@
 	.body
 
 ifdef(`HAVE_ABI_32',`
-.mmi;		addp4	rp = 0, rp		C			M I
+ {.mmi;		addp4	rp = 0, rp		C			M I
 		addp4	up = 0, up		C			M I
 		addp4	vp = 0, vp		C			M I
-.mmi;		nop	1
+}{.mmi;		nop	1
 		nop	1
 		zxt4	n = n			C			I
-	;;')
+	;;
+}')
 
-.mmi;		ldf8	ux = [up], 8		C			M
+ {.mmi;		ldf8	ux = [up], 8		C			M
 		ldf8	v0 = [vp], 8		C			M
 		mov	r2 = ar.lc		C			I0
-.mmi;		ldf8	rx = [rp], 8		C			M
+}{.mmi;		ldf8	rx = [rp], 8		C			M
 		and	r14 = 3, n		C			M I
 		add	n = -2, n		C			M I
 	;;
-.mmi;		ldf8	uy = [up], 8		C			M
+}{.mmi;		ldf8	uy = [up], 8		C			M
 		ldf8	v1 = [vp]		C			M
 		shr.u	n = n, 2		C			I0
-.mmi;		ldf8	ry = [rp], -8		C			M
+}{.mmi;		ldf8	ry = [rp], -8		C			M
 		cmp.eq	p14, p0 = 1, r14	C			M I
 		cmp.eq	p11, p0 = 2, r14	C			M I
 	;;
-.mmi;		add	srp = 16, rp		C			M I
+}{.mmi;		add	srp = 16, rp		C			M I
 		cmp.eq	p15, p0 = 3, r14	C			M I
 		mov	ar.lc = n		C			I0
-.bbb;	(p14)	br.dptk	L(x01)			C			B
+}{.bbb;	(p14)	br.dptk	L(x01)			C			B
 	(p11)	br.dptk	L(x10)			C			B
 	(p15)	br.dptk	L(x11)			C			B
 	;;
-
+}
 L(x00):		cmp.ne	p6, p0 = r0, r0		C suppress initial xma pair
 		mov	fp2a_3 = f0
 		br	L(b00)
@@ -159,52 +160,53 @@
 	.body
 
 ifdef(`HAVE_ABI_32',`
-.mmi;		addp4	rp = 0, rp		C			M I
+ {.mmi;		addp4	rp = 0, rp		C			M I
 		addp4	up = 0, up		C			M I
 		addp4	vp = 0, vp		C			M I
-.mmi;		nop	1
+}{.mmi;		nop	1
 		nop	1
 		zxt4	n = n			C			I
-	;;')
+	;;
+}')
 
-.mmi;		ldf8	ux = [up], 8		C			M
+ {.mmi;		ldf8	ux = [up], 8		C			M
 		ldf8	v0 = [vp], 8		C			M
 		mov	r2 = ar.lc		C			I0
-.mmi;		ldf8	rx = [rp], 8		C			M
+}{.mmi;		ldf8	rx = [rp], 8		C			M
 		and	r14 = 3, n		C			M I
 		add	n = -2, n		C			M I
 	;;
-.mmi;		ldf8	uy = [up], 8		C			M
+}{.mmi;		ldf8	uy = [up], 8		C			M
 		ldf8	v1 = [vp]		C			M
 		shr.u	n = n, 2		C			I0
-.mmi;		ldf8	ry = [rp], -8		C			M
+}{.mmi;		ldf8	ry = [rp], -8		C			M
 		cmp.eq	p14, p0 = 1, r14	C			M I
 		cmp.eq	p11, p0 = 2, r14	C			M I
 	;;
-.mmi;		add	srp = 16, rp		C			M I
+}{.mmi;		add	srp = 16, rp		C			M I
 		cmp.eq	p15, p6 = 3, r14	C			M I
 		mov	ar.lc = n		C			I0
-.bbb;	(p14)	br.dptk	L(b01)			C			B
+}{.bbb;	(p14)	br.dptk	L(b01)			C			B
 	(p11)	br.dptk	L(b10)			C			B
 	(p15)	br.dptk	L(b11)			C			B
 	;;
-
+}
 	ALIGN(32)
 L(b00):
-.mmi;		ldf8	r_1 = [srp], 8
+ {.mmi;		ldf8	r_1 = [srp], 8
 		ldf8	u_1 = [up], 8
 		mov	acc1_2 = 0
-.mmi;		mov	pr1_2 = 0
+}{.mmi;		mov	pr1_2 = 0
 		mov	pr0_3 = 0
 		cmp.ne	p8, p9 = r0, r0
 	;;
-.mfi;		ldf8	r_2 = [srp], 8
+}{.mfi;		ldf8	r_2 = [srp], 8
 		xma.l	fp0b_3 = ux, v0, rx
 		cmp.ne	p12, p13 = r0, r0
-.mfb;		ldf8	u_2 = [up], 8
+}{.mfb;		ldf8	u_2 = [up], 8
 		xma.hu	fp1b_3 = ux, v0, rx
 		br.cloop.dptk	L(gt4)
-
+}
 		xma.l	fp0b_0 = uy, v0, ry
 		xma.hu	fp1a_0 = uy, v0, ry
 	;;
@@ -253,30 +255,30 @@
 
 	ALIGN(32)
 L(b01):
-.mmi;		ldf8	r_0 = [srp], 8		C M
+ {.mmi;		ldf8	r_0 = [srp], 8		C M
 		ldf8	u_0 = [up], 8		C M
 		mov	acc1_1 = 0		C M I
-.mmi;		mov	pr1_1 = 0		C M I
+}{.mmi;		mov	pr1_1 = 0		C M I
 		mov	pr0_2 = 0		C M I
 		cmp.ne	p6, p7 = r0, r0		C M I
 	;;
-.mfi;		ldf8	r_1 = [srp], 8		C M
+}{.mfi;		ldf8	r_1 = [srp], 8		C M
 		xma.l	fp0b_2 = ux, v0, rx	C F
 		cmp.ne	p10, p11 = r0, r0	C M I
-.mfi;		ldf8	u_1 = [up], 8		C M
+}{.mfi;		ldf8	u_1 = [up], 8		C M
 		xma.hu	fp1b_2 = ux, v0, rx	C F
 		nop	1
 	;;
-		xma.l	fp0b_3 = uy, v0, ry	C F
+}		xma.l	fp0b_3 = uy, v0, ry	C F
 		xma.hu	fp1a_3 = uy, v0, ry	C F
 	;;
-.mmf;		getfsig	acc0 = fp0b_2		C M
+ {.mmf;		getfsig	acc0 = fp0b_2		C M
 		ldf8	r_2 = [srp], 8		C M
 	(p14)	xma.hu	fp2a_2 = ux, v1,fp1b_2	C F	suppressed for addmul_2s
-.mfb;		ldf8	u_2 = [up], 8		C M
+}{.mfb;		ldf8	u_2 = [up], 8		C M
 	(p14)	xma.l	fp1b_2 = ux, v1,fp1b_2	C F	suppressed for addmul_2s
 		br.cloop.dptk	L(gt5)
-
+}
 		xma.l	fp0b_0 = u_0, v0, r_0	C F
 		xma.hu	fp1a_0 = u_0, v0, r_0	C F
 	;;
@@ -335,7 +337,7 @@
 		cmp.ltu	p8, p9 = s0, pr1_1
 		sub	r31 = -1, acc1_1
 	;;
-		.pred.rel "mutex", p8, p9
+	.pred.rel "mutex", p8, p9
 	(p8)	add	acc0 = pr1_2, acc1_1, 1
 	(p9)	add	acc0 = pr1_2, acc1_1
 	(p8)	cmp.leu	p10, p0 = r31, pr1_2
@@ -347,18 +349,18 @@
 
 
 L(gt2):
-.mmi;		ldf8	r_3 = [srp], 8
+ {.mmi;		ldf8	r_3 = [srp], 8
 		ldf8	u_3 = [up], 8
 		mov	acc1_0 = 0
 	;;
-.mfi;		ldf8	r_0 = [srp], 8
+}{.mfi;		ldf8	r_0 = [srp], 8
 		xma.l	fp0b_1 = ux, v0, rx
 		mov	pr1_0 = 0
-.mfi;		ldf8	u_0 = [up], 8
+}{.mfi;		ldf8	u_0 = [up], 8
 		xma.hu	fp1b_1 = ux, v0, rx
 		mov	pr0_1 = 0
 	;;
-		xma.l	fp0b_2 = uy, v0, ry
+}		xma.l	fp0b_2 = uy, v0, ry
 		xma.hu	fp1a_2 = uy, v0, ry
 	;;
 		getfsig	acc0 = fp0b_1
@@ -378,12 +380,13 @@
 		ldf8	u_2 = [up], 8
 		getfsig	pr1_1 = fp1b_1
 	;;
-.mfi;		getfsig	acc1_1 = fp2a_1
+ {.mfi;		getfsig	acc1_1 = fp2a_1
 		xma.l	fp0b_0 = u_0, v0, r_0
 		cmp.ne	p8, p9 = r0, r0
-.mfb;		cmp.ne	p12, p13 = r0, r0
+}{.mfb;		cmp.ne	p12, p13 = r0, r0
 		xma.hu	fp1a_0 = u_0, v0, r_0
 		br.cloop.sptk.clr	L(top)
+}
 		br.many	L(end)
 
 
@@ -455,7 +458,7 @@
 C *** MAIN LOOP START ***
 	ALIGN(32)
 L(top):						C 00
-		.pred.rel "mutex", p12, p13
+	.pred.rel "mutex", p12, p13
 		getfsig	pr0_3 = fp0b_3
 		ldf8	r_3 = [srp], 8
 		xma.l	fp1b_3 = u_3, v1, fp1a_3
@@ -463,8 +466,8 @@
 	(p13)	add	s0 = pr1_0, acc0
 		xma.hu	fp2a_3 = u_3, v1, fp1a_3
 	;;					C 01
-		.pred.rel "mutex", p8, p9
-		.pred.rel "mutex", p12, p13
+	.pred.rel "mutex", p8, p9
+	.pred.rel "mutex", p12, p13
 		ldf8	u_3 = [up], 8
 		getfsig	pr1_2 = fp1b_2
 	(p8)	cmp.leu	p6, p7 = acc0, pr0_1
@@ -472,7 +475,7 @@
 	(p12)	cmp.leu	p10, p11 = s0, pr1_0
 	(p13)	cmp.ltu	p10, p11 = s0, pr1_0
 	;;					C 02
-		.pred.rel "mutex", p6, p7
+	.pred.rel "mutex", p6, p7
 		getfsig	acc1_2 = fp2a_2
 		st8	[rp] = s0, 8
 		xma.l	fp0b_1 = u_1, v0, r_1
@@ -481,7 +484,7 @@
 		xma.hu	fp1a_1 = u_1, v0, r_1
 	;;					C 03
 L(01):
-		.pred.rel "mutex", p10, p11
+	.pred.rel "mutex", p10, p11
 		getfsig	pr0_0 = fp0b_0
 		ldf8	r_0 = [srp], 8
 		xma.l	fp1b_0 = u_0, v1, fp1a_0
@@ -489,8 +492,8 @@
 	(p11)	add	s0 = pr1_1, acc0
 		xma.hu	fp2a_0 = u_0, v1, fp1a_0
 	;;					C 04
-		.pred.rel "mutex", p6, p7
-		.pred.rel "mutex", p10, p11
+	.pred.rel "mutex", p6, p7
+	.pred.rel "mutex", p10, p11
 		ldf8	u_0 = [up], 8
 		getfsig	pr1_3 = fp1b_3
 	(p6)	cmp.leu	p8, p9 = acc0, pr0_2
@@ -498,7 +501,7 @@
 	(p10)	cmp.leu	p12, p13 = s0, pr1_1
 	(p11)	cmp.ltu	p12, p13 = s0, pr1_1
 	;;					C 05
-		.pred.rel "mutex", p8, p9
+	.pred.rel "mutex", p8, p9
 		getfsig	acc1_3 = fp2a_3
 		st8	[rp] = s0, 8
 		xma.l	fp0b_2 = u_2, v0, r_2
@@ -507,7 +510,7 @@
 		xma.hu	fp1a_2 = u_2, v0, r_2


More information about the gmp-commit mailing list