[Gmp-commit] /var/hg/gmp: Make ia64 code work for HP-UX.
mercurial at gmplib.org
mercurial at gmplib.org
Thu Jul 9 18:27:35 UTC 2015
details: /var/hg/gmp/rev/d6e0a9147663
changeset: 16741:d6e0a9147663
user: Torbjorn Granlund <torbjorng at google.com>
date: Thu Jul 09 20:27:33 2015 +0200
description:
Make ia64 code work for HP-UX.
diffstat:
ChangeLog | 14 +
mpn/ia64/add_n_sub_n.asm | 2 -
mpn/ia64/addmul_2.asm | 235 ++++---
mpn/ia64/aors_n.asm | 1204 ++++++++++++++++++++--------------------
mpn/ia64/aorsorrlshC_n.asm | 139 ++--
mpn/ia64/cnd_aors_n.asm | 59 +-
mpn/ia64/gcd_1.asm | 138 ++--
mpn/ia64/lshiftc.asm | 4 +-
mpn/ia64/mod_34lsub1.asm | 35 +-
mpn/ia64/mul_2.asm | 171 ++--
mpn/ia64/sec_tabselect.asm | 52 +-
mpn/ia64/sqr_diag_addlsh1.asm | 56 +-
12 files changed, 1082 insertions(+), 1027 deletions(-)
diffs (truncated from 3388 to 300 lines):
diff -r 122bbab78804 -r d6e0a9147663 ChangeLog
--- a/ChangeLog Mon Jul 06 09:38:38 2015 +0200
+++ b/ChangeLog Thu Jul 09 20:27:33 2015 +0200
@@ -1,3 +1,17 @@
+2015-07-09 Torbjörn Granlund <torbjorng at google.com>
+
+ * mpn/ia64/add_n_sub_n.asm: Make it work for HP-UX.
+ * mpn/ia64/addmul_2.asm: Likewise.
+ * mpn/ia64/aors_n.asm: Likewise.
+ * mpn/ia64/aorsorrlshC_n.asm: Likewise.
+ * mpn/ia64/cnd_aors_n.asm: Likewise.
+ * mpn/ia64/gcd_1.asm: Likewise.
+ * mpn/ia64/lshiftc.asm: Likewise.
+ * mpn/ia64/mod_34lsub1.asm: Likewise.
+ * mpn/ia64/mul_2.asm: Likewise.
+ * mpn/ia64/sec_tabselect.asm: Likewise.
+ * mpn/ia64/sqr_diag_addlsh1.asm: Likewise.
+
2015-07-01 Marco Bodrato <bodrato at mail.dm.unipi.it>
* gmp-impl.h(MPN_FILL): New macro, generalise MPN_ZERO.
diff -r 122bbab78804 -r d6e0a9147663 mpn/ia64/add_n_sub_n.asm
--- a/mpn/ia64/add_n_sub_n.asm Mon Jul 06 09:38:38 2015 +0200
+++ b/mpn/ia64/add_n_sub_n.asm Thu Jul 09 20:27:33 2015 +0200
@@ -53,8 +53,6 @@
define(`vp0',`vp')
define(`vp1',`r15')
-define(`cmpltu', `cmp.ltu')
-define(`cmpeqor', `cmp.eq.or')
ASM_START()
PROLOGUE(mpn_add_n_sub_n)
diff -r 122bbab78804 -r d6e0a9147663 mpn/ia64/addmul_2.asm
--- a/mpn/ia64/addmul_2.asm Mon Jul 06 09:38:38 2015 +0200
+++ b/mpn/ia64/addmul_2.asm Thu Jul 09 20:27:33 2015 +0200
@@ -108,36 +108,37 @@
.body
ifdef(`HAVE_ABI_32',`
-.mmi; addp4 rp = 0, rp C M I
+ {.mmi; addp4 rp = 0, rp C M I
addp4 up = 0, up C M I
addp4 vp = 0, vp C M I
-.mmi; nop 1
+}{.mmi; nop 1
nop 1
zxt4 n = n C I
- ;;')
+ ;;
+}')
-.mmi; ldf8 ux = [up], 8 C M
+ {.mmi; ldf8 ux = [up], 8 C M
ldf8 v0 = [vp], 8 C M
mov r2 = ar.lc C I0
-.mmi; ldf8 rx = [rp], 8 C M
+}{.mmi; ldf8 rx = [rp], 8 C M
and r14 = 3, n C M I
add n = -2, n C M I
;;
-.mmi; ldf8 uy = [up], 8 C M
+}{.mmi; ldf8 uy = [up], 8 C M
ldf8 v1 = [vp] C M
shr.u n = n, 2 C I0
-.mmi; ldf8 ry = [rp], -8 C M
+}{.mmi; ldf8 ry = [rp], -8 C M
cmp.eq p14, p0 = 1, r14 C M I
cmp.eq p11, p0 = 2, r14 C M I
;;
-.mmi; add srp = 16, rp C M I
+}{.mmi; add srp = 16, rp C M I
cmp.eq p15, p0 = 3, r14 C M I
mov ar.lc = n C I0
-.bbb; (p14) br.dptk L(x01) C B
+}{.bbb; (p14) br.dptk L(x01) C B
(p11) br.dptk L(x10) C B
(p15) br.dptk L(x11) C B
;;
-
+}
L(x00): cmp.ne p6, p0 = r0, r0 C suppress initial xma pair
mov fp2a_3 = f0
br L(b00)
@@ -159,52 +160,53 @@
.body
ifdef(`HAVE_ABI_32',`
-.mmi; addp4 rp = 0, rp C M I
+ {.mmi; addp4 rp = 0, rp C M I
addp4 up = 0, up C M I
addp4 vp = 0, vp C M I
-.mmi; nop 1
+}{.mmi; nop 1
nop 1
zxt4 n = n C I
- ;;')
+ ;;
+}')
-.mmi; ldf8 ux = [up], 8 C M
+ {.mmi; ldf8 ux = [up], 8 C M
ldf8 v0 = [vp], 8 C M
mov r2 = ar.lc C I0
-.mmi; ldf8 rx = [rp], 8 C M
+}{.mmi; ldf8 rx = [rp], 8 C M
and r14 = 3, n C M I
add n = -2, n C M I
;;
-.mmi; ldf8 uy = [up], 8 C M
+}{.mmi; ldf8 uy = [up], 8 C M
ldf8 v1 = [vp] C M
shr.u n = n, 2 C I0
-.mmi; ldf8 ry = [rp], -8 C M
+}{.mmi; ldf8 ry = [rp], -8 C M
cmp.eq p14, p0 = 1, r14 C M I
cmp.eq p11, p0 = 2, r14 C M I
;;
-.mmi; add srp = 16, rp C M I
+}{.mmi; add srp = 16, rp C M I
cmp.eq p15, p6 = 3, r14 C M I
mov ar.lc = n C I0
-.bbb; (p14) br.dptk L(b01) C B
+}{.bbb; (p14) br.dptk L(b01) C B
(p11) br.dptk L(b10) C B
(p15) br.dptk L(b11) C B
;;
-
+}
ALIGN(32)
L(b00):
-.mmi; ldf8 r_1 = [srp], 8
+ {.mmi; ldf8 r_1 = [srp], 8
ldf8 u_1 = [up], 8
mov acc1_2 = 0
-.mmi; mov pr1_2 = 0
+}{.mmi; mov pr1_2 = 0
mov pr0_3 = 0
cmp.ne p8, p9 = r0, r0
;;
-.mfi; ldf8 r_2 = [srp], 8
+}{.mfi; ldf8 r_2 = [srp], 8
xma.l fp0b_3 = ux, v0, rx
cmp.ne p12, p13 = r0, r0
-.mfb; ldf8 u_2 = [up], 8
+}{.mfb; ldf8 u_2 = [up], 8
xma.hu fp1b_3 = ux, v0, rx
br.cloop.dptk L(gt4)
-
+}
xma.l fp0b_0 = uy, v0, ry
xma.hu fp1a_0 = uy, v0, ry
;;
@@ -253,30 +255,30 @@
ALIGN(32)
L(b01):
-.mmi; ldf8 r_0 = [srp], 8 C M
+ {.mmi; ldf8 r_0 = [srp], 8 C M
ldf8 u_0 = [up], 8 C M
mov acc1_1 = 0 C M I
-.mmi; mov pr1_1 = 0 C M I
+}{.mmi; mov pr1_1 = 0 C M I
mov pr0_2 = 0 C M I
cmp.ne p6, p7 = r0, r0 C M I
;;
-.mfi; ldf8 r_1 = [srp], 8 C M
+}{.mfi; ldf8 r_1 = [srp], 8 C M
xma.l fp0b_2 = ux, v0, rx C F
cmp.ne p10, p11 = r0, r0 C M I
-.mfi; ldf8 u_1 = [up], 8 C M
+}{.mfi; ldf8 u_1 = [up], 8 C M
xma.hu fp1b_2 = ux, v0, rx C F
nop 1
;;
- xma.l fp0b_3 = uy, v0, ry C F
+} xma.l fp0b_3 = uy, v0, ry C F
xma.hu fp1a_3 = uy, v0, ry C F
;;
-.mmf; getfsig acc0 = fp0b_2 C M
+ {.mmf; getfsig acc0 = fp0b_2 C M
ldf8 r_2 = [srp], 8 C M
(p14) xma.hu fp2a_2 = ux, v1,fp1b_2 C F suppressed for addmul_2s
-.mfb; ldf8 u_2 = [up], 8 C M
+}{.mfb; ldf8 u_2 = [up], 8 C M
(p14) xma.l fp1b_2 = ux, v1,fp1b_2 C F suppressed for addmul_2s
br.cloop.dptk L(gt5)
-
+}
xma.l fp0b_0 = u_0, v0, r_0 C F
xma.hu fp1a_0 = u_0, v0, r_0 C F
;;
@@ -335,7 +337,7 @@
cmp.ltu p8, p9 = s0, pr1_1
sub r31 = -1, acc1_1
;;
- .pred.rel "mutex", p8, p9
+ .pred.rel "mutex", p8, p9
(p8) add acc0 = pr1_2, acc1_1, 1
(p9) add acc0 = pr1_2, acc1_1
(p8) cmp.leu p10, p0 = r31, pr1_2
@@ -347,18 +349,18 @@
L(gt2):
-.mmi; ldf8 r_3 = [srp], 8
+ {.mmi; ldf8 r_3 = [srp], 8
ldf8 u_3 = [up], 8
mov acc1_0 = 0
;;
-.mfi; ldf8 r_0 = [srp], 8
+}{.mfi; ldf8 r_0 = [srp], 8
xma.l fp0b_1 = ux, v0, rx
mov pr1_0 = 0
-.mfi; ldf8 u_0 = [up], 8
+}{.mfi; ldf8 u_0 = [up], 8
xma.hu fp1b_1 = ux, v0, rx
mov pr0_1 = 0
;;
- xma.l fp0b_2 = uy, v0, ry
+} xma.l fp0b_2 = uy, v0, ry
xma.hu fp1a_2 = uy, v0, ry
;;
getfsig acc0 = fp0b_1
@@ -378,12 +380,13 @@
ldf8 u_2 = [up], 8
getfsig pr1_1 = fp1b_1
;;
-.mfi; getfsig acc1_1 = fp2a_1
+ {.mfi; getfsig acc1_1 = fp2a_1
xma.l fp0b_0 = u_0, v0, r_0
cmp.ne p8, p9 = r0, r0
-.mfb; cmp.ne p12, p13 = r0, r0
+}{.mfb; cmp.ne p12, p13 = r0, r0
xma.hu fp1a_0 = u_0, v0, r_0
br.cloop.sptk.clr L(top)
+}
br.many L(end)
@@ -455,7 +458,7 @@
C *** MAIN LOOP START ***
ALIGN(32)
L(top): C 00
- .pred.rel "mutex", p12, p13
+ .pred.rel "mutex", p12, p13
getfsig pr0_3 = fp0b_3
ldf8 r_3 = [srp], 8
xma.l fp1b_3 = u_3, v1, fp1a_3
@@ -463,8 +466,8 @@
(p13) add s0 = pr1_0, acc0
xma.hu fp2a_3 = u_3, v1, fp1a_3
;; C 01
- .pred.rel "mutex", p8, p9
- .pred.rel "mutex", p12, p13
+ .pred.rel "mutex", p8, p9
+ .pred.rel "mutex", p12, p13
ldf8 u_3 = [up], 8
getfsig pr1_2 = fp1b_2
(p8) cmp.leu p6, p7 = acc0, pr0_1
@@ -472,7 +475,7 @@
(p12) cmp.leu p10, p11 = s0, pr1_0
(p13) cmp.ltu p10, p11 = s0, pr1_0
;; C 02
- .pred.rel "mutex", p6, p7
+ .pred.rel "mutex", p6, p7
getfsig acc1_2 = fp2a_2
st8 [rp] = s0, 8
xma.l fp0b_1 = u_1, v0, r_1
@@ -481,7 +484,7 @@
xma.hu fp1a_1 = u_1, v0, r_1
;; C 03
L(01):
- .pred.rel "mutex", p10, p11
+ .pred.rel "mutex", p10, p11
getfsig pr0_0 = fp0b_0
ldf8 r_0 = [srp], 8
xma.l fp1b_0 = u_0, v1, fp1a_0
@@ -489,8 +492,8 @@
(p11) add s0 = pr1_1, acc0
xma.hu fp2a_0 = u_0, v1, fp1a_0
;; C 04
- .pred.rel "mutex", p6, p7
- .pred.rel "mutex", p10, p11
+ .pred.rel "mutex", p6, p7
+ .pred.rel "mutex", p10, p11
ldf8 u_0 = [up], 8
getfsig pr1_3 = fp1b_3
(p6) cmp.leu p8, p9 = acc0, pr0_2
@@ -498,7 +501,7 @@
(p10) cmp.leu p12, p13 = s0, pr1_1
(p11) cmp.ltu p12, p13 = s0, pr1_1
;; C 05
- .pred.rel "mutex", p8, p9
+ .pred.rel "mutex", p8, p9
getfsig acc1_3 = fp2a_3
st8 [rp] = s0, 8
xma.l fp0b_2 = u_2, v0, r_2
@@ -507,7 +510,7 @@
xma.hu fp1a_2 = u_2, v0, r_2
More information about the gmp-commit mailing list