Problems building on HP-UX 11.31/IA
Albert Chin
gmp-bugs at mlists.thewrittenword.com
Sun Aug 25 18:11:54 CEST 2013
On Sat, Aug 24, 2013 at 07:31:44PM +0200, Niels Möller wrote:
> Albert Chin <gmp-bugs at mlists.thewrittenword.com> writes:
>
> > /opt/fsw/m414/bin/gm4 -DHAVE_CONFIG_H -D__GMP_WITHIN_GMP -DOPERATION_add_n -DPIC add_n.asm >tmp-add_n.s
> > cc -c -DHAVE_CONFIG_H -I. -I.. -D__GMP_WITHIN_GMP -I.. -DOPERATION_add_n +O3 tmp-add_n.s +Z -DPIC -o .libs/add_n.o
> > tmp-add_n.s:116: error 4032: must be in a bundle
> > tmp-add_n.s:147: error 4032: must be in a bundle
>
> Can you show what the tmp-add_n.s file looks like? (Not sure, but you
> may need to comment out the rm command on the last line of the script
> mpn/m4-ccas).
>
> The assembler file in question has seen only one, apparently trivial,
> change since 2011, so if it has stopped working now, it might be due to
> other changes in the configure or m4 machinery.
gmp-5.0.5 built fine on this platform.
gmp-5.0.5% ls -ld add_n.asm
lrwxrwxr-x 1 china src 22 Aug 25 14:38 add_n.asm -> ../mpn/ia64/aors_n.asm
gmp-5.1.2% ls -ld add_n.asm
lrwxrwxr-x 1 china src 22 Aug 25 14:38 add_n.asm -> ../mpn/ia64/aors_n.asm
From the 5.1.2 ChangeLog:
2010-04-10 Torbjorn Granlund <tege at gmplib.org>
* mpn/ia64/lorrshift.asm: Rewrite feed-in and wind-down code.
* mpn/ia64/aorslsh1_n.asm: Adapt to new aorslsh1_n.
* mpn/ia64/aorslsh1_n.asm: Likewise.
* mpn/ia64/aors_n.asm: Complete rewrite.
* mpn/ia64/aorslsh1_n.asm: Likewise.
* mpn/ia64/add_n_sub_n.asm: Misc cleanups. Add slotting comments.
* mpn/ia64/lshiftc.asm: New file.
It seems the following changesets were introduced between 5.0.5 and
5.1.2:
changeset: 13762:389ed05793c7
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Jan 22 22:30:31 2011 +0100
summary: Fix some incorrect bundle types.
changeset: 13558:a4322947edf2
user: Torbjorn Granlund <tege at gmplib.org>
date: Sat Apr 10 18:29:36 2010 +0200
summary: Rewrite Itanium addition code.
changeset: 13495:228d9deade5b
user: Torbjorn Granlund <tege at gmplib.org>
date: Mon Mar 15 02:12:46 2010 +0100
summary: Insert explicitly typed nops to trigger intended bundling.
I've attached a copy of tmp-add_n.s from 5.0.5 and 5.1.2.
--
albert chin (china at thewrittenword.com)
-------------- next part --------------
// __gmpn_add_n (first attachment): entry sequence and dispatch on r35 & 7.
// Registers as used in this routine: r32 = store pointer, r33/r34 = load
// pointers, r35 = limb count, r8 = value set just before br.ret (0 or 1).
// Presumably these are (rp, up, vp, n) and the carry return of GMP's
// mpn_add_n -- confirm against mpn/ia64/aors_n.asm.
.text
.align 32
.global __gmpn_add_n#
.proc __gmpn_add_n#
__gmpn_add_n:
.prologue
.save ar.lc, r2
.body
// Widen the three pointer arguments (addp4) and the count (zxt4) from 32 bits.
addp4 r32 = 0, r32
addp4 r33 = 0, r33
addp4 r34 = 0, r34
zxt4 r35 = r35
;;
// Load the first limb of each source and save ar.lc in r2 (restored at .Lcj1).
{.mmi
ld8 r11 = [r34], 8
ld8 r10 = [r33], 8
mov.i r2 = ar.lc
}
// r14 = r35 & 7 selects the feed-in path; p15 = (8 < r35), p14 = its
// complement; r35 is then reduced by 8.
{.mmi
and r14 = 7, r35
cmp.lt p15, p14 = 8, r35
add r35 = -8, r35
;;
}
// Branch to .Lb001 ... .Lb110 for r14 = 1 ... 6.
{.mmi
cmp.eq p6, p0 = 1, r14
cmp.eq p7, p0 = 2, r14
cmp.eq p8, p0 = 3, r14
}
{.bbb
(p6) br.dptk .Lb001
(p7) br.dptk .Lb010
(p8) br.dptk .Lb011
;;
}
{.mmi
cmp.eq p9, p0 = 4, r14
cmp.eq p10, p0 = 5, r14
cmp.eq p11, p0 = 6, r14
}
{.bbb
(p9) br.dptk .Lb100
(p10) br.dptk .Lb101
(p11) br.dptk .Lb110
;;
}
// r35 is decremented once more here, unpredicated, so both the r14 == 7
// branch and the r14 == 0 fall-through into .Lb000 see it.
{.mmb
cmp.eq p12, p0 = 7, r14
add r35 = -1, r35
(p12) br.dptk .Lb111
}
// .Lb000: feed-in for r14 == 0, reached by fall-through from the dispatch.
// Loads seven more limb pairs and forms the first sums, then either goes to
// the wind-down at .Lcj8 when p14 is set (count not above 8) or continues at
// .grt8 and enters the main loop at .LL000.
.Lb000: ld8 r26 = [r34], 8
ld8 r16 = [r33], 8
// r3 = r32 + 8 is a second store pointer (used by "st8 [r3]" at .LL000/.Lcj8).
add r3 = 8, r32
;;
ld8 r27 = [r34], 8
ld8 r17 = [r33], 8
add r9 = r10, r11
;;
ld8 r28 = [r34], 8
ld8 r18 = [r33], 8
// Unsigned sum < operand means the addition wrapped: p7 = carry out of r9.
cmp.ltu p7, p0 = r9, r10
;;
ld8 r29 = [r34], 8
ld8 r19 = [r33], 8
add r8 = r16, r26
;;
ld8 r30 = [r34], 8
ld8 r20 = [r33], 8
cmp.ltu p8, p0 = r8, r16
;;
ld8 r31 = [r34], 8
ld8 r21 = [r33], 8
add r23 = r17, r27
;;
ld8 r24 = [r34], 8
ld8 r14 = [r33], 8
cmp.ltu p9, p0 = r23, r17
// Fold carry p7 into r8: if r8 is all ones the increment will wrap, so p8
// is also set; then r8 += 1.
(p7) cmp.eq.or p8, p0 = -1, r8
(p7) add r8 = 1, r8
(p14) br.cond.dptk .Lcj8
;;
.grt8: ld8 r25 = [r34], 8
ld8 r15 = [r33], 8
// r35 >> 3 is the value written to ar.lc below.
shr.u r35 = r35, 3
;;
// r10/r11 are reused as addresses 512 bytes past the load pointers; the loop
// issues lfetch on them.
add r11 = 512, r34
ld8 r26 = [r34], 8
add r10 = 512, r33
ld8 r16 = [r33], 8
nop.i 0
nop.b 0
;;
ld8 r27 = [r34], 8
ld8 r17 = [r33], 8
mov.i ar.lc = r35
br .LL000
// .Lb001: feed-in for r14 == 1.
// With p15 clear (count not above 8) only one limb is processed: the sum in
// r22 and its carry in p6 are handed to .Lcj1.  Otherwise .grt1 loads eight
// more limb pairs and enters the loop at .Loop via br.cloop, or goes to
// .Lcj9 when br.cloop falls through.
.Lb001: add r3 = 16, r32
add r22 = r10, r11
(p15) br.cond.dpnt .grt1
;;
cmp.ltu p6, p0 = r22, r10
mov r8 = 0
br .Lcj1
.grt1: ld8 r25 = [r34], 8
ld8 r15 = [r33], 8
shr.u r35 = r35, 3
;;
ld8 r26 = [r34], 8
ld8 r16 = [r33], 8
// r0 always reads as zero, so this compare clears p9, the carry-in
// predicate tested at the top of .Loop.
cmp.ne p9, p0 = r0, r0
;;
ld8 r27 = [r34], 8
ld8 r17 = [r33], 8
mov.i ar.lc = r35
;;
ld8 r28 = [r34], 8
ld8 r18 = [r33], 8
// p6 = carry out of the first sum r22.
cmp.ltu p6, p0 = r22, r10
;;
ld8 r29 = [r34], 8
ld8 r19 = [r33], 8
add r9 = r15, r25
;;
ld8 r30 = [r34], 8
ld8 r20 = [r33], 8
cmp.ltu p7, p0 = r9, r15
;;
ld8 r31 = [r34], 8
ld8 r21 = [r33], 8
add r8 = r16, r26
;;
// r10/r11 become the lfetch addresses used inside the loop.
add r11 = 512, r34
ld8 r24 = [r34], 8
add r10 = 512, r33
ld8 r14 = [r33], 8
br.cloop.dptk .Loop
br .Lcj9
// .Lb010: feed-in for r14 == 2.
// With p15 clear the two sums (r23, r22) are finished at .Lcj2; otherwise
// .grt2 loads further limb pairs and enters the loop at .LL01x.
.Lb010: ld8 r24 = [r34], 8
ld8 r14 = [r33], 8
add r3 = 24, r32
add r23 = r10, r11
(p15) br.cond.dpnt .grt2
;;
// p9 = carry out of the first sum r23.
cmp.ltu p9, p0 = r23, r10
add r22 = r14, r24
br .Lcj2
.grt2: ld8 r25 = [r34], 8
ld8 r15 = [r33], 8
shr.u r35 = r35, 3
;;
ld8 r26 = [r34], 8
ld8 r16 = [r33], 8
;;
ld8 r27 = [r34], 8
ld8 r17 = [r33], 8
mov.i ar.lc = r35
;;
ld8 r28 = [r34], 8
ld8 r18 = [r33], 8
;;
ld8 r29 = [r34], 8
ld8 r19 = [r33], 8
cmp.ltu p9, p0 = r23, r10
;;
ld8 r30 = [r34], 8
ld8 r20 = [r33], 8
add r22 = r14, r24
;;
// r10/r11 become the lfetch addresses used inside the loop.
add r11 = 512, r34
ld8 r31 = [r34], 8
add r10 = 512, r33
ld8 r21 = [r33], 8
br .LL01x
// .Lb011: feed-in for r14 == 3.
// With p15 clear the first sum r8 is stored here and the remaining two are
// finished from .Lcj3; otherwise .grt3 continues loading, stores r8, and
// enters the loop at .LL01x.
.Lb011: ld8 r31 = [r34], 8
ld8 r21 = [r33], 8
add r8 = r10, r11
;;
ld8 r24 = [r34], 8
ld8 r14 = [r33], 8
(p15) br.cond.dpnt .grt3
;;
// p8 = carry out of r8; it is applied to r23 at .Lcj3.
cmp.ltu p8, p0 = r8, r10
add r23 = r21, r31
;;
st8 [r32] = r8, 8
cmp.ltu p9, p0 = r23, r21
br .Lcj3
.grt3: ld8 r25 = [r34], 8
ld8 r15 = [r33], 8
add r3 = 32, r32
;;
ld8 r26 = [r34], 8
ld8 r16 = [r33], 8
shr.u r35 = r35, 3
;;
ld8 r27 = [r34], 8
ld8 r17 = [r33], 8
cmp.ltu p8, p0 = r8, r10
;;
ld8 r28 = [r34], 8
ld8 r18 = [r33], 8
mov.i ar.lc = r35
add r23 = r21, r31
nop.i 0
nop.b 0
;;
ld8 r29 = [r34], 8
ld8 r19 = [r33], 8
cmp.ltu p9, p0 = r23, r21
;;
// r10/r11 become the lfetch addresses used inside the loop.
add r11 = 512, r34
ld8 r30 = [r34], 8
add r10 = 512, r33
ld8 r20 = [r33], 8
// Fold carry p8 into r23: set p9 if r23 is all ones, then (below) r23 += 1.
(p8) cmp.eq.or p9, p0 = -1, r23
;;
ld8 r31 = [r34], 8
ld8 r21 = [r33], 8
(p8) add r23 = 1, r23
st8 [r32] = r8, 8
add r22 = r14, r24
br .LL01x
// .Lb100: feed-in for r14 == 4.
// With p15 clear the sums r9, r8, r23 are handed to the wind-down at .Lcj4;
// otherwise .grt4 continues loading and enters the loop at .LL100.
.Lb100: ld8 r30 = [r34], 8
ld8 r20 = [r33], 8
add r3 = 8, r32
;;
ld8 r31 = [r34], 8
ld8 r21 = [r33], 8
add r9 = r10, r11
;;
ld8 r24 = [r34], 8
ld8 r14 = [r33], 8
(p15) br.cond.dpnt .grt4
;;
// p7 / p8 = carries out of r9 / r8 (sum wrapped below its operand).
cmp.ltu p7, p0 = r9, r10
add r8 = r20, r30
;;
cmp.ltu p8, p0 = r8, r20
add r23 = r21, r31
br .Lcj4
.grt4: ld8 r25 = [r34], 8
ld8 r15 = [r33], 8
shr.u r35 = r35, 3
cmp.ltu p7, p0 = r9, r10
;;
ld8 r26 = [r34], 8
ld8 r16 = [r33], 8
add r8 = r20, r30
;;
ld8 r27 = [r34], 8
ld8 r17 = [r33], 8
cmp.ltu p8, p0 = r8, r20
;;
ld8 r28 = [r34], 8
ld8 r18 = [r33], 8
mov.i ar.lc = r35
;;
ld8 r29 = [r34], 8
ld8 r19 = [r33], 8
add r23 = r21, r31
;;
// r10/r11 become the lfetch addresses used inside the loop.
add r11 = 512, r34
ld8 r30 = [r34], 8
add r10 = 512, r33
ld8 r20 = [r33], 8
cmp.ltu p9, p0 = r23, r21
;;
ld8 r31 = [r34], 8
ld8 r21 = [r33], 8
// Fold carry p7 into r8: set p8 if r8 is all ones, then r8 += 1.
(p7) cmp.eq.or p8, p0 = -1, r8
(p7) add r8 = 1, r8
br .LL100
// .Lb101: feed-in for r14 == 5.
// With p15 clear the pending sums are finished from .Lcj5; otherwise .grt5
// continues loading and enters the loop at .LL101.
.Lb101: ld8 r29 = [r34], 8
ld8 r19 = [r33], 8
add r3 = 16, r32
;;
ld8 r30 = [r34], 8
ld8 r20 = [r33], 8
add r22 = r10, r11
;;
ld8 r31 = [r34], 8
ld8 r21 = [r33], 8
// p6 = carry out of the first sum r22.
cmp.ltu p6, p0 = r22, r10
;;
ld8 r24 = [r34], 8
ld8 r14 = [r33], 8
add r9 = r19, r29
// The shift is done before the branch; its result is only consumed by the
// "mov.i ar.lc" on the .grt5 path.
shr.u r35 = r35, 3
(p15) br.cond.dpnt .grt5
;;
cmp.ltu p7, p0 = r9, r19
add r8 = r20, r30
br .Lcj5
.grt5: ld8 r25 = [r34], 8
ld8 r15 = [r33], 8
;;
ld8 r26 = [r34], 8
ld8 r16 = [r33], 8
mov.i ar.lc = r35
;;
ld8 r27 = [r34], 8
ld8 r17 = [r33], 8
cmp.ltu p7, p0 = r9, r19
;;
ld8 r28 = [r34], 8
ld8 r18 = [r33], 8
add r8 = r20, r30
;;
// r10/r11 become the lfetch addresses used inside the loop.
add r11 = 512, r34
ld8 r29 = [r34], 8
add r10 = 512, r33
ld8 r19 = [r33], 8
br .LL101
// .Lb110: feed-in for r14 == 6.
// With p14 set (count not above 8) the pending sums are finished from
// .Lcj67; otherwise .grt6 continues loading and enters the loop at .LL11x.
.Lb110: ld8 r28 = [r34], 8
ld8 r18 = [r33], 8
add r3 = 24, r32
;;
ld8 r29 = [r34], 8
ld8 r19 = [r33], 8
add r23 = r10, r11
;;
ld8 r30 = [r34], 8
ld8 r20 = [r33], 8
shr.u r35 = r35, 3
;;
ld8 r31 = [r34], 8
ld8 r21 = [r33], 8
// p9 = carry out of the first sum r23.
cmp.ltu p9, p0 = r23, r10
;;
ld8 r24 = [r34], 8
ld8 r14 = [r33], 8
add r22 = r18, r28
(p14) br.cond.dptk .Lcj67
;;
.grt6: ld8 r25 = [r34], 8
ld8 r15 = [r33], 8
mov.i ar.lc = r35
// Same compare and add as already done above, with the same operands.
cmp.ltu p9, p0 = r23, r10
nop.i 0
nop.b 0
;;
ld8 r26 = [r34], 8
ld8 r16 = [r33], 8
add r22 = r18, r28
;;
// r10/r11 become the lfetch addresses used inside the loop.
add r11 = 512, r34
ld8 r27 = [r34], 8
add r10 = 512, r33
ld8 r17 = [r33], 8
br .LL11x
// .Lb111: feed-in for r14 == 7.
// The first sum r8 is stored on both paths.  With p15 clear the rest is
// finished from .Lcj67; otherwise .grt7 continues loading and enters the
// loop at .LL11x.
.Lb111: ld8 r27 = [r34], 8
ld8 r17 = [r33], 8
add r3 = 32, r32
;;
ld8 r28 = [r34], 8
ld8 r18 = [r33], 8
add r8 = r10, r11
;;
ld8 r29 = [r34], 8
ld8 r19 = [r33], 8
// p8 = carry out of the first sum r8.
cmp.ltu p8, p0 = r8, r10
;;
ld8 r30 = [r34], 8
ld8 r20 = [r33], 8
add r23 = r17, r27
;;
ld8 r31 = [r34], 8
ld8 r21 = [r33], 8
cmp.ltu p9, p0 = r23, r17
;;
ld8 r24 = [r34], 8
ld8 r14 = [r33], 8
(p15) br.cond.dpnt .grt7
;;
st8 [r32] = r8, 8
// Fold carry p8 into r23: set p9 if r23 is all ones, then r23 += 1.
(p8) cmp.eq.or p9, p0 = -1, r23
(p8) add r23 = 1, r23
add r22 = r18, r28
br .Lcj67
.grt7: ld8 r25 = [r34], 8
ld8 r15 = [r33], 8
shr.u r35 = r35, 3
(p8) cmp.eq.or p9, p0 = -1, r23
nop.i 0
nop.b 0
;;
// r10/r11 become the lfetch addresses used inside the loop.
add r11 = 512, r34
ld8 r26 = [r34], 8
add r10 = 512, r33
ld8 r16 = [r33], 8
(p8) add r23 = 1, r23
nop.b 0
;;
ld8 r27 = [r34], 8
ld8 r17 = [r33], 8
mov.i ar.lc = r35
st8 [r32] = r8, 8
add r22 = r18, r28
br .LL11x
// .Loop: main loop, eight limbs (eight st8) per iteration, counted by ar.lc
// through br.cloop.  .LL000, .LL11x, .LL101, .LL100 and .LL01x are the
// mid-loop entry points used by the feed-in paths.
// Recurring pattern for each limb:
//   add      sum = a, b
//   cmp.ltu  pOut = sum, a          carry out when the sum wrapped
//   (pIn) cmp.eq.or pOut = -1, sum  incoming carry will wrap an all-ones sum
//   (pIn) add sum = 1, sum          apply the incoming carry
// The predicates p6..p9 rotate through the roles of pIn/pOut.
.align 32
.Loop: ld8 r25 = [r34], 8
cmp.ltu p7, p0 = r9, r15
(p9) cmp.eq.or p6, p0 = -1, r22
ld8 r15 = [r33], 8
(p9) add r22 = 1, r22
add r8 = r16, r26
;;
ld8 r26 = [r34], 8
cmp.ltu p8, p0 = r8, r16
(p6) cmp.eq.or p7, p0 = -1, r9
ld8 r16 = [r33], 8
(p6) add r9 = 1, r9
add r23 = r17, r27
;;
st8 [r32] = r22, 8
ld8 r27 = [r34], 8
cmp.ltu p9, p0 = r23, r17
(p7) cmp.eq.or p8, p0 = -1, r8
ld8 r17 = [r33], 8
(p7) add r8 = 1, r8
;;
// Two stores per group here: r32 advances by 16 and r3 by 32, so the two
// pointers interleave over the destination.
.LL000: st8 [r32] = r9, 16
st8 [r3] = r8, 32
(p8) cmp.eq.or p9, p0 = -1, r23
// Prefetch through r10 (set to r33 + 512 by the feed-in code), stepping 64.
lfetch [r10], 64
(p8) add r23 = 1, r23
add r22 = r18, r28
;;
.LL11x: st8 [r32] = r23, 8
ld8 r28 = [r34], 8
cmp.ltu p6, p0 = r22, r18
ld8 r18 = [r33], 8
add r9 = r19, r29
;;
ld8 r29 = [r34], 8
cmp.ltu p7, p0 = r9, r19
(p9) cmp.eq.or p6, p0 = -1, r22
ld8 r19 = [r33], 8
(p9) add r22 = 1, r22
add r8 = r20, r30
;;
.LL101: ld8 r30 = [r34], 8
cmp.ltu p8, p0 = r8, r20
(p6) cmp.eq.or p7, p0 = -1, r9
ld8 r20 = [r33], 8
(p6) add r9 = 1, r9
add r23 = r21, r31
;;
st8 [r32] = r22, 8
ld8 r31 = [r34], 8
cmp.ltu p9, p0 = r23, r21
(p7) cmp.eq.or p8, p0 = -1, r8
ld8 r21 = [r33], 8
(p7) add r8 = 1, r8
;;
.LL100: st8 [r32] = r9, 16
st8 [r3] = r8, 32
(p8) cmp.eq.or p9, p0 = -1, r23
// Prefetch through r11 (set to r34 + 512 by the feed-in code), stepping 64.
lfetch [r11], 64
(p8) add r23 = 1, r23
add r22 = r14, r24
;;
.LL01x: st8 [r32] = r23, 8
ld8 r24 = [r34], 8
cmp.ltu p6, p0 = r22, r14
ld8 r14 = [r33], 8
add r9 = r15, r25
br.cloop.dptk .Loop
;;
// Fall-through once br.cloop is not taken: first wind-down stage, the same
// arithmetic as the top of .Loop but without the loads.
cmp.ltu p7, p0 = r9, r15
(p9) cmp.eq.or p6, p0 = -1, r22
(p9) add r22 = 1, r22
add r8 = r16, r26
;;
// Wind-down: .Lcj9, .Lcj8, .Lcj67, .Lcj5, .Lcj4, .Lcj3, .Lcj2, .Lcj1.
// Each label is an entry point for a feed-in path whose count needs no
// (further) loop iterations; control falls through from one stage to the
// next.  No loads are issued here, only the remaining carry propagation and
// stores.  The routine returns with r8 = 1 if p6 (the last carry) is set,
// otherwise r8 = 0.
.Lcj9: cmp.ltu p8, p0 = r8, r16
(p6) cmp.eq.or p7, p0 = -1, r9
st8 [r32] = r22, 8
(p6) add r9 = 1, r9
add r23 = r17, r27
;;
cmp.ltu p9, p0 = r23, r17
(p7) cmp.eq.or p8, p0 = -1, r8
(p7) add r8 = 1, r8
;;
.Lcj8: st8 [r32] = r9, 16
st8 [r3] = r8, 32
(p8) cmp.eq.or p9, p0 = -1, r23
(p8) add r23 = 1, r23
add r22 = r18, r28
;;
.Lcj67: st8 [r32] = r23, 8
cmp.ltu p6, p0 = r22, r18
add r9 = r19, r29
;;
cmp.ltu p7, p0 = r9, r19
(p9) cmp.eq.or p6, p0 = -1, r22
(p9) add r22 = 1, r22
add r8 = r20, r30
;;
.Lcj5: cmp.ltu p8, p0 = r8, r20
(p6) cmp.eq.or p7, p0 = -1, r9
st8 [r32] = r22, 8
(p6) add r9 = 1, r9
add r23 = r21, r31
;;
.Lcj4: cmp.ltu p9, p0 = r23, r21
(p7) cmp.eq.or p8, p0 = -1, r8
(p7) add r8 = 1, r8
;;
st8 [r32] = r9, 16
st8 [r3] = r8, 32
.Lcj3:
(p8) cmp.eq.or p9, p0 = -1, r23
(p8) add r23 = 1, r23
add r22 = r14, r24
;;
.Lcj2: st8 [r32] = r23, 8
cmp.ltu p6, p0 = r22, r14
;;
(p9) cmp.eq.or p6, p0 = -1, r22
(p9) add r22 = 1, r22
// r8 is no longer needed as a limb; preset the return value to 0.
mov r8 = 0
;;
// Store the last limb, restore ar.lc from the copy saved in r2 at entry,
// and return the final carry (p6) in r8.
.Lcj1: st8 [r32] = r22, 8
mov.i ar.lc = r2
(p6) mov r8 = 1
br.ret.sptk.many b0
.endp __gmpn_add_n#
-------------- next part --------------
// __gmpn_add_nc (second attachment): entry sequence and dispatch on r35 & 7.
// Register use matches __gmpn_add_n in this file (r32 store pointer, r33/r34
// load pointers, r35 count) plus r36, which is copied to r23 and tested for
// non-zero -- presumably the carry-in argument of GMP's mpn_add_nc; confirm
// against mpn/ia64/aors_n.asm.  The feed-in paths (.Lc000 ... .Lc111) branch
// into the loop and wind-down labels defined in __gmpn_add_n below.
.text
.align 32
.global __gmpn_add_nc#
.proc __gmpn_add_nc#
__gmpn_add_nc:
.prologue
.save ar.lc, r2
.body
// Widen the three pointer arguments (addp4) and the count (zxt4) from 32 bits.
addp4 r32 = 0, r32
addp4 r33 = 0, r33
addp4 r34 = 0, r34
zxt4 r35 = r35
;;
// Load the first limb of each source; save ar.lc in r2.
// r14 = r35 & 7; p15 = (8 < r35), p14 = its complement; r35 -= 6.
{.mmi; ld8 r11 = [r34], 8
ld8 r10 = [r33], 8
mov r2 = ar.lc
}{.mmi; and r14 = 7, r35
cmp.lt p15, p14 = 8, r35
add r35 = -6, r35
;;
}
// r20/r21 = load pointers + 500 (the shared loop issues lfetch on them);
// r23 = copy of r36.
// NOTE(review): this bundle uses a bare ".mmi;" directive while the bundles
// before and after it use the braced "{.mmi; ... }" form -- check whether
// the HP-UX assembler accepts this mix.
.mmi; add r20 = 500, r33
add r21 = 500, r34
mov r23 = r36
;;
// Branch to .Lc001 ... .Lc110 for r14 = 1 ... 6.
{.mmi; cmp.eq p6, p0 = 1, r14
cmp.eq p7, p0 = 2, r14
cmp.eq p8, p0 = 3, r14
}{.bbb
(p6) br.dptk .Lc001
(p7) br.dptk .Lc010
(p8) br.dptk .Lc011
;;
}
{.mmi; cmp.eq p9, p0 = 4, r14
cmp.eq p10, p0 = 5, r14
cmp.eq p11, p0 = 6, r14
}{.bbb
(p9) br.dptk .Lc100
(p10) br.dptk .Lc101
(p11) br.dptk .Lc110
;;
// The second limb pair (r19/r18) and p13 = (r36 != 0) are set up before the
// last branch, so both .Lc111 and the fall-through .Lc000 can use them.
}{.mmi; ld8 r19 = [r34], 8
ld8 r18 = [r33], 8
cmp.ne p13, p0 = 0, r36
}{.mmb; cmp.eq p12, p0 = 7, r14
nop 0
(p12) br.dptk .Lc111
;;
}
// .Lc000: __gmpn_add_nc feed-in for r14 == 0 (fall-through from dispatch).
// Uses the limb pairs r10/r11 and r18/r19 loaded by the entry code, applies
// the carry-in predicate p13 to the first sum, and joins the shared loop at
// .Lm0 (defined in __gmpn_add_n in this file).
.Lc000:
.mmi; ld8 r27 = [r34], 8
ld8 r17 = [r33], 8
shr.u r35 = r35, 3
;;
.mmi; add r21 = 500, r34
ld8 r24 = [r34], 8
mov ar.lc = r35
.mmi; ld8 r14 = [r33], 8
add r29 = r10, r11
nop 0
;;
.mmi; add r20 = 500, r33
ld8 r25 = [r34], 8
// p7 = carry out of the first sum r29 (sum wrapped below its operand).
cmp.ltu p7, p0 = r29, r10
.mmi; ld8 r15 = [r33], 8
add r30 = r18, r19
add r3 = 8, r32
;;
.mmi; ld8 r26 = [r34], 8
cmp.ltu p8, p0 = r30, r18
// Apply the carry-in: if r29 is all ones the increment wraps (set p7).
(p13) cmp.eq.or p7, p0 = -1, r29
.mmi; ld8 r16 = [r33], 8
(p13) add r29 = 1, r29
add r31 = r17, r27
;;
.mmi; ld8 r27 = [r34], 8
cmp.ltu p9, p0 = r31, r17
(p7) cmp.eq.or p8, p0 = -1, r30
.mmb; ld8 r17 = [r33], 8
(p7) add r30 = 1, r30
br .Lm0
// .Lc001: __gmpn_add_nc feed-in for r14 == 1.
// With p15 clear only one limb is processed: the carry-in (r23 != 0) is
// applied to r28 and control goes to .Lcj1.  With p15 set, the code at local
// label 1 loads further limb pairs and joins the shared loop at .Lc1.
.Lc001:
.mmi;
// These two loads are predicated on p15, so they are skipped on the
// single-limb path.
(p15) ld8 r25 = [r34], 8
(p15) ld8 r15 = [r33], 8
add r28 = r10, r11
.mmb; nop 0
nop 0
(p15) br 1f
;;
// p9 = carry-in present; r8 preset to 0; p6 = carry out of r28.
.mmi; cmp.ne p9, p0 = 0, r23
mov r8 = 0
cmp.ltu p6, p0 = r28, r10
;;
.mmb;
(p9) cmp.eq.or p6, p0 = -1, r28
(p9) add r28 = 1, r28
br .Lcj1
1:
.mmi; ld8 r26 = [r34], 8
ld8 r16 = [r33], 8
shr.u r35 = r35, 3
;;
.mmi; ld8 r27 = [r34], 8
ld8 r17 = [r33], 8
mov ar.lc = r35
.mmi; nop 0
cmp.ne p9, p0 = 0, r23
nop 0
;;
.mmi; ld8 r24 = [r34], 8
cmp.ltu p6, p0 = r28, r10
add r3 = 16, r32
.mmb; ld8 r14 = [r33], 8
add r29 = r15, r25
br .Lc1
// .Lc010: __gmpn_add_nc feed-in for r14 == 2.
// p8 = carry-in present (r23 != 0).  With p15 clear both sums are completed
// here and control goes to .Lcj2; otherwise local label 1 loads further limb
// pairs and joins the shared loop at .Lm23.
.Lc010:
.mmi; ld8 r24 = [r34], 8
ld8 r14 = [r33], 8
mov r8 = 0
.mmb; add r31 = r10, r11
cmp.ne p8, p0 = 0, r23
(p15) br 1f
;;
.mmi; cmp.ltu p9, p0 = r31, r10
add r28 = r14, r24
(p8) add r31 = 1, r31
;;
.mmb; cmp.ltu p6, p0 = r28, r14
// r31 has already been incremented in the previous group, so the wrap test
// compares against 0 here rather than -1.
(p8) cmp.eq.or p9, p0 = 0, r31
br .Lcj2
1:
.mmi; ld8 r25 = [r34], 8
ld8 r15 = [r33], 8
shr.u r35 = r35, 3
;;
.mmi; ld8 r26 = [r34], 8
ld8 r16 = [r33], 8
mov ar.lc = r35
;;
.mmi; ld8 r27 = [r34], 8
ld8 r17 = [r33], 8
cmp.ltu p9, p0 = r31, r10
;;
.mmi;
(p8) cmp.eq.or p9, p0 = -1, r31
(p8) add r31 = 1, r31
add r28 = r14, r24
.mmb; add r3 = 24, r32
nop 0
br .Lm23
// .Lc011: __gmpn_add_nc feed-in for r14 == 3.
// p7 = carry-in present (r23 != 0).  With p15 clear the first sum r30 gets
// the carry-in and control goes to .Lcj3; otherwise local label 1 loads
// further limb pairs, stores r30, and joins the shared loop at .Lm23.
.Lc011:
.mmi; ld8 r27 = [r34], 8
ld8 r17 = [r33], 8
shr.u r35 = r35, 3
.mmi; add r30 = r10, r11
cmp.ne p7, p0 = 0, r23
nop 0
;;
.mmb; ld8 r24 = [r34], 8
ld8 r14 = [r33], 8
(p15) br 1f
// p8 = carry out of the first sum r30.
.mmi; cmp.ltu p8, p0 = r30, r10
add r31 = r17, r27
nop 0
;;
.mmb;
(p7) cmp.eq.or p8, p0 = -1, r30
(p7) add r30 = 1, r30
br .Lcj3
1:
.mmi; ld8 r25 = [r34], 8
ld8 r15 = [r33], 8
add r31 = r17, r27
;;
.mmi; ld8 r26 = [r34], 8
ld8 r16 = [r33], 8
cmp.ltu p8, p0 = r30, r10
;;
.mmi; ld8 r27 = [r34], 8
cmp.ltu p9, p0 = r31, r17
mov ar.lc = r35
.mmi; ld8 r17 = [r33], 8
(p7) cmp.eq.or p8, p0 = -1, r30
(p7) add r30 = 1, r30
;;
.mmi; add r3 = 32, r32
st8 [r32] = r30, 8
(p8) cmp.eq.or p9, p0 = -1, r31
.mmb;
(p8) add r31 = 1, r31
add r28 = r14, r24
br .Lm23
// .Lc100: __gmpn_add_nc feed-in for r14 == 4.
// p6 = carry-in present (r23 != 0), applied to the first sum r29.  With p14
// set control goes to the wind-down at .Lcj4; otherwise loading continues
// and the shared loop is joined at .Lm4.
.Lc100:
.mmi; ld8 r26 = [r34], 8
ld8 r16 = [r33], 8
shr.u r35 = r35, 3
.mmi; add r29 = r10, r11
nop 0
nop 0
;;
.mmi; ld8 r27 = [r34], 8
ld8 r17 = [r33], 8
add r3 = 8, r32
.mmi; cmp.ne p6, p0 = 0, r23
// p7 = carry out of the first sum r29.
cmp.ltu p7, p0 = r29, r10
nop 0
;;
.mmi; ld8 r24 = [r34], 8
ld8 r14 = [r33], 8
add r30 = r16, r26
.mmb;
(p6) cmp.eq.or p7, p0 = -1, r29
(p6) add r29 = 1, r29
(p14) br .Lcj4
;;
.mmi; ld8 r25 = [r34], 8
ld8 r15 = [r33], 8
mov ar.lc = r35
;;
.mmi; ld8 r26 = [r34], 8
cmp.ltu p8, p0 = r30, r16
nop 0
.mmi; ld8 r16 = [r33], 8
nop 0
add r31 = r17, r27
;;
.mmi; ld8 r27 = [r34], 8
cmp.ltu p9, p0 = r31, r17
(p7) cmp.eq.or p8, p0 = -1, r30
.mmb; ld8 r17 = [r33], 8
(p7) add r30 = 1, r30
br .Lm4
// .Lc101: __gmpn_add_nc feed-in for r14 == 5.
// p9 = carry-in present (r23 != 0).  Ends with a two-way branch: .Lc5 (top
// of the shared loop) when p15 is set, otherwise .Lend (wind-down).
.Lc101:
.mmi; ld8 r25 = [r34], 8
ld8 r15 = [r33], 8
shr.u r35 = r35, 3
;;
.mmi; ld8 r26 = [r34], 8
ld8 r16 = [r33], 8
mov ar.lc = r35
;;
.mmi; ld8 r27 = [r34], 8
ld8 r17 = [r33], 8
add r28 = r10, r11
.mmi; cmp.ne p9, p0 = 0, r23
add r3 = 16, r32
nop 0
;;
// NOTE(review): the next bundle is declared ".mmi" but its third instruction
// is an ld8 (a memory operation), whereas the third slot of an mmi template
// is an I slot.  Confirm against the .asm source and the assembler in use;
// this may be relevant to the "must be in a bundle" errors being reported.
.mmi; ld8 r24 = [r34], 8
cmp.ltu p6, p0 = r28, r10
ld8 r14 = [r33], 8
.mbb; add r29 = r15, r25
(p15) br .Lc5
br .Lend
// .Lc110: __gmpn_add_nc feed-in for r14 == 6.
// p8 = carry-in present (r23 != 0), applied to the first sum r31.  There is
// no separate short-count path here: control always joins the shared loop at
// .Lm67, where br.cloop decides whether to iterate.
.Lc110:
.mmi; ld8 r24 = [r34], 8
ld8 r14 = [r33], 8
shr.u r35 = r35, 3
;;
.mmi; add r20 = 500, r33
add r21 = 500, r34
mov ar.lc = r35
.mmi; ld8 r25 = [r34], 8
ld8 r15 = [r33], 8
add r31 = r10, r11
;;
.mmi; ld8 r26 = [r34], 8
ld8 r16 = [r33], 8
add r28 = r14, r24
// p9 = carry out of the first sum r31.
.mmi; cmp.ltu p9, p0 = r31, r10
cmp.ne p8, p0 = 0, r23
add r3 = 24, r32
;;
.mmi; ld8 r27 = [r34], 8
ld8 r17 = [r33], 8
nop 0
.mmb;
(p8) cmp.eq.or p9, p0 = -1, r31
(p8) add r31 = 1, r31
br .Lm67
// .Lc111: __gmpn_add_nc feed-in for r14 == 7.
// Uses the limb pairs r10/r11 and r18/r19 and the carry-in predicate p13,
// all set up by the entry code.  Stores the first sum r30 and joins the
// shared loop at .Lm67 (defined in __gmpn_add_n in this file).
.Lc111:
.mmi; ld8 r24 = [r34], 8
ld8 r14 = [r33], 8
shr.u r35 = r35, 3
;;
.mmi; add r20 = 500, r33
ld8 r25 = [r34], 8
mov ar.lc = r35
.mmi; ld8 r15 = [r33], 8
add r30 = r10, r11
nop 0
;;
.mmi; add r21 = 500, r34
ld8 r26 = [r34], 8
// p8 = carry out of the first sum r30.
cmp.ltu p8, p0 = r30, r10
.mmi; ld8 r16 = [r33], 8
add r31 = r18, r19
nop 0
;;
.mmi; ld8 r27 = [r34], 8
cmp.ltu p9, p0 = r31, r18
// Apply the carry-in: if r30 is all ones the increment wraps (set p8).
(p13) cmp.eq.or p8, p0 = -1, r30
.mmi; ld8 r17 = [r33], 8
(p13) add r30 = 1, r30
nop 0
;;
.mmi; add r3 = 32, r32
st8 [r32] = r30, 8
(p8) cmp.eq.or p9, p0 = -1, r31
.mmb;
(p8) add r31 = 1, r31
add r28 = r14, r24
br .Lm67
.endp __gmpn_add_nc#
// __gmpn_add_n (second attachment): entry sequence and dispatch on r35 & 7.
// Registers as used in this routine: r32 = store pointer, r33/r34 = load
// pointers, r35 = limb count, r8 = value set just before br.ret (0 or 1).
// Presumably (rp, up, vp, n) and the carry return of GMP's mpn_add_n --
// confirm against mpn/ia64/aors_n.asm.
.text
.align 32
.global __gmpn_add_n#
.proc __gmpn_add_n#
__gmpn_add_n:
.prologue
.save ar.lc, r2
.body
// Widen the three pointer arguments (addp4) and the count (zxt4) from 32 bits.
addp4 r32 = 0, r32
addp4 r33 = 0, r33
addp4 r34 = 0, r34
zxt4 r35 = r35
;;
// Load the first limb of each source; save ar.lc in r2.
// r14 = r35 & 7; p15 = (8 < r35), p14 = its complement; r35 -= 6.
{.mmi; ld8 r11 = [r34], 8
ld8 r10 = [r33], 8
mov r2 = ar.lc
}{.mmi; and r14 = 7, r35
cmp.lt p15, p14 = 8, r35
add r35 = -6, r35
;;
// Branch to .Lb001 ... .Lb110 for r14 = 1 ... 6.
}{.mmi; cmp.eq p6, p0 = 1, r14
cmp.eq p7, p0 = 2, r14
cmp.eq p8, p0 = 3, r14
}{.bbb
(p6) br.dptk .Lb001
(p7) br.dptk .Lb010
(p8) br.dptk .Lb011
;;
}{.mmi; cmp.eq p9, p0 = 4, r14
cmp.eq p10, p0 = 5, r14
cmp.eq p11, p0 = 6, r14
}{.bbb
(p9) br.dptk .Lb100
(p10) br.dptk .Lb101
(p11) br.dptk .Lb110
;;
// The second limb pair (r19/r18) is loaded before the last branch, so both
// .Lb111 and the fall-through .Lb000 can use it.  r0 always reads as zero,
// so the cmp.ne clears p13; r23 is set to 0.
}{.mmi; ld8 r19 = [r34], 8
ld8 r18 = [r33], 8
cmp.ne p13, p0 = r0, r0
}{.mmb; cmp.eq p12, p0 = 7, r14
mov r23 = 0
(p12) br.dptk .Lb111
;;
}
// .Lb000: feed-in for r14 == 0 (fall-through from the dispatch).
// Uses the limb pairs r10/r11 and r18/r19 loaded by the entry code, sets the
// loop count and the lfetch addresses r20/r21, and enters the loop at .Lm0.
.Lb000:
.mmi; ld8 r27 = [r34], 8
ld8 r17 = [r33], 8
// r35 >> 3 is the value written to ar.lc below.
shr.u r35 = r35, 3
;;
.mmi; ld8 r24 = [r34], 8
ld8 r14 = [r33], 8
add r29 = r10, r11
;;
.mmi; ld8 r25 = [r34], 8
// p7 = carry out of the first sum r29 (sum wrapped below its operand).
cmp.ltu p7, p0 = r29, r10
mov ar.lc = r35
.mmi; ld8 r15 = [r33], 8
add r30 = r18, r19
// r3 = r32 + 8 is a second store pointer (used by "st8 [r3]" at .Lm0/.Lm4).
add r3 = 8, r32
;;
.mmi; add r20 = 500, r33
add r21 = 500, r34
cmp.ltu p8, p0 = r30, r18
.mmi; ld8 r26 = [r34], 8
ld8 r16 = [r33], 8
add r31 = r17, r27
;;
.mmi; ld8 r27 = [r34], 8
cmp.ltu p9, p0 = r31, r17
// Fold carry p7 into r30: set p8 if r30 is all ones, then r30 += 1.
(p7) cmp.eq.or p8, p0 = -1, r30
.mmb; ld8 r17 = [r33], 8
(p7) add r30 = 1, r30
br .Lm0
// .Lb001: feed-in for r14 == 1.
// With p14 set (count not above 8) only one limb is processed: r28 and its
// carry p6 go straight to .Lcj1 with r8 preset to 0.  Otherwise loading
// continues and the loop is entered at .Lm1.
.align 32
.Lb001:
.mmi; add r28 = r10, r11
// Loads predicated on p15 are skipped on the single-limb path.
(p15) ld8 r25 = [r34], 8
mov r8 = 0
;;
.mmb; cmp.ltu p6, p0 = r28, r10
(p15) ld8 r15 = [r33], 8
(p14) br .Lcj1
;;
.mmi; add r20 = 500, r33
add r21 = 500, r34
shr.u r35 = r35, 3
.mmi; ld8 r26 = [r34], 8
ld8 r16 = [r33], 8
cmp.ltu p6, p0 = r28, r10
;;
.mmi; ld8 r27 = [r34], 8
ld8 r17 = [r33], 8
mov ar.lc = r35
;;
.mmi; ld8 r24 = [r34], 8
ld8 r14 = [r33], 8
add r29 = r15, r25
;;
.mmi; ld8 r25 = [r34], 8
cmp.ltu p7, p0 = r29, r15
add r30 = r16, r26
.mmb; ld8 r15 = [r33], 8
add r3 = 16, r32
br .Lm1
// .Lb010: feed-in for r14 == 2.
// With p15 clear the two sums (r31, r28) are finished from .Lcj2 with r8
// preset to 0; otherwise .Lgt2 continues loading and enters the loop at
// .Lm23.
.align 32
.Lb010:
.mmi; ld8 r24 = [r34], 8
ld8 r14 = [r33], 8
shr.u r35 = r35, 3
.mmb; add r31 = r10, r11
nop 0
(p15) br .Lgt2
;;
// p9 / p6 = carries out of r31 / r28.
.mmi; cmp.ltu p9, p0 = r31, r10
add r28 = r14, r24
mov r8 = 0
;;
.mmb; nop 0
cmp.ltu p6, p0 = r28, r14
br .Lcj2
.Lgt2:
.mmi; ld8 r25 = [r34], 8
ld8 r15 = [r33], 8
nop 0
;;
.mmi; add r20 = 500, r33
add r21 = 500, r34
mov ar.lc = r35
.mmi; ld8 r26 = [r34], 8
ld8 r16 = [r33], 8
nop 0
;;
.mmi; ld8 r27 = [r34], 8
cmp.ltu p9, p0 = r31, r10
add r28 = r14, r24
.mmb; ld8 r17 = [r33], 8
add r3 = 24, r32
br .Lm23
// .Lb011: feed-in for r14 == 3.
// With p15 clear the sums r30 and r31 are handed to .Lcj3; otherwise local
// label 1 continues loading, stores r30, and enters the loop at .Lm23.
.align 32
.Lb011:
.mmi; ld8 r27 = [r34], 8
ld8 r17 = [r33], 8
add r30 = r10, r11
;;
.mmb; ld8 r24 = [r34], 8
ld8 r14 = [r33], 8
(p15) br 1f
// p8 = carry out of the first sum r30; applied to r31 after .Lcj3.
.mmb; cmp.ltu p8, p0 = r30, r10
add r31 = r17, r27
br .Lcj3
1:
.mmi; ld8 r25 = [r34], 8
ld8 r15 = [r33], 8
shr.u r35 = r35, 3
;;
.mmi; add r20 = 500, r33
add r21 = 500, r34
add r31 = r17, r27
.mmi; ld8 r26 = [r34], 8
ld8 r16 = [r33], 8
cmp.ltu p8, p0 = r30, r10
;;
.mmi; ld8 r27 = [r34], 8
cmp.ltu p9, p0 = r31, r17
mov ar.lc = r35
.mmi; ld8 r17 = [r33], 8
nop 0
nop 0
;;
.mmi; add r3 = 32, r32
st8 [r32] = r30, 8
// Fold carry p8 into r31: set p9 if r31 is all ones, then r31 += 1.
(p8) cmp.eq.or p9, p0 = -1, r31
.mmb;
(p8) add r31 = 1, r31
add r28 = r14, r24
br .Lm23
// .Lb100: feed-in for r14 == 4.
// With p14 set (count not above 8) the pending sums go to the wind-down at
// .Lcj4; otherwise .Lgt4 continues loading and enters the loop at .Lm4.
.align 32
.Lb100:
.mmi; ld8 r26 = [r34], 8
ld8 r16 = [r33], 8
shr.u r35 = r35, 3
;;
.mmi; ld8 r27 = [r34], 8
ld8 r17 = [r33], 8
add r29 = r10, r11
;;
.mmi; ld8 r24 = [r34], 8
ld8 r14 = [r33], 8
// p7 = carry out of the first sum r29.
cmp.ltu p7, p0 = r29, r10
.mmb; nop 0
add r30 = r16, r26
(p14) br .Lcj4
;;
// NOTE(review): below, six instructions follow a single ".mmi;" directive;
// the second group of three (ld8 / ld8 / nop) has no template directive of
// its own, unlike the surrounding code.  Possibly a missing ".mmi;" --
// confirm against the .asm source and the assembler in use.
.Lgt4:
.mmi; add r20 = 500, r33
add r21 = 500, r34
mov ar.lc = r35
ld8 r25 = [r34], 8
ld8 r15 = [r33], 8
nop 0
;;
.mmi; ld8 r26 = [r34], 8
cmp.ltu p8, p0 = r30, r16
nop 0
.mmi; ld8 r16 = [r33], 8
add r31 = r17, r27
add r3 = 8, r32
;;
.mmi; ld8 r27 = [r34], 8
cmp.ltu p9, p0 = r31, r17
(p7) cmp.eq.or p8, p0 = -1, r30
.mmb; ld8 r17 = [r33], 8
(p7) add r30 = 1, r30
br .Lm4
// .Lb101: feed-in for r14 == 5.
// With p14 set (count not above 8) the pending sums go to the wind-down at
// .Lcj5; otherwise .Lgt5 sets ar.lc and enters the loop at .Lm5.
.align 32
.Lb101:
.mmi; ld8 r25 = [r34], 8
ld8 r15 = [r33], 8
shr.u r35 = r35, 3
;;
.mmi; ld8 r26 = [r34], 8
ld8 r16 = [r33], 8
add r28 = r10, r11
;;
// NOTE(review): below, six instructions follow a single ".mmi;" directive;
// the second group of three (ld8 / ld8 / nop) has no template directive of
// its own, unlike the surrounding code.  Possibly a missing ".mmi;" --
// confirm against the .asm source and the assembler in use.
.mmi; add r20 = 500, r33
add r21 = 500, r34
add r3 = 16, r32
ld8 r27 = [r34], 8
ld8 r17 = [r33], 8
nop 0
;;
.mmi; ld8 r24 = [r34], 8
// p6 = carry out of the first sum r28.
cmp.ltu p6, p0 = r28, r10
nop 0
.mmb; ld8 r14 = [r33], 8
add r29 = r15, r25
(p14) br .Lcj5
;;
.Lgt5:
.mmi; ld8 r25 = [r34], 8
cmp.ltu p7, p0 = r29, r15
mov ar.lc = r35
.mmb; ld8 r15 = [r33], 8
add r30 = r16, r26
br .Lm5
// .Lb110: feed-in for r14 == 6.
// There is no separate short-count path here: control always enters the
// loop at .Lm67, where br.cloop decides whether to iterate.
.align 32
.Lb110:
.mmi; ld8 r24 = [r34], 8
ld8 r14 = [r33], 8
shr.u r35 = r35, 3
;;
.mmi; ld8 r25 = [r34], 8
ld8 r15 = [r33], 8
add r31 = r10, r11
;;
// r20/r21 = load pointers + 500, the lfetch addresses used inside the loop.
.mmi; add r20 = 500, r33
add r21 = 500, r34
mov ar.lc = r35
.mmi; ld8 r26 = [r34], 8
ld8 r16 = [r33], 8
nop 0
;;
.mmi; ld8 r27 = [r34], 8
// p9 = carry out of the first sum r31.
cmp.ltu p9, p0 = r31, r10
add r28 = r14, r24
.mmb; ld8 r17 = [r33], 8
add r3 = 24, r32
br .Lm67
// .Lb111: feed-in for r14 == 7.
// Uses the limb pairs r10/r11 and r18/r19 loaded by the entry code, stores
// the first sum r30, and enters the loop at .Lm67.
.align 32
.Lb111:
.mmi; ld8 r24 = [r34], 8
ld8 r14 = [r33], 8
shr.u r35 = r35, 3
;;
.mmi; ld8 r25 = [r34], 8
ld8 r15 = [r33], 8
add r30 = r10, r11
;;
.mmi; ld8 r26 = [r34], 8
// p8 = carry out of the first sum r30.
cmp.ltu p8, p0 = r30, r10
mov ar.lc = r35
.mmi; ld8 r16 = [r33], 8
add r31 = r18, r19
nop 0
;;
.mmi; add r20 = 500, r33
add r21 = 500, r34
nop 0
.mmi; ld8 r27 = [r34], 8
ld8 r17 = [r33], 8
cmp.ltu p9, p0 = r31, r18
;;
.mmi; add r3 = 32, r32
st8 [r32] = r30, 8
// Fold carry p8 into r31: set p9 if r31 is all ones, then r31 += 1.
(p8) cmp.eq.or p9, p0 = -1, r31
.mmb;
(p8) add r31 = 1, r31
add r28 = r14, r24
br .Lm67
// .Ltop: main loop shared by __gmpn_add_n and __gmpn_add_nc, eight limbs
// (eight st8) per iteration, counted by ar.lc through br.cloop.
// .Lc5, .Lm5, .Lm4, .Lm23, .Lc1, .Lm1, .Lm0 and .Lm67 are the entry points
// used by the feed-in paths.
// Recurring pattern for each limb:
//   add      sum = a, b
//   cmp.ltu  pOut = sum, a          carry out when the sum wrapped
//   (pIn) cmp.eq.or pOut = -1, sum  incoming carry will wrap an all-ones sum
//   (pIn) add sum = 1, sum          apply the incoming carry
// The predicates p6..p9 rotate through the roles of pIn/pOut.
.align 32
.Ltop:
.Lc5: ld8 r25 = [r34], 8
cmp.ltu p7, p0 = r29, r15
(p9) cmp.eq.or p6, p0 = -1, r28
ld8 r15 = [r33], 8
(p9) add r28 = 1, r28
add r30 = r16, r26
;;
.Lm5: ld8 r26 = [r34], 8
cmp.ltu p8, p0 = r30, r16
(p6) cmp.eq.or p7, p0 = -1, r29
ld8 r16 = [r33], 8
(p6) add r29 = 1, r29
add r31 = r17, r27
;;
st8 [r32] = r28, 8
ld8 r27 = [r34], 8
cmp.ltu p9, p0 = r31, r17
(p7) cmp.eq.or p8, p0 = -1, r30
ld8 r17 = [r33], 8
(p7) add r30 = 1, r30
;;
// Two stores per group here: r32 advances by 16 and r3 by 32, so the two
// pointers interleave over the destination.
.Lm4: st8 [r32] = r29, 16
st8 [r3] = r30, 32
(p8) cmp.eq.or p9, p0 = -1, r31
// Prefetch through r20 (set to r33 + 500 by the feed-in code), stepping 64.
lfetch [r20], 64
(p8) add r31 = 1, r31
add r28 = r14, r24
;;
.Lm23: st8 [r32] = r31, 8
ld8 r24 = [r34], 8
cmp.ltu p6, p0 = r28, r14
ld8 r14 = [r33], 8
add r29 = r15, r25
nop.b 0
;;
.Lc1: ld8 r25 = [r34], 8
cmp.ltu p7, p0 = r29, r15
(p9) cmp.eq.or p6, p0 = -1, r28
ld8 r15 = [r33], 8
(p9) add r28 = 1, r28
add r30 = r16, r26
;;
.Lm1: ld8 r26 = [r34], 8
cmp.ltu p8, p0 = r30, r16
(p6) cmp.eq.or p7, p0 = -1, r29
ld8 r16 = [r33], 8
(p6) add r29 = 1, r29
add r31 = r17, r27
;;
st8 [r32] = r28, 8
ld8 r27 = [r34], 8
cmp.ltu p9, p0 = r31, r17
(p7) cmp.eq.or p8, p0 = -1, r30
ld8 r17 = [r33], 8
(p7) add r30 = 1, r30
;;
.Lm0: st8 [r32] = r29, 16
st8 [r3] = r30, 32
(p8) cmp.eq.or p9, p0 = -1, r31
// Prefetch through r21 (set to r34 + 500 by the feed-in code), stepping 64.
lfetch [r21], 64
(p8) add r31 = 1, r31
add r28 = r14, r24
;;
.Lm67: st8 [r32] = r31, 8
ld8 r24 = [r34], 8
cmp.ltu p6, p0 = r28, r14
ld8 r14 = [r33], 8
add r29 = r15, r25
br.cloop.dptk .Ltop
;;
// Wind-down: .Lend, .Lcj5, .Lcj4, .Lcj3, .Lcj2, .Lcj1.
// Reached by fall-through from the loop (or by branch to .Lend) and, at the
// .Lcj* labels, from the short-count feed-in paths.  No loads are issued
// here, only the remaining carry propagation and stores.  The routine
// returns with r8 = 1 if p6 (the last carry) is set; r8 is otherwise the 0
// written either below or by the feed-in path that jumped in.
.Lend:
.mmi;
(p9) cmp.eq.or p6, p0 = -1, r28
(p9) add r28 = 1, r28
// Restore ar.lc from the copy saved in r2 at entry.
mov ar.lc = r2
.Lcj5:
.mmi; cmp.ltu p7, p0 = r29, r15
add r30 = r16, r26
nop 0
;;
.mmi; st8 [r32] = r28, 8
(p6) cmp.eq.or p7, p0 = -1, r29
(p6) add r29 = 1, r29
.Lcj4:
.mmi; cmp.ltu p8, p0 = r30, r16
add r31 = r17, r27
nop 0
;;
.mmi; st8 [r32] = r29, 8
(p7) cmp.eq.or p8, p0 = -1, r30
(p7) add r30 = 1, r30
.Lcj3:
.mmi; cmp.ltu p9, p0 = r31, r17
add r28 = r14, r24
nop 0
;;
.mmi; st8 [r32] = r30, 8
(p8) cmp.eq.or p9, p0 = -1, r31
(p8) add r31 = 1, r31
.mmi; cmp.ltu p6, p0 = r28, r14
nop 0
// Preset the return value to 0; overwritten at .Lcj1 when p6 is set.
mov r8 = 0
;;
.Lcj2:
.mmi; st8 [r32] = r31, 8
(p9) cmp.eq.or p6, p0 = -1, r28
(p9) add r28 = 1, r28
;;
// Store the last limb and return the final carry (p6) in r8.
.Lcj1:
.mmb; st8 [r32] = r28, 8
(p6) mov r8 = 1
br.ret.sptk.many b0
.endp __gmpn_add_n#
More information about the gmp-bugs
mailing list