Problems building on HP-UX 11.31/IA

Albert Chin gmp-bugs at mlists.thewrittenword.com
Sun Aug 25 18:11:54 CEST 2013


On Sat, Aug 24, 2013 at 07:31:44PM +0200, Niels Möller wrote:
> Albert Chin <gmp-bugs at mlists.thewrittenword.com> writes:
> 
> >   /opt/fsw/m414/bin/gm4  -DHAVE_CONFIG_H -D__GMP_WITHIN_GMP -DOPERATION_add_n -DPIC add_n.asm >tmp-add_n.s
> >    cc -c -DHAVE_CONFIG_H -I. -I.. -D__GMP_WITHIN_GMP -I.. -DOPERATION_add_n +O3 tmp-add_n.s +Z -DPIC -o .libs/add_n.o
> >   tmp-add_n.s:116: error 4032: must be in a bundle
> >   tmp-add_n.s:147: error 4032: must be in a bundle
> 
> Can you show what the tmp-add_n.s file looks like? (Not sure, but you
> may need to comment out the rm command on the last line of the script
> mpn/m4-ccas).
> 
> The assembler file in question has seen only one, apparently trivial,
> change since 2011, so if it has stopped working now, it might be due to
> other changes in the configure or m4 machinery.

gmp-5.0.5 built fine on this platform.

gmp-5.0.5% ls -ld add_n.asm
lrwxrwxr-x   1 china      src             22 Aug 25 14:38 add_n.asm -> ../mpn/ia64/aors_n.asm
gmp-5.1.2% ls -ld add_n.asm
lrwxrwxr-x   1 china      src             22 Aug 25 14:38 add_n.asm -> ../mpn/ia64/aors_n.asm

From the 5.1.2 ChangeLog:
  2010-04-10  Torbjorn Granlund  <tege at gmplib.org>

          * mpn/ia64/lorrshift.asm: Rewrite feed-in and wind-down code.

          * mpn/ia64/aorslsh1_n.asm: Adapt to new aorslsh1_n.
          * mpn/ia64/aorslsh1_n.asm: Likewise.

          * mpn/ia64/aors_n.asm: Complete rewrite.
          * mpn/ia64/aorslsh1_n.asm: Likewise.

          * mpn/ia64/add_n_sub_n.asm: Misc cleanups.  Add slotting comments.

          * mpn/ia64/lshiftc.asm: New file.

It seems the following changesets were introduced between 5.0.5 and
5.1.2:
  changeset:   13762:389ed05793c7
  user:        Torbjorn Granlund <tege at gmplib.org>
  date:        Sat Jan 22 22:30:31 2011 +0100
  summary:     Fix some incorrect bundle types.

  changeset:   13558:a4322947edf2
  user:        Torbjorn Granlund <tege at gmplib.org>
  date:        Sat Apr 10 18:29:36 2010 +0200
  summary:     Rewrite Itanium addition code.

  changeset:   13495:228d9deade5b
  user:        Torbjorn Granlund <tege at gmplib.org>
  date:        Mon Mar 15 02:12:46 2010 +0100
  summary:     Insert explicitly typed nops to trigger intended bundling.

I've attached a copy of tmp-add_n.s from 5.0.5 and 5.1.2.

-- 
albert chin (china at thewrittenword.com)
-------------- next part --------------



























































  
  
  
  
  




   
   
   
   
   
   





	
	// __gmpn_add_n, first attachment (described in the message as tmp-add_n.s from 5.0.5).
	// Register use visible in this procedure: result limbs are stored through r32,
	// operand limbs are loaded through r33 and r34, r35 holds the limb count, and
	// r8 is set to 0 or 1 just before the return.  ar.lc is saved in r2.
	.text
	.align	32
	.global	__gmpn_add_n#
	.proc	__gmpn_add_n#
__gmpn_add_n:
	.prologue
	.save	ar.lc, r2
	.body

	// addp4 forms 64-bit addresses from the 32-bit pointers in r32-r34; zxt4
	// zero-extends the low 32 bits of the count in r35.
	addp4		r32 = 0, r32		
	addp4		r33 = 0, r33		
	addp4		r34 = 0, r34		
	zxt4		r35 = r35			
	;;

// Load the first limb pair (r10, r11) and save ar.lc in r2.
{.mmi		
	ld8		r11 = [r34], 8		
	ld8		r10 = [r33], 8		
	mov.i		r2 = ar.lc		
}
// r14 = r35 & 7 selects the feed-in path; p15 = (8 < r35), p14 = its complement.
// r35 is reduced by 8 in the same instruction group.
{.mmi
	and		r14 = 7, r35		
	cmp.lt		p15, p14 = 8, r35		
	add		r35 = -8, r35		
	;;
}
// Dispatch on r14 = 1..7 to .Lb001 .. .Lb111; r14 == 0 falls through to .Lb000.
{.mmi		
	cmp.eq		p6, p0 = 1, r14		
	cmp.eq		p7, p0 = 2, r14		
	cmp.eq		p8, p0 = 3, r14		
}
{.bbb
   (p6)	br.dptk		.Lb001			
   (p7)	br.dptk		.Lb010			
   (p8)	br.dptk		.Lb011			
	;;
}
{.mmi		
	cmp.eq		p9, p0 = 4, r14		
	cmp.eq		p10, p0 = 5, r14	
	cmp.eq		p11, p0 = 6, r14	
}
{.bbb
   (p9)	br.dptk		.Lb100			
  (p10)	br.dptk		.Lb101			
  (p11)	br.dptk		.Lb110			
	;;
}		
// Only the r14 == 7 and r14 == 0 paths reach this bundle, so only they see the
// extra decrement of r35.
{.mmb
	cmp.eq		p12, p0 = 7, r14	
	add		r35 = -1, r35		
  (p12)	br.dptk		.Lb111			
}


// Feed-in for r14 == 0 (reached by fall-through from the dispatch code).
// Loads seven more limb pairs and forms the first three sums in r9, r8 and r23
// with their carry-out predicates p7, p8 and p9; r3 is set to r32 + 8 as a
// second store pointer.  With p14 set it branches to the wind-down at .Lcj8.
// Otherwise .grt8 loads three more pairs, sets ar.lc = r35 >> 3, points r10/r11
// 512 bytes past the current source pointers (used by lfetch in the loop) and
// enters the loop at .LL000.
.Lb000:	ld8		r26 = [r34], 8		
	ld8		r16 = [r33], 8		
	add		r3 = 8, r32		
	;;
	ld8		r27 = [r34], 8		
	ld8		r17 = [r33], 8		
	add		r9 = r10, r11		
	;;
	ld8		r28 = [r34], 8		
	ld8		r18 = [r33], 8		
	cmp.ltu	p7, p0 = r9, r10	
	;;
	ld8		r29 = [r34], 8		
	ld8		r19 = [r33], 8		
	add		r8 = r16, r26		
	;;
	ld8		r30 = [r34], 8		
	ld8		r20 = [r33], 8		
	cmp.ltu	p8, p0 = r8, r16		
	;;
	ld8		r31 = [r34], 8		
	ld8		r21 = [r33], 8		
	add		r23 = r17, r27		
	;;
	ld8		r24 = [r34], 8		
	ld8		r14 = [r33], 8		
	cmp.ltu	p9, p0 = r23, r17		
   (p7)	cmp.eq.or	p8, p0 = -1, r8	
   (p7)	add		r8 = 1, r8		
  (p14)	br.cond.dptk	.Lcj8			
	;;

.grt8:	ld8		r25 = [r34], 8		
	ld8		r15 = [r33], 8		
	shr.u		r35 = r35, 3		
	;;
	add		r11 = 512, r34
	ld8		r26 = [r34], 8		
	add		r10 = 512, r33
	ld8		r16 = [r33], 8		
	nop.i		0
	nop.b		0
	;;
	ld8		r27 = [r34], 8		
	ld8		r17 = [r33], 8		
	mov.i		ar.lc = r35		
	br		.LL000			

// Feed-in for r14 == 1.  r22 = r10 + r11 is the first limb sum.
// Without p15: the carry-out goes to p6, r8 is cleared and control passes to
// .Lcj1, which stores r22 and returns.
// With p15: .grt1 loads eight more limb pairs, clears p9 (cmp.ne r0, r0 is
// never true), sets ar.lc = r35 >> 3, prepares the lfetch pointers r10/r11 and
// starts the loop with br.cloop; if br.cloop is not taken it goes to .Lcj9.
.Lb001:	add		r3 = 16, r32		
	add		r22 = r10, r11		
  (p15)	br.cond.dpnt	.grt1			
	;;
	cmp.ltu	p6, p0 = r22, r10	
	mov		r8 = 0			
	br		.Lcj1			

.grt1:	ld8		r25 = [r34], 8		
	ld8		r15 = [r33], 8		
	shr.u		r35 = r35, 3		
	;;
	ld8		r26 = [r34], 8		
	ld8		r16 = [r33], 8		
	cmp.ne		p9, p0 = r0, r0		
	;;
	ld8		r27 = [r34], 8		
	ld8		r17 = [r33], 8		
	mov.i		ar.lc = r35		
	;;
	ld8		r28 = [r34], 8		
	ld8		r18 = [r33], 8		
	cmp.ltu	p6, p0 = r22, r10	
	;;
	ld8		r29 = [r34], 8		
	ld8		r19 = [r33], 8		
	add		r9 = r15, r25		
	;;
	ld8		r30 = [r34], 8		
	ld8		r20 = [r33], 8		
	cmp.ltu	p7, p0 = r9, r15		
	;;
	ld8		r31 = [r34], 8		
	ld8		r21 = [r33], 8		
	add		r8 = r16, r26		
	;;
	add		r11 = 512, r34
	ld8		r24 = [r34], 8		
	add		r10 = 512, r33
	ld8		r14 = [r33], 8		
	br.cloop.dptk	.Loop			
	br		.Lcj9			

// Feed-in for r14 == 2.  Loads the second limb pair (r14, r24) and forms
// r23 = r10 + r11.
// Without p15: p9 = carry out of r23, r22 = r14 + r24, then .Lcj2 finishes.
// With p15: .grt2 loads seven more pairs, sets ar.lc = r35 >> 3, computes the
// same p9 and r22, prepares the lfetch pointers r10/r11 and enters the loop
// at .LL01x.
.Lb010:	ld8		r24 = [r34], 8		
	ld8		r14 = [r33], 8		
	add		r3 = 24, r32		
	add		r23 = r10, r11		
  (p15)	br.cond.dpnt	.grt2			
	;;
	cmp.ltu	p9, p0 = r23, r10	
	add		r22 = r14, r24		
	br		.Lcj2			

.grt2:	ld8		r25 = [r34], 8		
	ld8		r15 = [r33], 8		
	shr.u		r35 = r35, 3		
	;;
	ld8		r26 = [r34], 8		
	ld8		r16 = [r33], 8		
	;;
	ld8		r27 = [r34], 8		
	ld8		r17 = [r33], 8		
	mov.i		ar.lc = r35		
	;;
	ld8		r28 = [r34], 8		
	ld8		r18 = [r33], 8		
	;;
	ld8		r29 = [r34], 8		
	ld8		r19 = [r33], 8		
	cmp.ltu	p9, p0 = r23, r10	
	;;
	ld8		r30 = [r34], 8		
	ld8		r20 = [r33], 8		
	add		r22 = r14, r24		
	;;
	add		r11 = 512, r34
	ld8		r31 = [r34], 8		
	add		r10 = 512, r33
	ld8		r21 = [r33], 8		
	br		.LL01x			

// Feed-in for r14 == 3.  r8 = r10 + r11 is the first sum; two more limb pairs
// are loaded (r21/r31 and r14/r24).
// Without p15: p8 = carry out of r8, r23 = r21 + r31, r8 is stored, p9 = carry
// out of r23, then .Lcj3 finishes.
// With p15: .grt3 loads seven more pairs, sets ar.lc = r35 >> 3, applies the
// carry p8 to r23 (cmp.eq.or / add), stores r8, forms r22 = r14 + r24 and enters
// the loop at .LL01x.
.Lb011:	ld8		r31 = [r34], 8		
	ld8		r21 = [r33], 8		
	add		r8 = r10, r11		
	;;
	ld8		r24 = [r34], 8		
	ld8		r14 = [r33], 8		
  (p15)	br.cond.dpnt	.grt3			
	;;
	cmp.ltu	p8, p0 = r8, r10	
	add		r23 = r21, r31		
	;;
	st8		[r32] = r8, 8		
	cmp.ltu	p9, p0 = r23, r21		
	br		.Lcj3			

.grt3:	ld8		r25 = [r34], 8		
	ld8		r15 = [r33], 8		
	add		r3 = 32, r32		
	;;
	ld8		r26 = [r34], 8		
	ld8		r16 = [r33], 8		
	shr.u		r35 = r35, 3		
	;;
	ld8		r27 = [r34], 8		
	ld8		r17 = [r33], 8		
	cmp.ltu	p8, p0 = r8, r10	
	;;
	ld8		r28 = [r34], 8		
	ld8		r18 = [r33], 8		
	mov.i		ar.lc = r35		
	add		r23 = r21, r31		
	nop.i		0
	nop.b		0
	;;
	ld8		r29 = [r34], 8		
	ld8		r19 = [r33], 8		
	cmp.ltu	p9, p0 = r23, r21		
	;;
	add		r11 = 512, r34
	ld8		r30 = [r34], 8		
	add		r10 = 512, r33
	ld8		r20 = [r33], 8		
   (p8)	cmp.eq.or	p9, p0 = -1, r23	
	;;
	ld8		r31 = [r34], 8		
	ld8		r21 = [r33], 8		
   (p8)	add		r23 = 1, r23		
	st8		[r32] = r8, 8		
	add		r22 = r14, r24		
	br		.LL01x			

// Feed-in for r14 == 4.  Loads three more limb pairs and forms r9 = r10 + r11.
// Without p15: p7 = carry out of r9, r8 = r20 + r30 with carry-out p8,
// r23 = r21 + r31, then .Lcj4 finishes.
// With p15: .grt4 computes the same values while loading seven more pairs,
// sets ar.lc = r35 >> 3, applies carry p7 to r8 and enters the loop at .LL100.
.Lb100:	ld8		r30 = [r34], 8		
	ld8		r20 = [r33], 8		
	add		r3 = 8, r32		
	;;
	ld8		r31 = [r34], 8		
	ld8		r21 = [r33], 8		
	add		r9 = r10, r11		
	;;
	ld8		r24 = [r34], 8		
	ld8		r14 = [r33], 8		
  (p15)	br.cond.dpnt	.grt4			
	;;
	cmp.ltu	p7, p0 = r9, r10	
	add		r8 = r20, r30		
	;;
	cmp.ltu	p8, p0 = r8, r20		
	add		r23 = r21, r31		
	br		.Lcj4			

.grt4:	ld8		r25 = [r34], 8		
	ld8		r15 = [r33], 8		
	shr.u		r35 = r35, 3		
	cmp.ltu	p7, p0 = r9, r10	
	;;
	ld8		r26 = [r34], 8		
	ld8		r16 = [r33], 8		
	add		r8 = r20, r30		
	;;
	ld8		r27 = [r34], 8		
	ld8		r17 = [r33], 8		
	cmp.ltu	p8, p0 = r8, r20		
	;;
	ld8		r28 = [r34], 8		
	ld8		r18 = [r33], 8		
	mov.i		ar.lc = r35		
	;;
	ld8		r29 = [r34], 8		
	ld8		r19 = [r33], 8		
	add		r23 = r21, r31		
	;;
	add		r11 = 512, r34
	ld8		r30 = [r34], 8		
	add		r10 = 512, r33
	ld8		r20 = [r33], 8		
	cmp.ltu	p9, p0 = r23, r21		
	;;
	ld8		r31 = [r34], 8		
	ld8		r21 = [r33], 8		
   (p7)	cmp.eq.or	p8, p0 = -1, r8	
   (p7)	add		r8 = 1, r8		
	br		.LL100			

// Feed-in for r14 == 5.  Loads four more limb pairs, forms r22 = r10 + r11 with
// carry-out p6 and r9 = r19 + r29, and shifts r35 right by 3 before the branch.
// Without p15: p7 = carry out of r9, r8 = r20 + r30, then .Lcj5 finishes.
// With p15: .grt5 loads five more pairs, sets ar.lc = r35, computes the same p7
// and r8, prepares the lfetch pointers r10/r11 and enters the loop at .LL101.
.Lb101:	ld8		r29 = [r34], 8		
	ld8		r19 = [r33], 8		
	add		r3 = 16, r32		
	;;
	ld8		r30 = [r34], 8		
	ld8		r20 = [r33], 8		
	add		r22 = r10, r11		
	;;
	ld8		r31 = [r34], 8		
	ld8		r21 = [r33], 8		
	cmp.ltu	p6, p0 = r22, r10	
	;;
	ld8		r24 = [r34], 8		
	ld8		r14 = [r33], 8		
	add		r9 = r19, r29		
	shr.u		r35 = r35, 3		
  (p15)	br.cond.dpnt	.grt5			
	;;
	cmp.ltu	p7, p0 = r9, r19		
	add		r8 = r20, r30		
	br		.Lcj5			

.grt5:	ld8		r25 = [r34], 8		
	ld8		r15 = [r33], 8		
	;;
	ld8		r26 = [r34], 8		
	ld8		r16 = [r33], 8		
	mov.i		ar.lc = r35		
	;;
	ld8		r27 = [r34], 8		
	ld8		r17 = [r33], 8		
	cmp.ltu	p7, p0 = r9, r19		
	;;
	ld8		r28 = [r34], 8		
	ld8		r18 = [r33], 8		
	add		r8 = r20, r30		
	;;
	add		r11 = 512, r34
	ld8		r29 = [r34], 8		
	add		r10 = 512, r33
	ld8		r19 = [r33], 8		
	br		.LL101			

// Feed-in for r14 == 6.  Loads five more limb pairs, forms r23 = r10 + r11 with
// carry-out p9 and r22 = r18 + r28, and shifts r35 right by 3.
// With p14 set it branches to the wind-down at .Lcj67.  Otherwise .grt6 loads
// three more pairs, sets ar.lc = r35, recomputes p9 and r22, prepares the lfetch
// pointers r10/r11 and enters the loop at .LL11x.
.Lb110:	ld8		r28 = [r34], 8		
	ld8		r18 = [r33], 8		
	add		r3 = 24, r32		
	;;
	ld8		r29 = [r34], 8		
	ld8		r19 = [r33], 8		
	add		r23 = r10, r11		
	;;
	ld8		r30 = [r34], 8		
	ld8		r20 = [r33], 8		
	shr.u		r35 = r35, 3		
	;;
	ld8		r31 = [r34], 8		
	ld8		r21 = [r33], 8		
	cmp.ltu	p9, p0 = r23, r10	
	;;
	ld8		r24 = [r34], 8		
	ld8		r14 = [r33], 8		
	add		r22 = r18, r28		
  (p14)	br.cond.dptk	.Lcj67			
	;;

.grt6:	ld8		r25 = [r34], 8		
	ld8		r15 = [r33], 8		
	mov.i		ar.lc = r35		
	cmp.ltu	p9, p0 = r23, r10	
	nop.i		0
	nop.b		0
	;;
	ld8		r26 = [r34], 8		
	ld8		r16 = [r33], 8		
	add		r22 = r18, r28		
	;;
	add		r11 = 512, r34
	ld8		r27 = [r34], 8		
	add		r10 = 512, r33
	ld8		r17 = [r33], 8		
	br		.LL11x			

// Feed-in for r14 == 7.  Loads six more limb pairs, forms r8 = r10 + r11 with
// carry-out p8 and r23 = r17 + r27 with carry-out p9.
// Without p15: stores r8, applies carry p8 to r23, forms r22 = r18 + r28 and
// continues at .Lcj67.
// With p15: .grt7 does the same work while loading three more pairs, sets
// ar.lc = r35 >> 3, prepares the lfetch pointers r10/r11 and enters the loop
// at .LL11x.
.Lb111:	ld8		r27 = [r34], 8		
	ld8		r17 = [r33], 8		
	add		r3 = 32, r32		
	;;
	ld8		r28 = [r34], 8		
	ld8		r18 = [r33], 8		
	add		r8 = r10, r11		
	;;
	ld8		r29 = [r34], 8		
	ld8		r19 = [r33], 8		
	cmp.ltu	p8, p0 = r8, r10	
	;;
	ld8		r30 = [r34], 8		
	ld8		r20 = [r33], 8		
	add		r23 = r17, r27		
	;;
	ld8		r31 = [r34], 8		
	ld8		r21 = [r33], 8		
	cmp.ltu	p9, p0 = r23, r17		
	;;
	ld8		r24 = [r34], 8		
	ld8		r14 = [r33], 8		
  (p15)	br.cond.dpnt	.grt7			
	;;
	st8		[r32] = r8, 8		
   (p8)	cmp.eq.or	p9, p0 = -1, r23	
   (p8)	add		r23 = 1, r23		
	add		r22 = r18, r28		
	br		.Lcj67			

.grt7:	ld8		r25 = [r34], 8		
	ld8		r15 = [r33], 8		
	shr.u		r35 = r35, 3		
   (p8)	cmp.eq.or	p9, p0 = -1, r23	
	nop.i		0
	nop.b		0
	;;
	add		r11 = 512, r34
	ld8		r26 = [r34], 8		
	add		r10 = 512, r33
	ld8		r16 = [r33], 8		
   (p8)	add		r23 = 1, r23		
	nop.b		0
	;;
	ld8		r27 = [r34], 8		
	ld8		r17 = [r33], 8		
	mov.i		ar.lc = r35		
	st8		[r32] = r8, 8		
	add		r22 = r18, r28		
	br		.LL11x			


	// Main loop.  One pass stores eight result limbs and loads eight new limb
	// pairs.  Stores go through r32 except every fourth limb, which goes through
	// r3 (r32 steps by 16 over that slot, r3 steps by 32).
	// Carry pattern used throughout: cmp.ltu sets the carry-out predicate of a raw
	// sum; under the incoming carry predicate, cmp.eq.or also sets it when the raw
	// sum is -1 (all ones), and add increments the sum.
	// lfetch touches the addresses in r10 / r11 and advances them by 64.
	// .LL000, .LL11x, .LL101, .LL100 and .LL01x are entry points used by the
	// feed-in code.  br.cloop repeats the loop while ar.lc is nonzero.
	.align	32
.Loop:	ld8		r25 = [r34], 8		
	cmp.ltu	p7, p0 = r9, r15		
   (p9)	cmp.eq.or	p6, p0 = -1, r22	
	ld8		r15 = [r33], 8		
   (p9)	add		r22 = 1, r22		
	add		r8 = r16, r26		
	;;
	ld8		r26 = [r34], 8		
	cmp.ltu	p8, p0 = r8, r16		
   (p6)	cmp.eq.or	p7, p0 = -1, r9	
	ld8		r16 = [r33], 8		
   (p6)	add		r9 = 1, r9		
	add		r23 = r17, r27		
	;;
	st8		[r32] = r22, 8		
	ld8		r27 = [r34], 8		
	cmp.ltu	p9, p0 = r23, r17		
   (p7)	cmp.eq.or	p8, p0 = -1, r8	
	ld8		r17 = [r33], 8		
   (p7)	add		r8 = 1, r8		
	;;
.LL000:	st8		[r32] = r9, 16		
	st8		[r3] = r8, 32		
   (p8)	cmp.eq.or	p9, p0 = -1, r23	
	lfetch		[r10], 64
   (p8)	add		r23 = 1, r23		
	add		r22 = r18, r28		
	;;
.LL11x:	st8		[r32] = r23, 8		
	ld8		r28 = [r34], 8		
	cmp.ltu	p6, p0 = r22, r18		
	ld8		r18 = [r33], 8		
	add		r9 = r19, r29		
	;;
	ld8		r29 = [r34], 8		
	cmp.ltu	p7, p0 = r9, r19		
   (p9)	cmp.eq.or	p6, p0 = -1, r22	
	ld8		r19 = [r33], 8		
   (p9)	add		r22 = 1, r22		
	add		r8 = r20, r30		
	;;
.LL101:	ld8		r30 = [r34], 8		
	cmp.ltu	p8, p0 = r8, r20		
   (p6)	cmp.eq.or	p7, p0 = -1, r9	
	ld8		r20 = [r33], 8		
   (p6)	add		r9 = 1, r9		
	add		r23 = r21, r31		
	;;
	st8		[r32] = r22, 8		
	ld8		r31 = [r34], 8		
	cmp.ltu	p9, p0 = r23, r21		
   (p7)	cmp.eq.or	p8, p0 = -1, r8	
	ld8		r21 = [r33], 8		
   (p7)	add		r8 = 1, r8		
	;;
.LL100:	st8		[r32] = r9, 16		
	st8		[r3] = r8, 32		
   (p8)	cmp.eq.or	p9, p0 = -1, r23	
	lfetch		[r11], 64
   (p8)	add		r23 = 1, r23		
	add		r22 = r14, r24		
	;;
.LL01x:	st8		[r32] = r23, 8		
	ld8		r24 = [r34], 8		
	cmp.ltu	p6, p0 = r22, r14		
	ld8		r14 = [r33], 8		
	add		r9 = r15, r25		
	br.cloop.dptk	.Loop			
	;;


	// Wind-down: finishes the sums for limbs that are already loaded, with no
	// further loads.  Reached by falling out of the loop; .Lcj9 .. .Lcj1 are also
	// entered directly from the feed-in code.  At the end r8 is set to 0, then to
	// 1 if the last carry predicate p6 is set, ar.lc is restored from r2, and the
	// procedure returns through b0.
	cmp.ltu	p7, p0 = r9, r15		
   (p9)	cmp.eq.or	p6, p0 = -1, r22	
   (p9)	add		r22 = 1, r22		
	add		r8 = r16, r26		
	;;
.Lcj9:	cmp.ltu	p8, p0 = r8, r16		
   (p6)	cmp.eq.or	p7, p0 = -1, r9	
	st8		[r32] = r22, 8		
   (p6)	add		r9 = 1, r9		
	add		r23 = r17, r27		
	;;
	cmp.ltu	p9, p0 = r23, r17		
   (p7)	cmp.eq.or	p8, p0 = -1, r8	
   (p7)	add		r8 = 1, r8		
	;;
.Lcj8:	st8		[r32] = r9, 16		
	st8		[r3] = r8, 32		
   (p8)	cmp.eq.or	p9, p0 = -1, r23	
   (p8)	add		r23 = 1, r23		
	add		r22 = r18, r28		
	;;
.Lcj67:	st8		[r32] = r23, 8		
	cmp.ltu	p6, p0 = r22, r18		
	add		r9 = r19, r29		
	;;
	cmp.ltu	p7, p0 = r9, r19		
   (p9)	cmp.eq.or	p6, p0 = -1, r22	
   (p9)	add		r22 = 1, r22		
	add		r8 = r20, r30		
	;;
.Lcj5:	cmp.ltu	p8, p0 = r8, r20		
   (p6)	cmp.eq.or	p7, p0 = -1, r9	
	st8		[r32] = r22, 8		
   (p6)	add		r9 = 1, r9		
	add		r23 = r21, r31		
	;;
.Lcj4:	cmp.ltu	p9, p0 = r23, r21		
   (p7)	cmp.eq.or	p8, p0 = -1, r8	
   (p7)	add		r8 = 1, r8		
	;;
	st8		[r32] = r9, 16		
	st8		[r3] = r8, 32		
.Lcj3:
   (p8)	cmp.eq.or	p9, p0 = -1, r23	
   (p8)	add		r23 = 1, r23		
	add		r22 = r14, r24		
	;;
.Lcj2:	st8		[r32] = r23, 8		
	cmp.ltu	p6, p0 = r22, r14		
	;;
   (p9)	cmp.eq.or	p6, p0 = -1, r22	
   (p9)	add		r22 = 1, r22		
	mov		r8 = 0			
	;;
.Lcj1:	st8		[r32] = r22, 8		
	mov.i		ar.lc = r2		
   (p6)	mov		r8 = 1			
	br.ret.sptk.many b0			
	
	.endp	__gmpn_add_n#

-------------- next part --------------



































































  
  
  
  
  
  
  







   
   
   

 




	
	// __gmpn_add_nc, second attachment (described in the message as tmp-add_n.s
	// from 5.1.2).  Register use visible here: result limbs are stored through r32,
	// operand limbs are loaded through r33 and r34, r35 holds the limb count.
	// r36 is copied to r23 and tested for non-zero into p13 (presumably the
	// carry-in argument; confirm against mpn/ia64/aors_n.asm).
	// The feed-in paths of this procedure branch to labels (.Lm0, .Lm4, .Lm23,
	// .Lm67, .Lc1, .Lc5, .Lend, .Lcj1 .. .Lcj4) that are defined inside
	// __gmpn_add_n further down in the same file.
	.text
	.align	32
	.global	__gmpn_add_nc#
	.proc	__gmpn_add_nc#
__gmpn_add_nc:
	.prologue
	.save	ar.lc, r2
	.body

	// addp4 forms 64-bit addresses from the 32-bit pointers in r32-r34; zxt4
	// zero-extends the low 32 bits of the count in r35.
	addp4	r32 = 0, r32		
	addp4	r33 = 0, r33		
	addp4	r34 = 0, r34		
	zxt4	r35 = r35			
	;;


// Load the first limb pair, save ar.lc in r2, compute r14 = r35 & 7 and
// p15 = (8 < r35) / p14 = complement, and reduce r35 by 6.
 {.mmi;	ld8	r11 = [r34], 8		
	ld8	r10 = [r33], 8		
	mov	r2 = ar.lc		
}{.mmi;	and	r14 = 7, r35		
	cmp.lt	p15, p14 = 8, r35		
	add	r35 = -6, r35		
	;;
}
// NOTE(review): if the archive preserved the attachment's line numbering, the
// next line is line 116 of the original tmp-add_n.s, the first line reported as
// "error 4032: must be in a bundle".  The .mmi template directive here is not
// enclosed in { }, unlike the bundles around it.
// r20 / r21 = source pointers + 500, later used as lfetch addresses.
.mmi;	add	r20 = 500, r33	
	add	r21 = 500, r34	
	mov	r23 = r36		
	;;
// Dispatch on r14 = 1..7 to .Lc001 .. .Lc111; r14 == 0 falls through to .Lc000.
{.mmi;	cmp.eq	p6, p0 = 1, r14		
	cmp.eq	p7, p0 = 2, r14		
	cmp.eq	p8, p0 = 3, r14		
}{.bbb
   (p6)	br.dptk	.Lc001			
   (p7)	br.dptk	.Lc010			
   (p8)	br.dptk	.Lc011			
	;;
}
{.mmi;	cmp.eq	p9, p0 = 4, r14		
	cmp.eq	p10, p0 = 5, r14	
	cmp.eq	p11, p0 = 6, r14	
}{.bbb
   (p9)	br.dptk	.Lc100			
  (p10)	br.dptk	.Lc101			
  (p11)	br.dptk	.Lc110			
	;;
// Only the r14 == 7 and r14 == 0 paths get here: preload a second limb pair
// (r18, r19) and set p13 = (r36 != 0).
}{.mmi;	ld8	r19 = [r34], 8		
	ld8	r18 = [r33], 8		
	cmp.ne	p13, p0 = 0, r36		
}{.mmb;	cmp.eq	p12, p0 = 7, r14	
	nop	0
  (p12)	br.dptk	.Lc111			
	;;
}

// Feed-in for r14 == 0 (fall-through from the dispatch code).
// NOTE(review): if the archive preserved the attachment's line numbering, the
// first .mmi line below is line 147 of the original tmp-add_n.s, the second line
// reported as "error 4032: must be in a bundle"; like the other .mmi directives
// in this section it is not enclosed in { }.
// Forms r29 = r10 + r11 (carry-out p7, carry-in applied under p13),
// r30 = r18 + r19 (carry-out p8, carry p7 applied) and r31 = r17 + r27
// (carry-out p9), sets ar.lc = r35 >> 3 and joins the shared loop at .Lm0.
.Lc000:
.mmi;	ld8	r27 = [r34], 8		
	ld8	r17 = [r33], 8		
	shr.u	r35 = r35, 3		
	;;
.mmi;	add	r21 = 500, r34	
	ld8	r24 = [r34], 8		
	mov	ar.lc = r35		
.mmi;	ld8	r14 = [r33], 8		
	add	r29 = r10, r11		
	nop	0
	;;
.mmi;	add	r20 = 500, r33	
	ld8	r25 = [r34], 8		
	cmp.ltu	p7, p0 = r29, r10	
.mmi;	ld8	r15 = [r33], 8		
	add	r30 = r18, r19		
	add	r3 = 8, r32		
	;;
.mmi;	ld8	r26 = [r34], 8		
	cmp.ltu	p8, p0 = r30, r18	
  (p13)	cmp.eq.or	p7, p0 = -1, r29	
.mmi;	ld8	r16 = [r33], 8		
  (p13)	add	r29 = 1, r29		
	add	r31 = r17, r27		
	;;
.mmi;	ld8	r27 = [r34], 8		
	cmp.ltu	p9, p0 = r31, r17		
   (p7)	cmp.eq.or	p8, p0 = -1, r30	
.mmb;	ld8	r17 = [r33], 8		
   (p7)	add	r30 = 1, r30		
	br	.Lm0


// Feed-in for r14 == 1.  r28 = r10 + r11; under p15 the next limb pair is
// loaded and control goes to local label 1.
// Without p15: p9 = (r23 != 0) is the carry-in, r8 is cleared, p6 = carry out
// of r28, the carry-in is applied to r28, and .Lcj1 stores it and returns.
// At 1: loads three more pairs, sets ar.lc = r35 >> 3, sets up p9, p6 and
// r29 = r15 + r25, and joins the shared loop at .Lc1.
.Lc001:
.mmi;
  (p15)	ld8	r25 = [r34], 8		
  (p15)	ld8	r15 = [r33], 8		
	add	r28 = r10, r11		
.mmb;	nop	0
	nop	0
  (p15)	br	1f
	;;
.mmi;	cmp.ne	p9, p0 = 0, r23		
	mov	r8 = 0
	cmp.ltu	p6, p0 = r28, r10	
	;;
.mmb;
   (p9)	cmp.eq.or	p6, p0 = -1, r28	
   (p9)	add	r28 = 1, r28		
	br	.Lcj1			
1:
.mmi;	ld8	r26 = [r34], 8		
	ld8	r16 = [r33], 8		
	shr.u	r35 = r35, 3		
	;;
.mmi;	ld8	r27 = [r34], 8		
	ld8	r17 = [r33], 8		
	mov	ar.lc = r35		
.mmi;	nop	0
	cmp.ne	p9, p0 = 0, r23		
	nop	0
	;;
.mmi;	ld8	r24 = [r34], 8		
	cmp.ltu	p6, p0 = r28, r10	
	add	r3 = 16, r32		
.mmb;	ld8	r14 = [r33], 8		
	add	r29 = r15, r25		
	br	.Lc1			


// Feed-in for r14 == 2.  Loads the second limb pair, clears r8, forms
// r31 = r10 + r11 and sets p8 = (r23 != 0) as the carry-in.
// Without p15: p9 = carry out of r31; the carry-in is added to r31 first and
// p9 is then also set if the incremented r31 is 0; r28 = r14 + r24 with
// carry-out p6; .Lcj2 finishes.
// At 1 (p15): loads three more pairs, sets ar.lc = r35 >> 3, applies the
// carry-in with the usual compare-with--1 / add pair, forms r28 and joins the
// shared loop at .Lm23.
.Lc010:
.mmi;	ld8	r24 = [r34], 8		
	ld8	r14 = [r33], 8		
	mov	r8 = 0			
.mmb;	add	r31 = r10, r11		
	cmp.ne	p8, p0 = 0, r23		
  (p15)	br	1f			
	;;
.mmi;	cmp.ltu	p9, p0 = r31, r10	
	add	r28 = r14, r24		
   (p8)	add	r31 = 1, r31		
	;;
.mmb;	cmp.ltu	p6, p0 = r28, r14		
   (p8)	cmp.eq.or	p9, p0 = 0, r31	
	br	.Lcj2			
1:
.mmi;	ld8	r25 = [r34], 8		
	ld8	r15 = [r33], 8		
	shr.u	r35 = r35, 3		
	;;
.mmi;	ld8	r26 = [r34], 8		
	ld8	r16 = [r33], 8		
	mov	ar.lc = r35		
	;;
.mmi;	ld8	r27 = [r34], 8		
	ld8	r17 = [r33], 8		
	cmp.ltu	p9, p0 = r31, r10	
	;;
.mmi;
   (p8)	cmp.eq.or	p9, p0 = -1, r31	
   (p8)	add	r31 = 1, r31		
	add	r28 = r14, r24		
.mmb;	add	r3 = 24, r32		
	nop	0
	br	.Lm23			


// Feed-in for r14 == 3.  Loads two more limb pairs, forms r30 = r10 + r11 and
// sets p7 = (r23 != 0) as the carry-in.
// Without p15: p8 = carry out of r30 (carry-in applied), r31 = r17 + r27, then
// .Lcj3 finishes.
// At 1 (p15): loads three more pairs, sets ar.lc = r35 (already shifted right
// by 3), applies the carries to r30 and r31, stores r30, forms r28 = r14 + r24
// and joins the shared loop at .Lm23.
.Lc011:
.mmi;	ld8	r27 = [r34], 8		
	ld8	r17 = [r33], 8		
	shr.u	r35 = r35, 3		
.mmi;	add	r30 = r10, r11		
	cmp.ne	p7, p0 = 0, r23		
	nop	0
	;;
.mmb;	ld8	r24 = [r34], 8		
	ld8	r14 = [r33], 8		
  (p15)	br	1f			
.mmi;	cmp.ltu	p8, p0 = r30, r10	
	add	r31 = r17, r27		
	nop	0
	;;
.mmb;
   (p7)	cmp.eq.or	p8, p0 = -1, r30	
   (p7)	add	r30 = 1, r30		
	br	.Lcj3			
1:
.mmi;	ld8	r25 = [r34], 8		
	ld8	r15 = [r33], 8		
	add	r31 = r17, r27		
	;;
.mmi;	ld8	r26 = [r34], 8		
	ld8	r16 = [r33], 8		
	cmp.ltu	p8, p0 = r30, r10	
	;;
.mmi;	ld8	r27 = [r34], 8		
	cmp.ltu	p9, p0 = r31, r17		
	mov	ar.lc = r35		
.mmi;	ld8	r17 = [r33], 8		
   (p7)	cmp.eq.or	p8, p0 = -1, r30	
   (p7)	add	r30 = 1, r30		
	;;
.mmi;	add	r3 = 32, r32		
	st8	[r32] = r30, 8		
   (p8)	cmp.eq.or	p9, p0 = -1, r31	
.mmb;
   (p8)	add	r31 = 1, r31		
	add	r28 = r14, r24		
	br	.Lm23


// Feed-in for r14 == 4.  Loads three more limb pairs, forms r29 = r10 + r11
// with carry-out p7, applies the carry-in p6 = (r23 != 0) to it, and forms
// r30 = r16 + r26.  With p14 set it continues at .Lcj4.  Otherwise it loads
// three more pairs, sets ar.lc = r35 (already shifted right by 3), forms
// r31 = r17 + r27, applies carry p7 to r30 and joins the shared loop at .Lm4.
.Lc100:
.mmi;	ld8	r26 = [r34], 8		
	ld8	r16 = [r33], 8		
	shr.u	r35 = r35, 3		
.mmi;	add	r29 = r10, r11		
	nop	0
	nop	0
	;;
.mmi;	ld8	r27 = [r34], 8		
	ld8	r17 = [r33], 8		
	add	r3 = 8, r32		
.mmi;	cmp.ne	p6, p0 = 0, r23		
	cmp.ltu	p7, p0 = r29, r10	
	nop	0
	;;
.mmi;	ld8	r24 = [r34], 8		
	ld8	r14 = [r33], 8		
	add	r30 = r16, r26		
.mmb;
   (p6)	cmp.eq.or	p7, p0 = -1, r29	
   (p6)	add	r29 = 1, r29		
  (p14)	br	.Lcj4
	;;
.mmi;	ld8	r25 = [r34], 8		
	ld8	r15 = [r33], 8		
	mov	ar.lc = r35		
	;;
.mmi;	ld8	r26 = [r34], 8		
	cmp.ltu	p8, p0 = r30, r16		
	nop	0
.mmi;	ld8	r16 = [r33], 8		
	nop	0
	add	r31 = r17, r27		
	;;
.mmi;	ld8	r27 = [r34], 8		
	cmp.ltu	p9, p0 = r31, r17		
   (p7)	cmp.eq.or	p8, p0 = -1, r30	
.mmb;	ld8	r17 = [r33], 8		
   (p7)	add	r30 = 1, r30		
	br	.Lm4


// Feed-in for r14 == 5.  Loads four more limb pairs, sets ar.lc = r35 >> 3,
// forms r28 = r10 + r11 with carry-out p6, sets p9 = (r23 != 0) as the carry-in
// and forms r29 = r15 + r25.  With p15 it joins the shared loop at .Lc5;
// otherwise it goes to .Lend, which applies p9 and restores ar.lc.
.Lc101:
.mmi;	ld8	r25 = [r34], 8		
	ld8	r15 = [r33], 8		
	shr.u	r35 = r35, 3		
	;;
.mmi;	ld8	r26 = [r34], 8		
	ld8	r16 = [r33], 8		
	mov	ar.lc = r35		
	;;
.mmi;	ld8	r27 = [r34], 8		
	ld8	r17 = [r33], 8		
	add	r28 = r10, r11		
.mmi;	cmp.ne	p9, p0 = 0, r23		
	add	r3 = 16, r32		
	nop	0
	;;
.mmi;	ld8	r24 = [r34], 8		
	cmp.ltu	p6, p0 = r28, r10	
	ld8	r14 = [r33], 8		
.mbb;	add	r29 = r15, r25		
  (p15)	br	.Lc5			
	br	.Lend			


// Feed-in for r14 == 6.  Loads four more limb pairs, sets ar.lc = r35 >> 3,
// recomputes the lfetch addresses r20 / r21, forms r31 = r10 + r11 with
// carry-out p9 and r28 = r14 + r24, applies the carry-in p8 = (r23 != 0) to
// r31, and joins the shared loop at .Lm67.
.Lc110:
.mmi;	ld8	r24 = [r34], 8		
	ld8	r14 = [r33], 8		
	shr.u	r35 = r35, 3		
	;;
.mmi;	add	r20 = 500, r33	
	add	r21 = 500, r34	
	mov	ar.lc = r35		
.mmi;	ld8	r25 = [r34], 8		
	ld8	r15 = [r33], 8		
	add	r31 = r10, r11		
	;;
.mmi;	ld8	r26 = [r34], 8		
	ld8	r16 = [r33], 8		
	add	r28 = r14, r24		
.mmi;	cmp.ltu	p9, p0 = r31, r10	
	cmp.ne	p8, p0 = 0, r23		
	add	r3 = 24, r32		
	;;
.mmi;	ld8	r27 = [r34], 8		
	ld8	r17 = [r33], 8		
	nop	0
.mmb;
   (p8)	cmp.eq.or	p9, p0 = -1, r31	
   (p8)	add	r31 = 1, r31		
	br	.Lm67			


// Feed-in for r14 == 7.  Uses the limb pair r18 / r19 preloaded by the dispatch
// code.  Loads four more pairs, sets ar.lc = r35 >> 3, forms r30 = r10 + r11
// (carry-out p8, carry-in applied under p13) and r31 = r18 + r19 (carry-out p9,
// carry p8 applied), stores r30, forms r28 = r14 + r24 and joins the shared
// loop at .Lm67.
.Lc111:
.mmi;	ld8	r24 = [r34], 8		
	ld8	r14 = [r33], 8		
	shr.u	r35 = r35, 3		
	;;
.mmi;	add	r20 = 500, r33	
	ld8	r25 = [r34], 8		
	mov	ar.lc = r35		
.mmi;	ld8	r15 = [r33], 8		
	add	r30 = r10, r11		
	nop	0
	;;
.mmi;	add	r21 = 500, r34	
	ld8	r26 = [r34], 8		
	cmp.ltu	p8, p0 = r30, r10	
.mmi;	ld8	r16 = [r33], 8		
	add	r31 = r18, r19		
	nop	0
	;;
.mmi;	ld8	r27 = [r34], 8		
	cmp.ltu	p9, p0 = r31, r18	
  (p13)	cmp.eq.or	p8, p0 = -1, r30	
.mmi;	ld8	r17 = [r33], 8		
  (p13)	add	r30 = 1, r30		
	nop	0
	;;
.mmi;	add	r3 = 32, r32		
	st8	[r32] = r30, 8		
   (p8)	cmp.eq.or	p9, p0 = -1, r31	
.mmb;
   (p8)	add	r31 = 1, r31		
	add	r28 = r14, r24		
	br	.Lm67

	
	.endp	__gmpn_add_nc#



	
	// __gmpn_add_n, second attachment (described in the message as tmp-add_n.s
	// from 5.1.2).  Register use visible in this procedure: result limbs are
	// stored through r32, operand limbs are loaded through r33 and r34, r35 holds
	// the limb count, and r8 is set to 0 or 1 before the return.  ar.lc is saved
	// in r2.  The loop and wind-down labels of this procedure are also branch
	// targets of __gmpn_add_nc above.
	.text
	.align	32
	.global	__gmpn_add_n#
	.proc	__gmpn_add_n#
__gmpn_add_n:
	.prologue
	.save	ar.lc, r2
	.body

	// addp4 forms 64-bit addresses from the 32-bit pointers in r32-r34; zxt4
	// zero-extends the low 32 bits of the count in r35.
	addp4	r32 = 0, r32		
	addp4	r33 = 0, r33		
	addp4	r34 = 0, r34		
	zxt4	r35 = r35			
	;;


// Load the first limb pair, save ar.lc in r2, compute r14 = r35 & 7 and
// p15 = (8 < r35) / p14 = complement, reduce r35 by 6, then dispatch on
// r14 = 1..7 to .Lb001 .. .Lb111 (r14 == 0 falls through to .Lb000).
 {.mmi;	ld8	r11 = [r34], 8		
	ld8	r10 = [r33], 8		
	mov	r2 = ar.lc		
}{.mmi;	and	r14 = 7, r35		
	cmp.lt	p15, p14 = 8, r35		
	add	r35 = -6, r35		
	;;
}{.mmi;	cmp.eq	p6, p0 = 1, r14		
	cmp.eq	p7, p0 = 2, r14		
	cmp.eq	p8, p0 = 3, r14		
}{.bbb
   (p6)	br.dptk	.Lb001			
   (p7)	br.dptk	.Lb010			
   (p8)	br.dptk	.Lb011			
	;;
}{.mmi;	cmp.eq	p9, p0 = 4, r14		
	cmp.eq	p10, p0 = 5, r14	
	cmp.eq	p11, p0 = 6, r14	
}{.bbb
   (p9)	br.dptk	.Lb100			
  (p10)	br.dptk	.Lb101			
  (p11)	br.dptk	.Lb110			
	;;
// Only the r14 == 7 and r14 == 0 paths get here: preload a second limb pair
// (r18, r19), clear p13 (cmp.ne r0, r0 is never true) and clear r23.
}{.mmi;	ld8	r19 = [r34], 8		
	ld8	r18 = [r33], 8		
	cmp.ne	p13, p0 = r0, r0	
}{.mmb;	cmp.eq	p12, p0 = 7, r14	
	mov	r23 = 0			
  (p12)	br.dptk	.Lb111			
	;;
}

// Feed-in for r14 == 0 (fall-through from the dispatch code).  Forms
// r29 = r10 + r11 (carry-out p7), r30 = r18 + r19 (carry-out p8, carry p7
// applied) and r31 = r17 + r27 (carry-out p9), sets ar.lc = r35 >> 3 and the
// lfetch addresses r20 / r21, and enters the loop at .Lm0.
.Lb000:
.mmi;	ld8	r27 = [r34], 8		
	ld8	r17 = [r33], 8		
	shr.u	r35 = r35, 3		
	;;
.mmi;	ld8	r24 = [r34], 8		
	ld8	r14 = [r33], 8		
	add	r29 = r10, r11		
	;;
.mmi;	ld8	r25 = [r34], 8		
	cmp.ltu	p7, p0 = r29, r10	
	mov	ar.lc = r35		
.mmi;	ld8	r15 = [r33], 8		
	add	r30 = r18, r19		
	add	r3 = 8, r32		
	;;
.mmi;	add	r20 = 500, r33
	add	r21 = 500, r34
	cmp.ltu	p8, p0 = r30, r18	
.mmi;	ld8	r26 = [r34], 8		
	ld8	r16 = [r33], 8		
	add	r31 = r17, r27		
	;;
.mmi;	ld8	r27 = [r34], 8		
	cmp.ltu	p9, p0 = r31, r17		
   (p7)	cmp.eq.or	p8, p0 = -1, r30	
.mmb;	ld8	r17 = [r33], 8		
   (p7)	add	r30 = 1, r30		
	br	.Lm0			


	// Feed-in for r14 == 1.  r28 = r10 + r11 with carry-out p6; r8 is cleared.
	// With p14 set, .Lcj1 stores r28 and returns.  Otherwise (the p15 loads have
	// fetched the next pair) it loads four more pairs, sets ar.lc = r35 >> 3 and
	// the lfetch addresses r20 / r21, forms r29 = r15 + r25 (carry-out p7) and
	// r30 = r16 + r26, and enters the loop at .Lm1.
	.align	32
.Lb001:
.mmi;	add	r28 = r10, r11		
  (p15)	ld8	r25 = [r34], 8		
	mov	r8 = 0			
	;;
.mmb;	cmp.ltu	p6, p0 = r28, r10	
  (p15)	ld8	r15 = [r33], 8		
  (p14)	br	.Lcj1			
	;;
.mmi;	add	r20 = 500, r33
	add	r21 = 500, r34
	shr.u	r35 = r35, 3		
.mmi;	ld8	r26 = [r34], 8		
	ld8	r16 = [r33], 8		
	cmp.ltu	p6, p0 = r28, r10	
	;;
.mmi;	ld8	r27 = [r34], 8		
	ld8	r17 = [r33], 8		
	mov	ar.lc = r35		
	;;
.mmi;	ld8	r24 = [r34], 8		
	ld8	r14 = [r33], 8		
	add	r29 = r15, r25		
	;;
.mmi;	ld8	r25 = [r34], 8		
	cmp.ltu	p7, p0 = r29, r15		
	add	r30 = r16, r26		
.mmb;	ld8	r15 = [r33], 8		
	add	r3 = 16, r32		
	br	.Lm1			


	// Feed-in for r14 == 2.  Loads the second limb pair and forms
	// r31 = r10 + r11.
	// Without p15: p9 = carry out of r31, r28 = r14 + r24 with carry-out p6,
	// r8 is cleared, then .Lcj2 finishes.
	// With p15: .Lgt2 loads three more pairs, sets ar.lc = r35 (already shifted
	// right by 3) and the lfetch addresses r20 / r21, computes the same p9 and
	// r28, and enters the loop at .Lm23.
	.align	32
.Lb010:
.mmi;	ld8	r24 = [r34], 8		
	ld8	r14 = [r33], 8		
	shr.u	r35 = r35, 3		
.mmb;	add	r31 = r10, r11		
	nop	0
  (p15)	br	.Lgt2			
	;;
.mmi;	cmp.ltu	p9, p0 = r31, r10	
	add	r28 = r14, r24		
	mov	r8 = 0			
	;;
.mmb;	nop	0
	cmp.ltu	p6, p0 = r28, r14		
	br	.Lcj2			
.Lgt2:
.mmi;	ld8	r25 = [r34], 8		
	ld8	r15 = [r33], 8		
	nop	0
	;;
.mmi;	add	r20 = 500, r33
	add	r21 = 500, r34
	mov	ar.lc = r35		
.mmi;	ld8	r26 = [r34], 8		
	ld8	r16 = [r33], 8		
	nop	0
	;;
.mmi;	ld8	r27 = [r34], 8		
	cmp.ltu	p9, p0 = r31, r10	
	add	r28 = r14, r24		
.mmb;	ld8	r17 = [r33], 8		
	add	r3 = 24, r32		
	br	.Lm23			


	// Feed-in for r14 == 3.  Loads two more limb pairs and forms
	// r30 = r10 + r11.
	// Without p15: p8 = carry out of r30, r31 = r17 + r27, then .Lcj3 finishes.
	// At 1 (p15): loads three more pairs, sets ar.lc = r35 >> 3 and the lfetch
	// addresses r20 / r21, computes p8, r31 and p9, stores r30, applies carry p8
	// to r31, forms r28 = r14 + r24 and enters the loop at .Lm23.
	.align	32
.Lb011:
.mmi;	ld8	r27 = [r34], 8		
	ld8	r17 = [r33], 8		
	add	r30 = r10, r11		
	;;
.mmb;	ld8	r24 = [r34], 8		
	ld8	r14 = [r33], 8		
  (p15)	br	1f			
.mmb;	cmp.ltu	p8, p0 = r30, r10	
	add	r31 = r17, r27		
	br	.Lcj3			
1:
.mmi;	ld8	r25 = [r34], 8		
	ld8	r15 = [r33], 8		
	shr.u	r35 = r35, 3		
	;;
.mmi;	add	r20 = 500, r33
	add	r21 = 500, r34
	add	r31 = r17, r27		
.mmi;	ld8	r26 = [r34], 8		
	ld8	r16 = [r33], 8		
	cmp.ltu	p8, p0 = r30, r10	
	;;
.mmi;	ld8	r27 = [r34], 8		
	cmp.ltu	p9, p0 = r31, r17		
	mov	ar.lc = r35		
.mmi;	ld8	r17 = [r33], 8		
	nop	0
	nop	0
	;;
.mmi;	add	r3 = 32, r32		
	st8	[r32] = r30, 8		
   (p8)	cmp.eq.or	p9, p0 = -1, r31	
.mmb;
   (p8)	add	r31 = 1, r31		
	add	r28 = r14, r24		
	br	.Lm23			


	// Feed-in for r14 == 4.  Loads three more limb pairs, forms r29 = r10 + r11
	// with carry-out p7 and r30 = r16 + r26.  With p14 set it continues at
	// .Lcj4.  Otherwise .Lgt4 loads three more pairs, sets ar.lc = r35 (already
	// shifted right by 3) and the lfetch addresses r20 / r21, forms
	// r31 = r17 + r27, applies carry p7 to r30 and enters the loop at .Lm4.
	.align	32
.Lb100:
.mmi;	ld8	r26 = [r34], 8		
	ld8	r16 = [r33], 8		
	shr.u	r35 = r35, 3		
	;;
.mmi;	ld8	r27 = [r34], 8		
	ld8	r17 = [r33], 8		
	add	r29 = r10, r11		
	;;
.mmi;	ld8	r24 = [r34], 8		
	ld8	r14 = [r33], 8		
	cmp.ltu	p7, p0 = r29, r10	
.mmb;	nop	0
	add	r30 = r16, r26		
  (p14)	br	.Lcj4			
	;;
.Lgt4:
.mmi;	add	r20 = 500, r33
	add	r21 = 500, r34
	mov	ar.lc = r35		
	ld8	r25 = [r34], 8		
	ld8	r15 = [r33], 8		
	nop	0
	;;
.mmi;	ld8	r26 = [r34], 8		
	cmp.ltu	p8, p0 = r30, r16		
	nop	0
.mmi;	ld8	r16 = [r33], 8		
	add	r31 = r17, r27		
	add	r3 = 8, r32		
	;;
.mmi;	ld8	r27 = [r34], 8		
	cmp.ltu	p9, p0 = r31, r17		
   (p7)	cmp.eq.or	p8, p0 = -1, r30	
.mmb;	ld8	r17 = [r33], 8		
   (p7)	add	r30 = 1, r30		
	br	.Lm4			


	// Feed-in for r14 == 5.  Loads four more limb pairs, sets the lfetch
	// addresses r20 / r21, forms r28 = r10 + r11 with carry-out p6 and
	// r29 = r15 + r25.  With p14 set it continues at .Lcj5.  Otherwise .Lgt5
	// loads one more pair, sets ar.lc = r35 (already shifted right by 3),
	// computes p7 and r30 = r16 + r26, and enters the loop at .Lm5.
	.align	32
.Lb101:
.mmi;	ld8	r25 = [r34], 8		
	ld8	r15 = [r33], 8		
	shr.u	r35 = r35, 3		
	;;
.mmi;	ld8	r26 = [r34], 8		
	ld8	r16 = [r33], 8		
	add	r28 = r10, r11		
	;;
.mmi;	add	r20 = 500, r33
	add	r21 = 500, r34
	add	r3 = 16, r32		
	ld8	r27 = [r34], 8		
	ld8	r17 = [r33], 8		
	nop	0
	;;
.mmi;	ld8	r24 = [r34], 8		
	cmp.ltu	p6, p0 = r28, r10	
	nop	0
.mmb;	ld8	r14 = [r33], 8		
	add	r29 = r15, r25		
  (p14)	br	.Lcj5			
	;;
.Lgt5:
.mmi;	ld8	r25 = [r34], 8		
	cmp.ltu	p7, p0 = r29, r15		
	mov	ar.lc = r35		
.mmb;	ld8	r15 = [r33], 8		
	add	r30 = r16, r26		
	br	.Lm5			


	// Feed-in for r14 == 6.  Loads four more limb pairs, sets ar.lc = r35 >> 3
	// and the lfetch addresses r20 / r21, forms r31 = r10 + r11 with carry-out
	// p9 and r28 = r14 + r24, and enters the loop at .Lm67.  There is no separate
	// short-operand branch on this path.
	.align	32
.Lb110:
.mmi;	ld8	r24 = [r34], 8		
	ld8	r14 = [r33], 8		
	shr.u	r35 = r35, 3		
	;;
.mmi;	ld8	r25 = [r34], 8		
	ld8	r15 = [r33], 8		
	add	r31 = r10, r11		
	;;
.mmi;	add	r20 = 500, r33
	add	r21 = 500, r34
	mov	ar.lc = r35		
.mmi;	ld8	r26 = [r34], 8		
	ld8	r16 = [r33], 8		
	nop	0
	;;
.mmi;	ld8	r27 = [r34], 8		
	cmp.ltu	p9, p0 = r31, r10	
	add	r28 = r14, r24		
.mmb;	ld8	r17 = [r33], 8		
	add	r3 = 24, r32		
	br	.Lm67			


	// Feed-in for r14 == 7.  Uses the limb pair r18 / r19 preloaded by the
	// dispatch code.  Loads four more pairs, sets ar.lc = r35 >> 3 and the lfetch
	// addresses r20 / r21, forms r30 = r10 + r11 (carry-out p8) and
	// r31 = r18 + r19 (carry-out p9, carry p8 applied), stores r30, forms
	// r28 = r14 + r24 and enters the loop at .Lm67.
	.align	32
.Lb111:
.mmi;	ld8	r24 = [r34], 8		
	ld8	r14 = [r33], 8		
	shr.u	r35 = r35, 3		
	;;
.mmi;	ld8	r25 = [r34], 8		
	ld8	r15 = [r33], 8		
	add	r30 = r10, r11		
	;;
.mmi;	ld8	r26 = [r34], 8		
	cmp.ltu	p8, p0 = r30, r10	
	mov	ar.lc = r35		
.mmi;	ld8	r16 = [r33], 8		
	add	r31 = r18, r19		
	nop	0
	;;
.mmi;	add	r20 = 500, r33
	add	r21 = 500, r34
	nop	0
.mmi;	ld8	r27 = [r34], 8		
	ld8	r17 = [r33], 8		
	cmp.ltu	p9, p0 = r31, r18	
	;;
.mmi;	add	r3 = 32, r32		
	st8	[r32] = r30, 8		
   (p8)	cmp.eq.or	p9, p0 = -1, r31	
.mmb;
   (p8)	add	r31 = 1, r31		
	add	r28 = r14, r24		
	br	.Lm67			



	// Main loop, shared by __gmpn_add_n and __gmpn_add_nc.  One pass stores eight
	// result limbs and loads eight new limb pairs.  Stores go through r32 except
	// every fourth limb, which goes through r3 (r32 steps by 16 over that slot,
	// r3 steps by 32).
	// Carry pattern: cmp.ltu sets the carry-out predicate of a raw sum; under the
	// incoming carry predicate, cmp.eq.or also sets it when the raw sum is -1
	// (all ones), and add increments the sum.
	// lfetch touches the addresses in r20 / r21 and advances them by 64.
	// .Lc5, .Lm5, .Lm4, .Lm23, .Lc1, .Lm1, .Lm0 and .Lm67 are entry points used by
	// the feed-in code.  br.cloop repeats the loop while ar.lc is nonzero.
	.align	32
.Ltop:
.Lc5:	ld8	r25 = [r34], 8		
	cmp.ltu	p7, p0 = r29, r15		
   (p9)	cmp.eq.or	p6, p0 = -1, r28	
	ld8	r15 = [r33], 8		
   (p9)	add	r28 = 1, r28		
	add	r30 = r16, r26		
	;;
.Lm5:	ld8	r26 = [r34], 8		
	cmp.ltu	p8, p0 = r30, r16		
   (p6)	cmp.eq.or	p7, p0 = -1, r29	
	ld8	r16 = [r33], 8		
   (p6)	add	r29 = 1, r29		
	add	r31 = r17, r27		
	;;
	st8	[r32] = r28, 8		
	ld8	r27 = [r34], 8		
	cmp.ltu	p9, p0 = r31, r17		
   (p7)	cmp.eq.or	p8, p0 = -1, r30	
	ld8	r17 = [r33], 8		
   (p7)	add	r30 = 1, r30		
	;;
.Lm4:	st8	[r32] = r29, 16		
	st8	[r3] = r30, 32		
   (p8)	cmp.eq.or	p9, p0 = -1, r31	
	lfetch	[r20], 64
   (p8)	add	r31 = 1, r31		
	add	r28 = r14, r24		
	;;
.Lm23:	st8	[r32] = r31, 8		
	ld8	r24 = [r34], 8		
	cmp.ltu	p6, p0 = r28, r14		
	ld8	r14 = [r33], 8		
	add	r29 = r15, r25		
	nop.b	0
	;;
.Lc1:	ld8	r25 = [r34], 8		
	cmp.ltu	p7, p0 = r29, r15		
   (p9)	cmp.eq.or	p6, p0 = -1, r28	
	ld8	r15 = [r33], 8		
   (p9)	add	r28 = 1, r28		
	add	r30 = r16, r26		
	;;
.Lm1:	ld8	r26 = [r34], 8		
	cmp.ltu	p8, p0 = r30, r16		
   (p6)	cmp.eq.or	p7, p0 = -1, r29	
	ld8	r16 = [r33], 8		
   (p6)	add	r29 = 1, r29		
	add	r31 = r17, r27		
	;;
	st8	[r32] = r28, 8		
	ld8	r27 = [r34], 8		
	cmp.ltu	p9, p0 = r31, r17		
   (p7)	cmp.eq.or	p8, p0 = -1, r30	
	ld8	r17 = [r33], 8		
   (p7)	add	r30 = 1, r30		
	;;
.Lm0:	st8	[r32] = r29, 16		
	st8	[r3] = r30, 32		
   (p8)	cmp.eq.or	p9, p0 = -1, r31	
	lfetch	[r21], 64
   (p8)	add	r31 = 1, r31		
	add	r28 = r14, r24		
	;;
.Lm67:	st8	[r32] = r31, 8		
	ld8	r24 = [r34], 8		
	cmp.ltu	p6, p0 = r28, r14		
	ld8	r14 = [r33], 8		
	add	r29 = r15, r25		
	br.cloop.dptk	.Ltop		
	;;


// Wind-down, shared by __gmpn_add_n and __gmpn_add_nc: finishes the sums for
// the limbs that are already loaded, with no further loads, storing each
// result through r32.  .Lend is reached by falling out of the loop (or from
// .Lc101) and is the only place here where ar.lc is restored from r2.
// .Lcj5 .. .Lcj1 are entered directly from the feed-in code for short operands.
// r8 is cleared on the way to .Lcj2 and set to 1 at .Lcj1 if the last carry
// predicate p6 is set; the procedure returns through b0.
.Lend:
.mmi;
   (p9)	cmp.eq.or	p6, p0 = -1, r28	
   (p9)	add	r28 = 1, r28		
	mov	ar.lc = r2		
.Lcj5:
.mmi;	cmp.ltu	p7, p0 = r29, r15		
	add	r30 = r16, r26		
	nop	0
	;;
.mmi;	st8	[r32] = r28, 8		
   (p6)	cmp.eq.or	p7, p0 = -1, r29	
   (p6)	add	r29 = 1, r29		
.Lcj4:
.mmi;	cmp.ltu	p8, p0 = r30, r16		
	add	r31 = r17, r27		
	nop	0
	;;
.mmi;	st8	[r32] = r29, 8		
   (p7)	cmp.eq.or	p8, p0 = -1, r30	
   (p7)	add	r30 = 1, r30		
.Lcj3:
.mmi;	cmp.ltu	p9, p0 = r31, r17		
	add	r28 = r14, r24		
	nop	0
	;;
.mmi;	st8	[r32] = r30, 8		
   (p8)	cmp.eq.or	p9, p0 = -1, r31	
   (p8)	add	r31 = 1, r31		
.mmi;	cmp.ltu	p6, p0 = r28, r14		
	nop	0
	mov	r8 = 0			
	;;
.Lcj2:
.mmi;	st8	[r32] = r31, 8		
   (p9)	cmp.eq.or	p6, p0 = -1, r28	
   (p9)	add	r28 = 1, r28		
	;;
.Lcj1:
.mmb;	st8	[r32] = r28, 8		
   (p6)	mov	r8 = 1			
	br.ret.sptk.many b0		
	
	.endp	__gmpn_add_n#



More information about the gmp-bugs mailing list