Athlon XP Single limb multiply
Bowers, Rickey N,,DMDCWEST
Bowersrn at osd.pentagon.mil
Thu Dec 18 13:04:27 CET 2003
This code is in MASM syntax and consistently runs at ~3 cycles per limb. The
only improvements are a reduced code size and unrolling to process 32 limbs
at once. Here is the loop code that gets unrolled:
; One limb step of the unrolled loop; i is the assembly-time byte offset of
; this step. On entry eax holds the current limb and ebx the incoming carry.
mul ebp ; edx:eax = eax * ebp (ebp = multiplier)
add ebx, eax ; ebx = incoming carry + low product word; CF = overflow
mov eax, [esi][i] ; fetch the limb for the next step (mov leaves CF intact)
mov [esi][i][-4], ebx ; store the finished result word at offset i-4
mov ebx, ecx ; the full routine below keeps ecx = 0: clears ebx, CF preserved
adc ebx, edx ; ebx = high product word + CF = carry into the next step
The following is the whole routine I use:
OPTION PROLOGUE:NONE
OPTION EPILOGUE:NONE
ALIGN _CODE_ALIGNMENT_
;-----------------------------------------------------------------------
; mpn_mul_1__k7_04 -- multiply a multi-limb integer, in place, by one
;                     32-bit limb and return the final carry word.
;
; Syntax: MASM, 32-bit x86. Arguments are passed on the stack and are
;         removed by the callee (retn 4*3).
;
; In:    bInt  = pointer to the limb array; the limb at the lowest
;                address is processed first and carries propagate
;                upwards. The limb count is read from
;                [bInt].BIGINT.limbs - SIZEOF BIGINT.
;                NOTE(review): this looks like a BIGINT header stored
;                immediately before the limb data -- confirm against
;                the BIGINT declaration.
;        limb  = multiplier
;        carry = value added into the lowest result limb
; Out:   eax   = carry word left over after the highest limb
; Saved: ebp, ebx, esi, edi (pushed/popped by hand because the automatic
;        prologue/epilogue are disabled above; the arguments are
;        therefore addressed esp-relative through the EQUs below).
; Clobb: ecx, edx, flags
;
; Register roles inside the unrolled loop:
;   eax = current limb, then low product word     ebp = multiplier
;   edx = high product word                       ecx = constant 0
;   ebx = carry word being accumulated            edi = full passes left
;   esi = biased limb pointer (see the ESI adjustment below)
;
; Note: every step loads the *next* limb before storing its own result,
; so the final step reads one dword beyond the last limb. The value is
; never used, but the memory must be readable.
;-----------------------------------------------------------------------
; loop range from 2 thru 62 (author's note on the supported unroll factors)
;32: 3.0700 cycles per limb (DWORD)
mpn_mul_1__k7_04_UNROLL = 32
mpn_mul_1__k7_04 PROC USES ebx esi edi, bInt:PTR BIGINT, limb:DWORD,
carry:DWORD
; Four registers are pushed below, so the return address sits at [esp+16]
; and the first argument at [esp+4*(4+1)].
pBIGINT EQU <[esp+4*(4+1)]>
_limb EQU <[esp+4*(4+2)]>
_carry EQU <[esp+4*(4+3)]>
        push ebp
        push ebx
        push esi
        push edi
        mov esi, pBIGINT
; Split the limb count into edi = number of full unrolled passes and
; edx = number of leftover limbs (count MOD UNROLL).
IF 32 EQ mpn_mul_1__k7_04_UNROLL
        ; Power-of-two unroll: shift/mask instead of a division. (The former
        ; "mov ecx, UNROLL" here was dead -- ecx is overwritten by the imul
        ; below before it is ever read -- and has been removed.)
        mov edi, [esi].BIGINT.limbs - SIZEOF BIGINT
        mov edx, edi
        shr edi, 5 ; divide by 32
        and edx, 1Fh ; remainder
ELSE
        mov ecx, mpn_mul_1__k7_04_UNROLL
        mov eax, [esi].BIGINT.limbs - SIZEOF BIGINT
        xor edx, edx ; div is unsigned: the high half of edx:eax must be 0
                     ; (was cdq, which sign-extends and is only right for idiv)
        div ecx ; eax = quotient, edx = remainder
        mov edi, eax
ENDIF
        ; Each unrolled limb step assembles to exactly 14 bytes, so the entry
        ; point for the leftover limbs lies 14*remainder bytes before _2.
        imul ecx, edx, -14 ; mpn_mul_1__k7_04_LIMB_CODE
        mov eax, [esi] ; preload the first limb; every step expects its limb in eax
        mov ebp, _limb ; multiplier
        mov ebx, _carry ; carry
; Adjust ESI so that every access inside the unrolled code needs only a
; signed byte displacement, which allows a greater unroll range: each
; pass can span a maximum of 128 bytes.
;
; The step entered first has i = 80h - 4*remainder and stores its result
; at [esi + i - 4]; that store has to hit the first limb, therefore
;
; ESI = ESI + 4*(remainder - 31)
;
; The 31 is (80h - 4)/4 and does not depend on the unroll factor.
        sub edx, 31
        shl edx, 2
        add esi, edx
        ; jump to complete partial roll first
        lea edx, [ecx][_2] ; edx = _2 - 14*remainder
        xor ecx, ecx ; ecx stays 0 for the "mov ebx, ecx" in every step
        jmp edx
ALIGN 16
_0:
; One full pass: UNROLL limb steps, with i running from 80h - 4*UNROLL up
; to 7Ch in steps of 4.
i=80h - 4*mpn_mul_1__k7_04_UNROLL
WHILE i LT 80h
        mul ebp ; edx:eax = limb * multiplier
        add ebx, eax ; add the incoming carry to the low word; CF = overflow
; The load/store pair must always use the 3-byte disp8 encodings to keep
; the step at 14 bytes, so the two zero-displacement cases are emitted as
; raw bytes with an explicit 00h displacement.
IF i EQ 0
        BYTE 8Bh, 46h, 00h ; mov eax, [esi+00h] with an explicit disp8
        mov [esi][i][-4], ebx
ELSEIF i EQ 4
        mov eax, [esi][i]
        BYTE 89h, 5Eh, 00h ; mov [esi+00h], ebx with an explicit disp8
ELSEIF i GT -80h
        mov eax, [esi][i] ; next limb (mov does not disturb CF)
        mov [esi][i][-4], ebx ; store this step's result word
ELSE ; only byte offsets
.err mpn_mul_1__k7_04_UNROLL too big!
ENDIF
        mov ebx, ecx ; ebx = 0 without touching CF
        adc ebx, edx ; carry out = high product word + CF
i=i+4
ENDM
; Entry point when there are no leftover limbs, and end of every pass.
_2:     dec edi ; one pass fewer to go; turns negative once all are done
        lea esi, [esi + 4*mpn_mul_1__k7_04_UNROLL] ; next group of limbs (lea keeps the flags)
        jns _0 ; tests the sign flag left by dec edi
        mov eax, ebx ; return carry
        pop edi
        pop esi
        pop ebx
        pop ebp
        retn 4*3 ; callee removes the three DWORD arguments
mpn_mul_1__k7_04 ENDP
* mailto:bowersrn at osd.pentagon.mil <mailto:bowersrn at osd.pentagon.mil>
* 831.583.2500 Ex.5339
-------------- next part --------------
An HTML attachment was scrubbed...
URL: /list-archives/gmp-devel/attachments/20031218/c4d9b492/attachment-0001.htm
More information about the gmp-devel
mailing list