Small operands gcd improvements
Marco Bodrato
bodrato at mail.dm.unipi.it
Tue Aug 13 01:17:12 UTC 2019
Ciao,
Il Sab, 10 Agosto 2019 7:01 pm, Torbjörn Granlund ha scritto:
> We might provide several gcd_11 function variants to accommodate the
> internal uses you bring up.
>
> gcd_1o1o - two odd limbs
> gcd_1o1 - one odd and one odd/even limb
> gcd_11 - two odd/even limbs
This would be a rich set of entry points...
This means we are currently working on the _1o1o variants.
May I propose a small latency micro-optimisation for two of the
just-proposed x86_64 variants? The idea is to not use the register %r10 at
all, and to keep the value of v0 directly in %rax, so that it is already in
place when the function returns.
-----------------------------------------------
diff -Nrc6 gmp.b252c7e4f9b6/mpn/x86_64/bd2/gcd_11.asm gmp/mpn/x86_64/bd2/gcd_11.asm
*** gmp.b252c7e4f9b6/mpn/x86_64/bd2/gcd_11.asm 2019-08-08 16:29:36.000000000 +0200
--- gmp/mpn/x86_64/bd2/gcd_11.asm 2019-08-13 02:55:44.287847000 +0200
***************
*** 69,93 ****
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_gcd_11)
FUNC_ENTRY(2)
! mov v0, %r10 C
! sub u0, %r10 C
jz L(end) C
ALIGN(16) C K10 BD1 BD2 ZEN CNR NHM SBR
! L(top): rep;bsf %r10, %rcx C tzcnt! 3 3 3 2 6 5 5
mov u0, %r9 C 2 2 2 2 3 3 4
! sub v0, u0 C 2 2 2 2 4 3 4
! cmovc %r10, u0 C if x-y < 0 0,3 0,3 0,3 0,3 0,6 0,5 0,5
! cmovc %r9, v0 C use x,y-x 0,3 0,3 0,3 0,3 2,8 1,7 1,7
shr R8(%rcx), u0 C 1,7 1,6 1,5 1,4 2,8 2,8 2,8
! mov v0, %r10 C 1 1 1 1 4 3 3
! sub u0, %r10 C 2 2 2 1 5 4 4
jnz L(top) C
! L(end): mov v0, %rax
! FUNC_EXIT()
ret
EPILOGUE()
--- 69,92 ----
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_gcd_11)
FUNC_ENTRY(2)
! mov v0, %rax C
! sub u0, v0 C
jz L(end) C
ALIGN(16) C K10 BD1 BD2 ZEN CNR NHM SBR
! L(top): rep;bsf v0, %rcx C tzcnt! 3 3 3 2 6 5 5
mov u0, %r9 C 2 2 2 2 3 3 4
! sub %rax, u0 C 2 2 2 2 4 3 4
! cmovc v0, u0 C if x-y < 0 0,3 0,3 0,3 0,3 0,6 0,5 0,5
! cmovc %r9, %rax C use x,y-x 0,3 0,3 0,3 0,3 2,8 1,7 1,7
shr R8(%rcx), u0 C 1,7 1,6 1,5 1,4 2,8 2,8 2,8
! mov %rax, v0 C 1 1 1 1 4 3 3
! sub u0, v0 C 2 2 2 1 5 4 4
jnz L(top) C
! L(end): FUNC_EXIT()
ret
EPILOGUE()
diff -Nrc6 gmp.b252c7e4f9b6/mpn/x86_64/core2/gcd_11.asm gmp/mpn/x86_64/core2/gcd_11.asm
*** gmp.b252c7e4f9b6/mpn/x86_64/core2/gcd_11.asm 2019-08-08 16:29:36.000000000 +0200
--- gmp/mpn/x86_64/core2/gcd_11.asm 2019-08-13 02:55:44.287847000 +0200
***************
*** 69,93 ****
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_gcd_11)
FUNC_ENTRY(2)
! mov v0, %r10 C
! sub u0, %r10 C
jz L(end) C
ALIGN(16) C K10 BD1 CNR NHM SBR
! L(top): bsf %r10, %rcx C 3 3 6 5 5
mov u0, %r9 C 2 2 3 3 4
! sub v0, u0 C 2 2 4 3 4
! cmovc %r10, u0 C if x-y < 0 0,3 0,3 0,6 0,5 0,5
! cmovc %r9, v0 C use x,y-x 0,3 0,3 2,8 1,7 1,7
shr R8(%rcx), u0 C 1,7 1,6 2,8 2,8 2,8
! mov v0, %r10 C 1 1 4 3 3
! sub u0, %r10 C 2 2 5 4 4
jnz L(top) C
! L(end): mov v0, %rax
! FUNC_EXIT()
ret
EPILOGUE()
--- 69,92 ----
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_gcd_11)
FUNC_ENTRY(2)
! mov v0, %rax C
! sub u0, v0 C
jz L(end) C
ALIGN(16) C K10 BD1 CNR NHM SBR
! L(top): bsf v0, %rcx C 3 3 6 5 5
mov u0, %r9 C 2 2 3 3 4
! sub %rax, u0 C 2 2 4 3 4
! cmovc v0, u0 C if x-y < 0 0,3 0,3 0,6 0,5 0,5
! cmovc %r9, %rax C use x,y-x 0,3 0,3 2,8 1,7 1,7
shr R8(%rcx), u0 C 1,7 1,6 2,8 2,8 2,8
! mov %rax, v0 C 1 1 4 3 3
! sub u0, v0 C 2 2 5 4 4
jnz L(top) C
! L(end): FUNC_EXIT()
ret
EPILOGUE()
-----------------------------------------------
Ĝis,
m
--
http://bodrato.it/papers/
More information about the gmp-devel
mailing list