mpn_zero_p
Torbjörn Granlund
tg at gmplib.org
Wed Jun 24 10:00:41 UTC 2015
bodrato at mail.dm.unipi.it writes:
> mpn/x86_64/fastsse/com.asm does support zero size with an initial
>         test    n, n
>         jz      L(don)
> while neither the generic C function for the library (mpn/generic/com.c)
> nor the inlined version in gmp-impl.h does.
I took a quick look at the code in that directory. It can surely be
polished. What about this initial patch:
diff -Nrc2 gmp-main.e5ccd9546c7d/mpn/x86_64/fastsse/com.asm gmp-main/mpn/x86_64/fastsse/com.asm
*** gmp-main.e5ccd9546c7d/mpn/x86_64/fastsse/com.asm Wed Jun 24 11:59:18 2015
--- gmp-main/mpn/x86_64/fastsse/com.asm Wed Jun 24 11:59:18 2015
***************
*** 79,85 ****
FUNC_ENTRY(3)
- test n, n
- jz L(don)
-
pcmpeqb %xmm7, %xmm7 C set to 111...111
--- 79,82 ----
diff -Nrc2 gmp-main.e5ccd9546c7d/mpn/x86_64/fastsse/copyd.asm gmp-main/mpn/x86_64/fastsse/copyd.asm
*** gmp-main.e5ccd9546c7d/mpn/x86_64/fastsse/copyd.asm Wed Jun 24 11:59:18 2015
--- gmp-main/mpn/x86_64/fastsse/copyd.asm Wed Jun 24 11:59:18 2015
***************
*** 81,87 ****
FUNC_ENTRY(3)
- test n, n
- jz L(don)
-
lea -16(rp,n,8), rp
lea -16(up,n,8), up
--- 81,84 ----
diff -Nrc2 gmp-main.e5ccd9546c7d/mpn/x86_64/fastsse/copyi.asm gmp-main/mpn/x86_64/fastsse/copyi.asm
*** gmp-main.e5ccd9546c7d/mpn/x86_64/fastsse/copyi.asm Wed Jun 24 11:59:18 2015
--- gmp-main/mpn/x86_64/fastsse/copyi.asm Wed Jun 24 11:59:18 2015
***************
*** 81,85 ****
FUNC_ENTRY(3)
! cmp $3, n
jc L(bc)
--- 81,85 ----
FUNC_ENTRY(3)
! cmp $3, n C NB: bc code below assumes this limit
jc L(bc)
***************
*** 144,150 ****
ALIGN(16)
1:
! L(end): test $1, R8(n)
jz 1f
! mov (up), %r8
mov %r8, (rp)
1:
--- 144,150 ----
ALIGN(16)
1:
! test $1, R8(n)
jz 1f
! L(1): mov (up), %r8
mov %r8, (rp)
1:
***************
*** 156,174 ****
L(bc): sub $2, n
! jc L(end)
ALIGN(16)
1: mov (up), %rax
mov 8(up), %rcx
! lea 16(up), up
mov %rax, (rp)
mov %rcx, 8(rp)
! lea 16(rp), rp
! sub $2, n
! jnc 1b
test $1, R8(n)
jz L(ret)
! mov (up), %rax
! mov %rax, (rp)
L(ret): FUNC_EXIT()
ret
--- 156,174 ----
L(bc): sub $2, n
! jc L(1)
ALIGN(16)
1: mov (up), %rax
mov 8(up), %rcx
! dnl lea 16(up), up
mov %rax, (rp)
mov %rcx, 8(rp)
! dnl lea 16(rp), rp
! dnl sub $2, n
! dnl jnc 1b
test $1, R8(n)
jz L(ret)
! mov 16(up), %rax
! mov %rax, 16(rp)
L(ret): FUNC_EXIT()
ret
--
Torbjörn
Please encrypt, key id 0xC8601622
More information about the gmp-devel mailing list