Please update addaddmul_1msb0.asm to support ABI in mingw64

Niels Möller nisse at lysator.liu.se
Fri Oct 8 20:13:24 UTC 2021


nisse at lysator.liu.se (Niels Möller) writes:

>> dnl  AMD64 mpn_addsubmul_1msb0, R = Au - Bv, u,v < 2^63.
>
> This comment is obviously wrong ;-)
>
> But that function could be implemented by adding two "not %rdx" in the
> right places of the loop, plus small adjustment just before and after
> the loop.
>
> Since
>
>  Au - Bv = Au + (2^{64 n} - 1 - B) v - 2^{64 n} v + v
>
> So complement B on the fly, set initial carry limb to v, and subtract v from
> the return value. (Same trick as in arm/v7a/cora15/submul_1).
>
> Should definitely be worth a try, before trying some completely
> different loop.

I tried this now; the implementation below appears to work fine.

But it is considerably slower. Those extra "not" instructions appear to
cost one cycle per limb on my machine: addmul_1 at 2 c/l, addaddmul_1msb0
at 3 c/l, and addsubmul_1msb0 at 4 c/l. I didn't expect that much.

Regards,
/Niels


---------8<----------

dnl  AMD64 mpn_addsubmul_1msb0, R = Au - Bv, u,v < 2^63.

dnl  Contributed to the GNU project by Niels Möller and Torbjörn Granlund.

dnl  Copyright 2021 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C INPUT PARAMETERS
C These are the registers in which the code below expects its six arguments.
define(`rp',	`%rdi')	C destination: result limbs are stored through it
define(`ap',	`%rsi')	C source A: limbs loaded from here are multiplied by u0
define(`bp_param', `%rdx')	C source B as passed in; copied to bp during setup
define(`n',	`%rcx')	C limb count; negated and used as the loop index
define(`u0',	`%r8')	C multiplier applied to the A limbs
define(`v0',	`%r9')	C multiplier applied to the complemented B limbs

C The B pointer is kept in rbx inside the loop, because rdx is reloaded
C with a source limb before every mulx.
define(`bp', `%rbx')

C Scratch registers.
define(`c0', `%rax')	C carry limb, and return value
define(`l0', `%r10')	C low limb of the A product
define(`l1', `%r11')	C low limb of the B product; becomes the stored limb
define(`hi', `%rbp')	C high limb of the A product

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

C mpn_addsubmul_1msb0 (rp, ap, bp, n, u0, v0)
C
C Stores the n low limbs of A*u0 - B*v0 at rp and returns the remaining
C high limb in rax.  Requires u0, v0 < 2^63.  The loop body runs at least
C once, so n = 0 is not handled.
C
C The subtraction is turned into an addition by means of the identity
C
C   A u - B v = A u + (2^{64 n} - 1 - B) v - 2^{64 n} v + v
C
C that is: every B limb is complemented on the fly, the carry limb starts
C out as v0 (the "+ v" term), and v0 is subtracted from the final carry
C limb (the "- 2^{64 n} v" term).
C
C Two independent carry chains are kept live through the loop:
C   OF (adox): adds the incoming carry limb and builds the outgoing one;
C              it is cleared by inc at the end of every limb.
C   CF (adc):  adds the two low product limbs; inc preserves CF, so this
C              chain runs across iterations until the final adc.
C
C Clobbers rax, rdx, r10, r11 and flags.  rbx and rbp are saved and
C restored on the stack.

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_addsubmul_1msb0)
	FUNC_ENTRY(4)
C DOS64 only: load u0 and v0 from the stack into the registers that the
C rest of the code expects.  This has to happen before the pushes below
C move the stack pointer.
IFDOS(`	mov	56(%rsp), %r8	')
IFDOS(`	mov	64(%rsp), %r9	')

	push	%rbx
	push	%rbp

C Point all three operands just past their last limb and count n up from
C -n towards zero, so that a single register serves as index and counter.
	lea	(ap,n,8), ap
	lea	(bp_param,n,8), bp
	lea	(rp,n,8), rp
	neg	n

	mov	v0, c0			C Initial carry limb is v0
	test	$1, R32(n)		C Clears CF and OF for both carry chains
	jnz	L(mid)			C Odd n: start with the second loop half

	ALIGN(16)
L(top):	mov	(ap,n,8), %rdx
	mulx(	u0, l0, hi)		C hi:l0 = a * u0
	mov	(bp,n,8), %rdx
	not	%rdx			C Complemented b limb
	adox(	c0, l0)			C l0 += carry limb
	mulx(	v0, l1, c0)		C c0:l1 = (not b) * v0
	adox(	hi, c0)			C c0 += hi + OF, the next carry limb
	adc	l0, l1			C l1 += l0 + CF
	mov	l1, (rp,n,8)
	inc	n			C Clears OF (since n != 2^63 - 1)
L(mid):	mov	(ap,n,8), %rdx
	mulx(	u0, l0, hi)		C hi:l0 = a * u0
	mov	(bp,n,8), %rdx
	not	%rdx			C Complemented b limb
	adox(	c0, l0)			C l0 += carry limb
	mulx(	v0, l1, c0)		C c0:l1 = (not b) * v0
	adox(	hi, c0)			C c0 += hi + OF, the next carry limb
	adc	l0, l1			C l1 += l0 + CF
	mov	l1, (rp,n,8)
	inc	n			C Clears OF, keeps CF, sets ZF when done
	jnz	L(top)

L(end): adc	$0, c0			C Fold the last CF into the carry limb
	sub	v0, c0			C Remove the bias added by the initial v0
	pop	%rbp
	pop	%rbx
	FUNC_EXIT()
	ret
EPILOGUE()

-- 
Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677.
Internet email is subject to wholesale government surveillance.


More information about the gmp-devel mailing list