[Gmp-commit] /var/hg/gmp: 20 new changesets

mercurial at gmplib.org mercurial at gmplib.org
Thu Aug 1 01:56:57 CEST 2013


details:   /var/hg/gmp/rev/710180e48a66
changeset: 15880:710180e48a66
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Jul 23 16:51:40 2013 +0200
description:
Supplement c/l table.

details:   /var/hg/gmp/rev/a9b6d3fff782
changeset: 15881:a9b6d3fff782
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Jul 23 16:54:31 2013 +0200
description:
New file, grabbing atom code.

details:   /var/hg/gmp/rev/1ce7e0e21bfe
changeset: 15882:1ce7e0e21bfe
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Jul 23 16:56:41 2013 +0200
description:
New file, grabbing atom code.

details:   /var/hg/gmp/rev/f951cbab95f5
changeset: 15883:f951cbab95f5
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Jul 23 17:02:15 2013 +0200
description:
Remove a spurious emms insn.

details:   /var/hg/gmp/rev/382166369c47
changeset: 15884:382166369c47
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Jul 23 17:05:52 2013 +0200
description:
Supplement c/l table.

details:   /var/hg/gmp/rev/5bc50dccd0a5
changeset: 15885:5bc50dccd0a5
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Jul 23 17:07:40 2013 +0200
description:
Add bobcat/aors_n.asm.

details:   /var/hg/gmp/rev/f6b53a251c26
changeset: 15886:f6b53a251c26
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Tue Jul 30 16:11:32 2013 +0200
description:
Provide sandybridge popcount.

details:   /var/hg/gmp/rev/040ace104c7c
changeset: 15887:040ace104c7c
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Jul 31 14:09:19 2013 +0200
description:
Add cnd_add_n and cnd_sub_n as fat functions.

details:   /var/hg/gmp/rev/ddedd84a1809
changeset: 15888:ddedd84a1809
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Jul 31 14:15:35 2013 +0200
description:
Keep book of David Miller's contributions.

details:   /var/hg/gmp/rev/9ee420c1f6e2
changeset: 15889:9ee420c1f6e2
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Jul 31 14:18:21 2013 +0200
description:
(x86): Add Haswell-specific path.

details:   /var/hg/gmp/rev/77e0b07ca1f6
changeset: 15890:77e0b07ca1f6
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Jul 31 14:19:27 2013 +0200
description:
Provide fast haswell mul_1.

details:   /var/hg/gmp/rev/726cb6e95e5b
changeset: 15891:726cb6e95e5b
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Jul 31 14:19:37 2013 +0200
description:
ChangeLog

details:   /var/hg/gmp/rev/f0d060895963
changeset: 15892:f0d060895963
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Jul 31 15:19:08 2013 +0200
description:
Provide atom mul_1/addmul_1/submul_1.

details:   /var/hg/gmp/rev/319e69ca8320
changeset: 15893:319e69ca8320
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Jul 31 16:36:53 2013 +0200
description:
Provide atom mul_2.

details:   /var/hg/gmp/rev/5736948cccad
changeset: 15894:5736948cccad
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Jul 31 17:18:18 2013 +0200
description:
Provide atom addmul_2.

details:   /var/hg/gmp/rev/e987b4bd2285
changeset: 15895:e987b4bd2285
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Jul 31 17:19:32 2013 +0200
description:
Minor tweaks of new code.

details:   /var/hg/gmp/rev/8e69da54b204
changeset: 15896:8e69da54b204
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Wed Jul 31 17:31:52 2013 +0200
description:
News items.

details:   /var/hg/gmp/rev/26199c361d74
changeset: 15897:26199c361d74
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Aug 01 00:56:11 2013 +0200
description:
Provide fast haswell addmul_1 and submul_1.

details:   /var/hg/gmp/rev/8c7e6c0fad40
changeset: 15898:8c7e6c0fad40
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Aug 01 01:05:09 2013 +0200
description:
Put new haswell mulx-dependent files in appropriate subdir.

details:   /var/hg/gmp/rev/c61b482e52c1
changeset: 15899:c61b482e52c1
user:      Torbjorn Granlund <tege at gmplib.org>
date:      Thu Aug 01 01:56:41 2013 +0200
description:
ChangeLog

diffstat:

 AUTHORS                                |    8 +
 ChangeLog                              |   37 ++++++
 NEWS                                   |    3 +-
 configure.ac                           |    5 +-
 gmp-impl.h                             |    6 +
 mpn/x86/fat/fat.c                      |    2 +
 mpn/x86/x86-defs.m4                    |    2 +
 mpn/x86_64/aors_n.asm                  |    9 +-
 mpn/x86_64/atom/addmul_2.asm           |  175 +++++++++++++++++++++++++++++++
 mpn/x86_64/atom/aorrlsh1_n.asm         |    7 +-
 mpn/x86_64/atom/aorsmul_1.asm          |  181 ++++++++++++++++++++++++++++++++
 mpn/x86_64/atom/mul_1.asm              |  133 +++++++++++++++++++++++
 mpn/x86_64/atom/mul_2.asm              |  175 +++++++++++++++++++++++++++++++
 mpn/x86_64/atom/sublsh1_n.asm          |    5 +-
 mpn/x86_64/bd1/aorrlsh1_n.asm          |   26 ++++
 mpn/x86_64/bd1/sublsh1_n.asm           |   26 ++++
 mpn/x86_64/bobcat/aors_n.asm           |  140 +++++++++++++++++++++++++
 mpn/x86_64/coreihwl/mulx/aorsmul_1.asm |  184 +++++++++++++++++++++++++++++++++
 mpn/x86_64/coreihwl/mulx/mul_1.asm     |  142 +++++++++++++++++++++++++
 mpn/x86_64/coreisbr/mul_1.asm          |    5 +-
 mpn/x86_64/coreisbr/popcount.asm       |  110 +++++++++++++++++++
 mpn/x86_64/fat/fat.c                   |    2 +
 mpn/x86_64/pentium4/aorslshC_n.asm     |    1 -
 mpn/x86_64/x86_64-defs.m4              |    2 +
 24 files changed, 1374 insertions(+), 12 deletions(-)

diffs (truncated from 1605 to 300 lines):

diff -r 67a9fdd52e18 -r c61b482e52c1 AUTHORS
--- a/AUTHORS	Mon Jul 22 15:08:25 2013 +0200
+++ b/AUTHORS	Thu Aug 01 01:56:41 2013 +0200
@@ -77,3 +77,11 @@
 Martin Boij		mpn/generic/perfpow.c
 
 Marc Glisse		gmpxx.h improvements
+
+David Miller		mpn/sparc32/ultrasparct1/{addmul_1,mul_1,submul_1}.asm
+			mpn/sparc64/ultrasparct3/{mul_1,addmul_1,submul_1}.asm
+			mpn/sparc64/ultrasparct3/{add_n,sub_n}.asm
+			mpn/sparc64/ultrasparct3/{popcount,hamdist}.asm
+			mpn/sparc64/ultrasparct3/cnd_aors_n.asm
+			mpn/sparc64/{rshift,lshift,lshiftc}.asm
+			mpn/sparc64/tabselect.asm
diff -r 67a9fdd52e18 -r c61b482e52c1 ChangeLog
--- a/ChangeLog	Mon Jul 22 15:08:25 2013 +0200
+++ b/ChangeLog	Thu Aug 01 01:56:41 2013 +0200
@@ -1,3 +1,40 @@
+2013-08-01  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86_64/coreihwl/mulx/aorsmul_1.asm: New file.
+
+2013-07-31  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86_64/atom/mul_2.asm: New file.
+	* mpn/x86_64/atom/addmul_2.asm: New file.
+	* mpn/x86_64/atom/mul_1.asm: New file.
+	* mpn/x86_64/atom/aorsmul_1.asm: New file.
+
+	* mpn/x86_64/coreihwl/mulx/mul_1.asm: New file.
+
+	* configure.ac (x86): Add Haswell-specific path.
+
+	* configure.ac (fat_functions): Add cnd_add_n, cnd_sub_n.
+	* gmp-impl.h (struct cpuvec_t): Add fields for new fat functions.
+	* gmp-impl.h: Adjust corresponding declarations.
+
+	* mpn/x86_64/x86_64-defs.m4 (CPUVEC_FUNCS_LIST): Add new fat functions.
+	* mpn/x86/x86-defs.m4 (CPUVEC_FUNCS_LIST): Likewise.
+	* mpn/x86_64/fat/fat.c (__gmpn_cpuvec): Likewise.
+	* mpn/x86/fat/fat.c (__gmpn_cpuvec): Likewise.
+
+2013-07-30  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86_64/coreisbr/popcount.asm: New file.
+
+2013-07-23  Torbjorn Granlund  <tege at gmplib.org>
+
+	* mpn/x86_64/bobcat/aors_n.asm: New file.
+
+	* mpn/x86_64/pentium4/aorslshC_n.asm: Remove a spurious emms insn.
+
+	* mpn/x86_64/bd1/aorrlsh1_n.asm: New file.
+	* mpn/x86_64/bd1/sublsh1_n.asm: New file.
+
 2013-07-22  Torbjorn Granlund  <tege at gmplib.org>
 
 	* mpn/powerpc64/mode64/mod_1_1.asm: Handle little-endian mode.
diff -r 67a9fdd52e18 -r c61b482e52c1 NEWS
--- a/NEWS	Mon Jul 22 15:08:25 2013 +0200
+++ b/NEWS	Thu Aug 01 01:56:41 2013 +0200
@@ -21,7 +21,7 @@
   FEATURES
   * Support for new Intel and AMD CPUs.
 
-  * Support for ARM64 alias Aarch64.
+  * Support for ARM64 alias Aarch64 alias ARMv8.
 
   * New functions mpn_cnd_add_n and mpn_cnd_sub_n. Side-channel silent
     conditional addition and subtraction.
@@ -31,6 +31,7 @@
     See the documentation for mpz_limbs_read and related functions.
 
   MISC
+  * None.
 
 
 Changes between GMP version 5.1.1 and 5.1.2
diff -r 67a9fdd52e18 -r c61b482e52c1 configure.ac
--- a/configure.ac	Mon Jul 22 15:08:25 2013 +0200
+++ b/configure.ac	Thu Aug 01 01:56:41 2013 +0200
@@ -1737,7 +1737,7 @@
 	gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
 	gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
 	path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86/mmx x86"
-	path_64="x86_64/mulx x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
+	path_64="x86_64/coreihwl/mulx x86_64/coreihwl x86_64/mulx x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
 	;;
       coreibwl)
 	gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
@@ -2076,7 +2076,8 @@
       gcc_cflags_cpu=""
       gcc_cflags_arch=""
 
-      fat_functions="add_n addmul_1 bdiv_dbm1c com copyd copyi dive_1 divrem_1
+      fat_functions="add_n addmul_1 bdiv_dbm1c com cnd_add_n cnd_sub_n
+		     copyd copyi dive_1 divrem_1
 		     gcd_1 lshift lshiftc mod_1 mod_1_1 mod_1_1_cps mod_1_2
 		     mod_1_2_cps mod_1_4 mod_1_4_cps mod_34lsub1 mode1o mul_1
 		     mul_basecase mullo_basecase pre_divrem_1 pre_mod_1 redc_1
diff -r 67a9fdd52e18 -r c61b482e52c1 gmp-impl.h
--- a/gmp-impl.h	Mon Jul 22 15:08:25 2013 +0200
+++ b/gmp-impl.h	Thu Aug 01 01:56:41 2013 +0200
@@ -68,6 +68,10 @@
   __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
 #define DECL_bdiv_dbm1c(name) \
   __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
+#define DECL_cnd_add_n(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
+#define DECL_cnd_sub_n(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
 #define DECL_com(name) \
   __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
 #define DECL_copyd(name) \
@@ -4591,6 +4595,8 @@
   DECL_addmul_1        ((*addmul_1));
   DECL_addmul_2        ((*addmul_2));
   DECL_bdiv_dbm1c      ((*bdiv_dbm1c));
+  DECL_cnd_add_n       ((*cnd_add_n));
+  DECL_cnd_sub_n       ((*cnd_sub_n));
   DECL_com             ((*com));
   DECL_copyd           ((*copyd));
   DECL_copyi           ((*copyi));
diff -r 67a9fdd52e18 -r c61b482e52c1 mpn/x86/fat/fat.c
--- a/mpn/x86/fat/fat.c	Mon Jul 22 15:08:25 2013 +0200
+++ b/mpn/x86/fat/fat.c	Thu Aug 01 01:56:41 2013 +0200
@@ -141,6 +141,8 @@
   __MPN(addmul_1_init),
   0,
   __MPN(bdiv_dbm1c_init),
+  __MPN(cnd_add_n_init),
+  __MPN(cnd_sub_n_init),
   __MPN(com_init),
   __MPN(copyd_init),
   __MPN(copyi_init),
diff -r 67a9fdd52e18 -r c61b482e52c1 mpn/x86/x86-defs.m4
--- a/mpn/x86/x86-defs.m4	Mon Jul 22 15:08:25 2013 +0200
+++ b/mpn/x86/x86-defs.m4	Thu Aug 01 01:56:41 2013 +0200
@@ -63,6 +63,8 @@
 `addmul_1',
 `addmul_2',
 `bdiv_dbm1c',
+`cnd_add_n',
+`cnd_sub_n',
 `com',
 `copyd',
 `copyi',
diff -r 67a9fdd52e18 -r c61b482e52c1 mpn/x86_64/aors_n.asm
--- a/mpn/x86_64/aors_n.asm	Mon Jul 22 15:08:25 2013 +0200
+++ b/mpn/x86_64/aors_n.asm	Thu Aug 01 01:56:41 2013 +0200
@@ -23,10 +23,13 @@
 C	     cycles/limb
 C AMD K8,K9	 1.5
 C AMD K10	 1.5
-C Intel P4	 ?
+C AMD bd1	 1.8
+C AMD bobcat	 2.5
+C Intel P4
 C Intel core2	 4.9
 C Intel NHM	 5.5
-C Intel SBR	 1.59
+C Intel SBR	 1.61
+C Intel IBR	 1.61
 C Intel atom	 4
 C VIA nano	 3.25
 
@@ -38,7 +41,7 @@
 define(`up',	`%rsi')	C rdx
 define(`vp',	`%rdx')	C r8
 define(`n',	`%rcx')	C r9
-define(`cy',	`%r8')	C rsp+40    (only for mpn_add_nc)
+define(`cy',	`%r8')	C rsp+40    (mpn_add_nc and mpn_sub_nc)
 
 ifdef(`OPERATION_add_n', `
 	define(ADCSBB,	      adc)
diff -r 67a9fdd52e18 -r c61b482e52c1 mpn/x86_64/atom/addmul_2.asm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mpn/x86_64/atom/addmul_2.asm	Thu Aug 01 01:56:41 2013 +0200
@@ -0,0 +1,175 @@
+dnl  AMD64 mpn_addmul_2 optimised for Intel Atom.
+
+dnl  Copyright 2008, 2011, 2012, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C	     cycles/limb	best
+C AMD K8,K9
+C AMD K10
+C AMD bd1
+C AMD bd2
+C AMD bobcat
+C AMD jaguar
+C Intel P4
+C Intel PNR
+C Intel NHM
+C Intel SBR
+C Intel IBR
+C Intel HWL
+C Intel BWL
+C Intel atom	18.8		this
+C VIA nano
+
+C The loop of this code is the result of running a code generation and
+C optimisation tool suite written by David Harvey and Torbjorn Granlund.
+
+define(`rp',      `%rdi')   C rcx
+define(`up',      `%rsi')   C rdx
+define(`n_param', `%rdx')   C r8
+define(`vp',      `%rcx')   C r9
+
+define(`v0', `%r8')
+define(`v1', `%r9')
+define(`w0', `%rbx')
+define(`w1', `%rcx')
+define(`w2', `%rbp')
+define(`w3', `%r10')
+define(`n',  `%r11')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_addmul_2)
+	FUNC_ENTRY(4)
+	push	%rbx
+	push	%rbp
+
+	mov	(up), %rax
+
+	mov	(vp), v0
+	mov	8(vp), v1
+
+	mov	n_param, n
+	mul	v0
+
+	test	$1, R8(n)
+	jnz	L(bx1)
+
+L(bx0):	test	$2, R8(n)
+	jnz	L(b10)
+
+L(b00):	mov	%rax, w0
+	mov	(up), %rax
+	mov	%rdx, w1
+	xor	R32(w2), R32(w2)
+	lea	-8(rp), rp
+	jmp	L(lo0)
+
+L(b10):	mov	%rax, w2
+	mov	(up), %rax
+	mov	%rdx, w3
+	xor	R32(w0), R32(w0)
+	lea	-16(up), up
+	lea	-24(rp), rp
+	jmp	L(lo2)
+
+L(bx1):	test	$2, R8(n)
+	jnz	L(b11)
+
+L(b01):	mov	%rax, w3
+	mov	%rdx, w0
+	mov	(up), %rax
+	xor	R32(w1), R32(w1)
+	lea	8(up), up
+	dec	n
+	jmp	L(lo1)
+
+L(b11):	mov	%rax, w1
+	mov	(up), %rax
+	mov	%rdx, w2
+	xor	R32(w3), R32(w3)
+	lea	-8(up), up
+	lea	-16(rp), rp
+	jmp	L(lo3)
+
+	ALIGN(16)
+L(top):
+L(lo1):	mul	v1
+	add	w3, (rp)
+	mov	$0, R32(w2)
+	adc	%rax, w0
+	mov	(up), %rax


More information about the gmp-commit mailing list