x86-64 fixes: Part 4
Andreas Jaeger
aj@suse.de
Thu, 24 Oct 2002 14:46:31 +0200
These patches populate the new mpn/x86-64 directory with some files.
Most of the files are taken from Athlon and enhanced for x86-64.
Andreas
diff -urN gmp-4.1/mpn/x86-64/gmp-mparam.h /suse/aj/gmp-4.1/mpn/x86-64/gmp-mparam.h
--- gmp-4.1/mpn/x86-64/gmp-mparam.h Thu Jan 1 01:00:00 1970
+++ /suse/aj/gmp-4.1/mpn/x86-64/gmp-mparam.h Sun Sep 29 12:33:05 2002
@@ -0,0 +1,59 @@
+/* AMD x86-64 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64 /* limb width in bits */
+#define BYTES_PER_MP_LIMB 8 /* limb width in bytes (64 / 8) */
+
+/* Thresholds below were taken from an Athlon to get some values; presumably not yet tuned on x86-64 -- TODO confirm. */
+
+#define MUL_KARATSUBA_THRESHOLD 26
+#define MUL_TOOM3_THRESHOLD 177
+
+#define SQR_BASECASE_THRESHOLD 0 /* always */
+#define SQR_KARATSUBA_THRESHOLD 52
+#define SQR_TOOM3_THRESHOLD 186
+
+#define DIV_SB_PREINV_THRESHOLD 0 /* always */
+#define DIV_DC_THRESHOLD 91
+#define POWM_THRESHOLD 134
+
+#define GCD_ACCEL_THRESHOLD 3
+#define GCDEXT_THRESHOLD 25
+#define JACOBI_BASE_METHOD 1
+
+#define USE_PREINV_DIVREM_1 1
+#define USE_PREINV_MOD_1 1 /* native */
+#define DIVREM_2_THRESHOLD 0 /* always */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define MODEXACT_1_ODD_THRESHOLD 0 /* always */
+
+#define GET_STR_DC_THRESHOLD 22
+#define GET_STR_PRECOMPUTE_THRESHOLD 35
+#define SET_STR_THRESHOLD 5634
+
+#define MUL_FFT_TABLE { 848, 1696, 3712, 7680, 22528, 57344, 0 }
+#define MUL_FFT_MODF_THRESHOLD 880
+#define MUL_FFT_THRESHOLD 9984
+
+#define SQR_FFT_TABLE { 784, 1824, 3712, 7680, 22528, 57344, 0 }
+#define SQR_FFT_MODF_THRESHOLD 848
+#define SQR_FFT_THRESHOLD 8448
+
diff -urN gmp-4.1/mpn/x86-64/lshift.asm /suse/aj/gmp-4.1/mpn/x86-64/lshift.asm
--- gmp-4.1/mpn/x86-64/lshift.asm Thu Jan 1 01:00:00 1970
+++ /suse/aj/gmp-4.1/mpn/x86-64/lshift.asm Sun Sep 29 16:25:14 2002
@@ -0,0 +1,73 @@
+dnl x86-64 mpn_lshift -- mpn left shift.
+
+dnl Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software
+dnl Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       unsigned shift);
+C
+C Arguments:   rdi = dst    rsi = src    rdx = size    rcx = shift
+C Returns:     rax = bits shifted out of the most significant limb
+C Scratch:     r8, r9
+C
+C Limbs are processed from most to least significant, two per loop pass.
+
+        TEXT
+        ALIGN(16)
+PROLOGUE(mpn_lshift)
+
+        subq    $8, %rsi                C rsi = src - 8, so index rdx reads src[rdx-1]
+
+        movq    (%rsi,%rdx,8), %r8      C r8 = src[size-1]
+        xorl    %eax, %eax              C rax = 0
+        shldq   %cl, %r8, %rax          C rax = bits shifted out of the top limb
+        decq    %rdx                    C rdx = size - 1 limbs still to load
+        jz      L(end)                  C single limb, no loop needed
+        movq    %rax, %r9               C r9 = return value, kept until the end
+        testb   $1, %dl
+        jnz     L(mid)                  C odd count: start at the second half
+        movq    %r8, %rax               C even count: first half expects the limb in rax
+
+        ALIGN(8)
+L(top): movq    (%rsi,%rdx,8), %r8      C r8 = next lower source limb
+        shldq   %cl, %r8, %rax          C rax = result limb
+        movq    %rax, (%rdi,%rdx,8)     C store it
+        decq    %rdx
+L(mid): movq    (%rsi,%rdx,8), %rax     C rax = next lower source limb
+        shldq   %cl, %rax, %r8          C r8 = result limb
+        movq    %r8, (%rdi,%rdx,8)      C store it
+        decq    %rdx
+        jnz     L(top)
+
+        shlq    %cl, %rax               C least significant limb gets zeros shifted in
+        movq    %rax, (%rdi)            C dst[0]
+
+        movq    %r9, %rax               C return the saved carry limb
+        ret
+
+L(end): shlq    %cl, %r8                C size is 1: dst[0] is src[0] shifted left
+        movq    %r8, (%rdi)
+
+        ret                             C rax already holds the carry limb
+
+EPILOGUE()
diff -urN gmp-4.1/mpn/x86-64/rshift.asm /suse/aj/gmp-4.1/mpn/x86-64/rshift.asm
--- gmp-4.1/mpn/x86-64/rshift.asm Thu Jan 1 01:00:00 1970
+++ /suse/aj/gmp-4.1/mpn/x86-64/rshift.asm Sun Sep 29 16:56:00 2002
@@ -0,0 +1,76 @@
+dnl x86-64 mpn_rshift -- mpn right shift.
+
+dnl Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software
+dnl Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+include(`../config.m4')
+
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       unsigned shift);
+C
+C Arguments:   rdi = dst    rsi = src    rdx = size    rcx = shift
+C Returns:     rax = bits shifted out of the least significant limb, held
+C                    in the high end of the limb
+C Scratch:     r8, r9
+C Limbs are processed from least to most significant, two per loop pass.
+
+        TEXT
+        ALIGN(16)
+PROLOGUE(mpn_rshift)
+
+        leaq    -8(%rdi,%rdx,8), %rdi   C rdi = address of dst[size-1]
+        leaq    (%rsi,%rdx,8), %rsi     C rsi = address of src[size]
+        negq    %rdx                    C rdx = -size, counts up to zero
+
+        movq    (%rsi,%rdx,8), %r8      C r8 = src[0]
+        xorl    %eax, %eax              C rax = 0
+        shrdq   %cl, %r8, %rax          C rax = bits shifted out of the bottom limb
+        incq    %rdx                    C rdx = -(size - 1) limbs still to load
+        jz      L(end)                  C single limb, no loop needed
+        movq    %rax, %r9               C r9 = return value, kept until the end
+        testb   $1, %dl
+        jnz     L(mid)                  C odd count: start at the second half
+        movq    %r8, %rax               C even count: first half expects the limb in rax
+
+        ALIGN(8)
+L(top): movq    (%rsi,%rdx,8), %r8      C r8 = next higher source limb
+        shrdq   %cl, %r8, %rax          C rax = result limb
+        movq    %rax, (%rdi,%rdx,8)     C store it
+        incq    %rdx
+L(mid): movq    (%rsi,%rdx,8), %rax     C rax = next higher source limb
+        shrdq   %cl, %rax, %r8          C r8 = result limb
+        movq    %r8, (%rdi,%rdx,8)      C store it
+        incq    %rdx
+        jnz     L(top)
+
+        shrq    %cl, %rax               C most significant limb gets zeros shifted in
+        movq    %rax, (%rdi)            C dst[size-1]
+
+        movq    %r9, %rax               C return the saved carry limb
+        ret
+
+L(end): shrq    %cl, %r8                C size is 1: dst[0] is src[0] shifted right
+        movq    %r8, (%rdi)
+
+        ret                             C rax already holds the carry limb
+
+EPILOGUE()
diff -urN gmp-4.1/mpn/x86-64/udiv.asm /suse/aj/gmp-4.1/mpn/x86-64/udiv.asm
--- gmp-4.1/mpn/x86-64/udiv.asm Thu Jan 1 01:00:00 1970
+++ /suse/aj/gmp-4.1/mpn/x86-64/udiv.asm Sun Sep 29 16:34:48 2002
@@ -0,0 +1,42 @@
+dnl x86-64 mpn_udiv_qrnnd -- 2 by 1 limb division
+
+dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_udiv_qrnnd (mp_limb_t *remptr, mp_limb_t high, mp_limb_t low,
+C mp_limb_t divisor);
+C Returns the quotient in rax and stores the remainder through remptr.
+ C Parameter
+ C rdi remptr -- where the remainder is stored
+ C rsi high -- upper limb of the dividend
+ C rdx low -- lower limb of the dividend
+ C rcx divisor
+ C NOTE(review): divq faults unless high is below divisor; presumably callers guarantee that.
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_udiv_qrnnd)
+ movq %rdx, %rax C rax = low; copied first because rdx is overwritten next
+ movq %rsi, %rdx C rdx = high, so rdx:rax is the two-limb dividend
+ divq %rcx C rax = quotient, rdx = remainder
+ movq %rdx,(%rdi) C store the remainder through remptr
+ ret C quotient is returned in rax
+EPILOGUE()
diff -urN gmp-4.1/mpn/x86-64/umul.asm /suse/aj/gmp-4.1/mpn/x86-64/umul.asm
--- gmp-4.1/mpn/x86-64/umul.asm Thu Jan 1 01:00:00 1970
+++ /suse/aj/gmp-4.1/mpn/x86-64/umul.asm Sun Sep 29 16:32:15 2002
@@ -0,0 +1,41 @@
+dnl mpn_umul_ppmm -- 1x1->2 limb multiplication
+
+dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+include(`../config.m4')
+
+
+C mp_limb_t mpn_umul_ppmm (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2);
+C
+C Multiply m1 by m2.  The low limb of the product is stored through lowptr
+C and the high limb is returned.
+C
+C Arguments:   rdi = lowptr    rsi = m1    rdx = m2
+C Returns:     rax = high limb of the product
+
+        TEXT
+        ALIGN(16)
+PROLOGUE(mpn_umul_ppmm)
+        movq    %rdx, %rax              C rax = m2, mulq takes one factor from rax
+        mulq    %rsi                    C rdx:rax = m2 * m1
+        movq    %rax, (%rdi)            C store the low limb through lowptr
+        movq    %rdx, %rax              C return the high limb
+        ret
+EPILOGUE()
diff -urN gmp-4.1/mpn/x86-64/x86-64-defs.m4 /suse/aj/gmp-4.1/mpn/x86-64/x86-64-defs.m4
--- gmp-4.1/mpn/x86-64/x86-64-defs.m4 Thu Jan 1 01:00:00 1970
+++ /suse/aj/gmp-4.1/mpn/x86-64/x86-64-defs.m4 Mon Aug 19 16:13:36 2002
@@ -0,0 +1,332 @@
+divert(-1)
+
+dnl m4 macros for x86-64 assembler.
+
+
+dnl Copyright 2002 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+
+deflit(BYTES_PER_MP_LIMB, 8)dnl limb size in bytes, same value as in this directory's gmp-mparam.h
+
+
+
+dnl Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
+dnl
+dnl Emits GLOBL and TYPE(..,`function') for the symbol, then its label, then
+dnl call_mcount unless WANT_PROFILING is `no'. TEXT and ALIGN() are not
+dnl emitted here; the asm files give them explicitly since different
+dnl alignments are wanted in various circumstances. So for instance,
+dnl TEXT
+dnl ALIGN(16)
+dnl PROLOGUE(mpn_add_n)
+dnl ...
+dnl EPILOGUE()
+
+define(`PROLOGUE_cpu',
+m4_assert_numargs(1)
+ `GLOBL $1
+ TYPE($1,`function')
+$1:
+ifelse(WANT_PROFILING,`no',,`call_mcount
+')')
+
+
+dnl Usage: call_mcount
+dnl For `gprof' style profiling, %ebp is setup as a frame pointer. None of
+dnl the assembler routines use %ebp this way, so it's done only for the
+dnl benefit of mcount. glibc sysdeps/i386/i386-mcount.S shows how mcount
+dnl gets the current function from (%esp) and the parent from 4(%ebp).
+dnl For `prof' style profiling gcc generates mcount calls without setting
+dnl up %ebp, and the same is done here.
+dnl NOTE(review): this is still the 32-bit i386 sequence (pushl/popl, %ebp,
+dnl %ebx GOT setup); it looks unported to 64-bit mode -- TODO confirm and port.
+
+define(`call_mcount',
+m4_assert_numargs(-1)
+m4_assert_defined(`WANT_PROFILING')
+m4_assert_defined(`MCOUNT_PIC_REG')
+m4_assert_defined(`MCOUNT_NONPIC_REG')
+m4_assert_defined(`MCOUNT_PIC_CALL')
+m4_assert_defined(`MCOUNT_NONPIC_CALL')
+`ifelse(ifdef(`PIC',`MCOUNT_PIC_REG',`MCOUNT_NONPIC_REG'),,,
+` DATA
+ ALIGN(4)
+L(mcount_data_`'mcount_data_counter):
+ W32 0
+ TEXT
+')dnl
+ifelse(WANT_PROFILING,`gprof',
+` pushl %ebp
+ movl %esp, %ebp
+')dnl
+ifdef(`PIC',
+` pushl %ebx
+ mcount_movl_GOT_ebx
+ifelse(MCOUNT_PIC_REG,,,
+` leal L(mcount_data_`'mcount_data_counter)@GOTOFF(%ebx), MCOUNT_PIC_REG')
+MCOUNT_PIC_CALL
+ popl %ebx
+',`dnl non-PIC
+ifelse(MCOUNT_NONPIC_REG,,,
+` movl `$'L(mcount_data_`'mcount_data_counter), MCOUNT_NONPIC_REG
+')dnl
+MCOUNT_NONPIC_CALL
+')dnl
+ifelse(WANT_PROFILING,`gprof',
+` popl %ebp
+')
+define(`mcount_data_counter',eval(mcount_data_counter+1))')
+
+define(mcount_data_counter,1)
+
+dnl Called: mcount_movl_GOT_ebx
+dnl Sets %ebx to the address of _GLOBAL_OFFSET_TABLE_. Label H is "here",
+dnl the %eip obtained from the call. C is the called subroutine. J is the
+dnl jump across that subroutine. A fetch and "ret" is always done so calls
+dnl and returns are balanced, for x86s with return stack branch prediction.
+define(mcount_movl_GOT_ebx,
+m4_assert_numargs(-1)
+` call L(mcount_movl_GOT_ebx_C`'mcount_movl_GOT_ebx_counter)
+L(mcount_movl_GOT_ebx_H`'mcount_movl_GOT_ebx_counter):
+ jmp L(mcount_movl_GOT_ebx_J`'mcount_movl_GOT_ebx_counter)
+L(mcount_movl_GOT_ebx_C`'mcount_movl_GOT_ebx_counter):
+ movl (%esp), %ebx
+ ret
+L(mcount_movl_GOT_ebx_J`'mcount_movl_GOT_ebx_counter):
+ addl $_GLOBAL_OFFSET_TABLE_+[.-L(mcount_movl_GOT_ebx_H`'mcount_movl_GOT_ebx_counter)], %ebx
+define(`mcount_movl_GOT_ebx_counter',incr(mcount_movl_GOT_ebx_counter))')
+dnl NOTE(review): 32-bit sequence (return address read with movl into %ebx); looks unported to x86-64 -- TODO confirm.
+define(mcount_movl_GOT_ebx_counter,1)
+
+
+dnl --------------------------------------------------------------------------
+dnl Various x86 macros.
+dnl
+
+
+dnl Usage: ALIGN_OFFSET(bytes,offset)
+dnl
+dnl Align to `offset' away from a multiple of `bytes': emits ALIGN(bytes)
+dnl followed by `offset' nop instructions, one per line.
+dnl
+dnl This is useful for testing, for example align to something very strict
+dnl and see what effect offsets from it have, "ALIGN_OFFSET(256,32)".
+dnl Generally you wouldn't execute across the padding, but it's done with
+dnl nop's so it'll work.
+
+define(ALIGN_OFFSET,
+m4_assert_numargs(2)
+`ALIGN($1)
+forloop(`i',1,$2,` nop
+')')
+
+
+dnl Usage: defframe(name,offset)
+dnl
+dnl Make a definition like the following with which to access a parameter
+dnl or variable on the stack.
+dnl
+dnl define(name,`FRAME+offset(%rsp)')
+dnl
+dnl Actually m4_empty_if_zero(FRAME+offset) is used, which will save one
+dnl byte if FRAME+offset is zero, by putting (%rsp) rather than 0(%rsp).
+dnl Use define(`defframe_empty_if_zero_disabled',1) if for some reason the
+dnl zero offset is wanted.
+dnl
+dnl The new macro also gets a check that when it's used FRAME is actually
+dnl defined, and that the final %rsp offset isn't negative, which would
+dnl mean an attempt to access something below the current %rsp.
+dnl
+dnl deflit() is used rather than a plain define(), so the new macro won't
+dnl delete any following parenthesized expression. name(%rdi) will come
+dnl out say as 16(%rsp)(%rdi). This isn't valid assembler and should
+dnl provoke an error, which is better than silently giving just 16(%rsp).
+dnl
+dnl See README for more on the suggested way to access the stack frame.
+
+define(defframe,
+m4_assert_numargs(2)
+`deflit(`$1',
+m4_assert_defined(`FRAME')
+`defframe_check_notbelow(`$1',$2,FRAME)dnl
+defframe_empty_if_zero(FRAME+($2))(%rsp)')')
+
+dnl Called: defframe_empty_if_zero(expression) -- eval() of it when defframe_empty_if_zero_disabled is 1, else m4_empty_if_zero()
+define(defframe_empty_if_zero,
+m4_assert_numargs(1)
+`ifelse(defframe_empty_if_zero_disabled,1,
+`eval($1)',
+`m4_empty_if_zero($1)')')
+
+dnl Called: defframe_check_notbelow(`name',offset,FRAME) -- m4_error when FRAME+offset is negative
+define(defframe_check_notbelow,
+m4_assert_numargs(3)
+`ifelse(eval(($3)+($2)<0),1,
+`m4_error(`$1 at frame offset $2 used when FRAME is only $3 bytes
+')')')
+
+
+dnl Usage: FRAME_pushq()        FRAME_popq()
+dnl        FRAME_addq_esp(n)    FRAME_subq_esp(n)
+dnl
+dnl Adjust FRAME for a pushq or popq (8 bytes each on x86-64), or for an addq
+dnl or subq of n bytes on %rsp. Using these macros is completely optional;
+dnl explicit deflit(`FRAME',N) forms can make more sense when jumps mean
+dnl different FRAME values are needed in different places. The x86 names
+dnl FRAME_pushl, FRAME_popl, FRAME_addl_esp, FRAME_subl_esp remain as aliases.
+
+define(FRAME_pushq,
+m4_assert_numargs(0)
+m4_assert_defined(`FRAME')
+`deflit(`FRAME',eval(FRAME+8))')
+define(FRAME_pushl, `FRAME_pushq($@)')
+
+define(FRAME_popq,
+m4_assert_numargs(0)
+m4_assert_defined(`FRAME')
+`deflit(`FRAME',eval(FRAME-8))')
+define(FRAME_popl, `FRAME_popq($@)')
+
+define(FRAME_addq_esp,
+m4_assert_numargs(1)
+m4_assert_defined(`FRAME')
+`deflit(`FRAME',eval(FRAME-($1)))')
+define(FRAME_addl_esp, `FRAME_addq_esp($@)')
+
+define(FRAME_subq_esp,
+m4_assert_numargs(1)
+m4_assert_defined(`FRAME')
+`deflit(`FRAME',eval(FRAME+($1)))')
+define(FRAME_subl_esp, `FRAME_subq_esp($@)')
+
+
+dnl Usage: defframe_pushq(name)
+dnl
+dnl Do a combination FRAME_pushl() and a defframe() to name the stack
+dnl location just pushed. This should come after a pushq instruction.
+dnl Putting it on the same line works and avoids lengthening the code. For
+dnl example,
+dnl        pushq %rax      defframe_pushq(VAR_COUNTER)
+dnl The defframe() is done with an unquoted -FRAME thus giving its current
+dnl value without tracking future changes. The x86 name defframe_pushl is
+dnl kept as an alias.
+
+define(defframe_pushq,
+m4_assert_numargs(1)
+`FRAME_pushl()defframe(`$1',-FRAME)')
+define(defframe_pushl, `defframe_pushq($@)')
+
+
+dnl --------------------------------------------------------------------------
+dnl Assembler instruction macros.
+dnl
+
+
+dnl Usage: shldl(count,src,dst)
+dnl shrdl(count,src,dst)
+dnl shldw(count,src,dst)
+dnl shrdw(count,src,dst)
+dnl
+dnl Generate a double-shift instruction, possibly omitting a %cl count
+dnl parameter if that's what the assembler requires, as indicated by
+dnl WANT_SHLDL_CL in config.m4. For example,
+dnl
+dnl shldl( %cl, %eax, %ebx)
+dnl
+dnl turns into either
+dnl
+dnl shldl %cl, %eax, %ebx
+dnl or
+dnl shldl %eax, %ebx
+dnl
+dnl Immediate counts are always passed through unchanged. For example,
+dnl shrdl( $2, %esi, %edi)
+dnl becomes
+dnl shrdl $2, %esi, %edi
+dnl
+dnl If you forget to use the macro form "shldl( ...)" and instead write
+dnl just a plain "shldl ...", an error results. This ensures the necessary
+dnl variant treatment of %cl isn't accidentally bypassed.
+dnl NOTE(review): only the l and w forms are wrapped in this file, so a plain
+dnl shldq or shrdq is not subject to the WANT_SHLDL_CL treatment.
+
+define(define_shd_instruction,
+m4_assert_numargs(1)
+`define($1,
+m4_instruction_wrapper()
+m4_assert_numargs(3)
+`shd_instruction'(m4_doublequote($`'0),m4_doublequote($`'1),dnl
+m4_doublequote($`'2),m4_doublequote($`'3)))')
+
+dnl Effectively: define(shldl,`shd_instruction(`$0',`$1',`$2',`$3')') etc
+define_shd_instruction(shldl)
+define_shd_instruction(shrdl)
+define_shd_instruction(shldw)
+define_shd_instruction(shrdw)
+
+dnl Called: shd_instruction(op,count,src,dst)
+define(shd_instruction,
+m4_assert_numargs(4)
+m4_assert_defined(`WANT_SHLDL_CL')
+`ifelse(eval(m4_stringequal_p(`$2',`%cl') && !WANT_SHLDL_CL),1,
+``$1' `$3', `$4'',
+``$1' `$2', `$3', `$4'')')
+
+
+dnl Usage: ASSERT([cond][,instructions])
+dnl
+dnl If WANT_ASSERT is 1, output the given instructions and expect the given
+dnl flags condition to then be satisfied, else a ud2 is executed. For example,
+dnl
+dnl ASSERT(ne, `cmpl %eax, %ebx')
+dnl
+dnl The instructions can be omitted to just assert a flags condition with
+dnl no extra calculation. For example,
+dnl
+dnl ASSERT(nc)
+dnl
+dnl When `instructions' is not empty, a pushf/popf is added to preserve the
+dnl flags, but the instructions themselves must preserve any registers that
+dnl matter. If FRAME is defined it is adjusted with FRAME_pushl/FRAME_popl
+dnl around them, so the instructions can use defframe() stack variables.
+dnl
+dnl The condition can be omitted to just output the given instructions when
+dnl assertion checking is wanted. In this case the pushf/popf is omitted.
+dnl For example,
+dnl
+dnl ASSERT(, `movl %eax, VAR_KEEPVAL')
+
+define(ASSERT,
+m4_assert_numargs_range(1,2)
+`ifelse(WANT_ASSERT,1,
+`ifelse(`$1',,
+ `$2',
+ `C ASSERT
+ifelse(`$2',,,` pushf ifdef(`FRAME',`FRAME_pushl()')')
+ $2
+ j`$1' L(ASSERT_ok`'ASSERT_counter)
+ ud2 C assertion failed
+L(ASSERT_ok`'ASSERT_counter):
+ifelse(`$2',,,` popf ifdef(`FRAME',`FRAME_popl()')')
+define(`ASSERT_counter',incr(ASSERT_counter))')')')
+
+define(ASSERT_counter,1)
+
+divert`'dnl
--
Andreas Jaeger
SuSE Labs aj@suse.de
private aj@arthur.inka.de
http://www.suse.de/~aj