Assembly routines break Windows 64-bit SEH
E. Madison Bray
erik.m.bray at gmail.com
Thu May 2 12:16:46 UTC 2019
Hello,
Attached is a sample program which deliberately induces a segmentation
fault in some of GMP's assembly code; in this case mpn_divrem_1 since
that happens to be where I first discovered the problem, though it
could affect most any of them. I am working on 64-bit Cygwin, but
this problem can affect any code on Windows (including Cygwin, MinGW,
native Windows) that uses GMP. The problem is particularly severe on
Cygwin, however, which relies on structured exception handling to
catch exceptions and convert them to POSIX signals and/or return
proper error codes.
When working correctly, compiling and running the attached program
should work as follows:
$ gcc test.c -lgmp
$ ./a.exe; echo $?
Segmentation fault (core dumped)
139
Instead this is what happens currently:
$ ./a.exe; echo $?
0
There is no output, and the process exit code is zero (the latter is
an unfortunate Cygwin behavior: if a process terminates due to an
exception for which Cygwin's exception handler was somehow not able to
run, the exit code defaults to zero).
This is because the assembly routines do not include the metadata that
is necessary on 64-bit Windows [1] for stack unwinding to work
properly during exception handling. This was brought up once before
on this list many years ago [2] but AFAICT nothing has ever been done
about it.
I was able to confirm that this was the issue by manually editing the
assembly for mpn_divrem_1 and recompiling/linking. I modified the
function prologue to look like:
.seh_proc __gmpn_divrem_1_x86_64
__gmpn_divrem_1_x86_64:
push %rdi
.seh_pushreg %rdi
push %rsi
.seh_pushreg %rsi
mov %rcx, %rdi
mov %rdx, %rsi
mov %r8, %rdx
mov %r9, %rcx
mov 56(%rsp), %r8
xor %eax, %eax
push %r13
.seh_pushreg %r13
push %r12
.seh_pushreg %r12
push %rbp
.seh_pushreg %rbp
push %rbx
.seh_pushreg %rbx
.seh_endprologue
At the end of the function just a .seh_endproc is needed as well.
Technically this is still not quite correct, because the procedure
later modifies RSP in preparation for a `call __gmpn_invert_limb`.
Handling that correctly requires establishing a frame pointer in the
prologue; otherwise, if an exception occurred inside
__gmpn_invert_limb, stack unwinding would still fail. In my test case
this does not happen, so it still works--but I should note this
subtlety.
I believe this is possible to fix in general, and would be happy to
work on a patch if it would be accepted in principle.
Many of the existing m4 macros in the assembly routines (such as
PROLOGUE and EPILOGUE) can be modified for x64 to add the necessary
bits. It would probably be good also to replace some of the explicit
`push <reg>` instructions in the prologues with some new macros
similar to those provided by MASM [3]. For example a
`push_reg(<reg>)` macro would emit (on Windows 64):
push <reg>
.seh_pushreg <reg>
whereas on all other platforms it would just emit the plain `push
<reg>` instruction. This part I believe is easy.
The trickiest part is just ensuring that some register is available to
establish a frame pointer, when necessary (it doesn't necessarily have
to be RBP; any nonvolatile register will do). In the case of
mpn_divrem_1 I can see that %r14 is available, but I will have to go
through all the routines one by one and work this out.
So, should I work on this?
Thanks,
Madison
[1] https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64?view=vs-2019
[2] https://gmplib.org/list-archives/gmp-bugs/2008-March/000951.html
[3] https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64?view=vs-2019#masm-macros
-------------- next part --------------
#include <gmp.h>
#include <stdlib.h>
int main(void)
{
    /* A one-limb source operand holding zero, with no extra fraction
     * limbs requested, and a single-limb divisor of one. */
    mp_limb_t source[1] = { 0 };
    const mp_size_t source_limbs = 1;
    const mp_size_t fraction_limbs = 0;
    const mp_limb_t divisor = 1;

    /* The destination is deliberately a bogus address, so that the
     * routine faults as soon as it stores a result limb. */
    mp_limb_t *const dest = (mp_limb_t *)0x1234;

    /* mpn_divrem_1 was picked only because it is known to show the
     * problem on the reporter's system, where an assembly
     * implementation is used for it. */
    mpn_divrem_1(dest, fraction_limbs, source, source_limbs, divisor);

    /* Reached only if the call above did not fault. */
    return 1;
}
-------------- next part --------------
# ----------------------------------------------------------------------
# __gmpn_preinv_divrem_1_x86_64
#
# Syntax: GNU as, AT&T operand order.   ABI: Microsoft x64.
#
# Second entry point into the body of __gmpn_divrem_1_x86_64: it builds
# the same six-register frame and then jumps to that routine's Lnent or
# Luent label, so the shared Lret epilogue there unwinds this frame too.
#
# In:   rcx, rdx, r8, r9    arguments 1-4
#       40/48/56(%rsp)      arguments 5-7 as seen at entry (above the
#                           return address and 32 bytes of shadow space)
#       presumably (qp, qxn, up, un, d, dinv, cnt) -- TODO confirm
#       against the C prototype, which is not part of this file.
#
# Register state handed to the shared body:
#       rdi = arg1 + 8*(arg2 + arg4) - 8     rsi = arg3
#       rbx = arg4        r12 = arg2         r8  = arg5 (shifted by cl
#       r9  = arg6        rax = 0                  on the Luent path)
#       cl  = low byte of arg7 (Luent path only)
#
# Unwind info: every push is paired with .seh_pushreg and the prologue
# is closed with .seh_endprologue, matching __gmpn_divrem_1_x86_64, so
# the recorded frame agrees with what is actually on the stack.
# ----------------------------------------------------------------------
        .text
        .align  16, 0x90
        .globl  __gmpn_preinv_divrem_1_x86_64
        .seh_proc __gmpn_preinv_divrem_1_x86_64
__gmpn_preinv_divrem_1_x86_64:
        # rdi and rsi are callee-saved in the Microsoft x64 ABI.
        push    %rdi
        .seh_pushreg %rdi
        push    %rsi
        .seh_pushreg %rsi

        # Shuffle the Win64 argument registers into rdi, rsi, rdx, rcx.
        mov     %rcx, %rdi
        mov     %rdx, %rsi
        mov     %r8, %rdx
        mov     %r9, %rcx
        # Two pushes so far: entry offsets 40 and 48 are now 56 and 64.
        mov     56(%rsp), %r8
        mov     64(%rsp), %r9
        xor     %eax, %eax

        push    %r13
        .seh_pushreg %r13
        push    %r12
        .seh_pushreg %r12
        push    %rbp
        .seh_pushreg %rbp
        push    %rbx
        .seh_pushreg %rbx
        .seh_endprologue

        mov     %rsi, %r12
        mov     %rcx, %rbx
        add     %rsi, %rcx
        mov     %rdx, %rsi
        lea     -8(%rdi,%rcx,8), %rdi

        # Top bit of arg5 already set: join the shared body at Lnent.
        test    %r8, %r8
        js      Lnent

        # Otherwise shift arg5 left by arg7 (entry offset 56, now 104
        # after six pushes) and join the shared body at Luent.
        mov     104(%rsp), %cl
        shl     %cl, %r8
        jmp     Luent
        .seh_endproc
# ----------------------------------------------------------------------
# __gmpn_divrem_1_x86_64 -- GNU as, AT&T syntax, Microsoft x64 ABI.
#
# The test program in this report reaches this routine through
# mpn_divrem_1(rlp, qxn, s2p, s2n, s3limb); those names are used below.
#
# In:   rcx = rlp, rdx = qxn, r8 = s2p, r9 = s2n,
#       40(%rsp) at entry = s3limb (read as 56(%rsp) after two pushes).
# Out:  rax (0 on the early exit when s2n + qxn == 0).
#
# Register roles after set-up:
#       rdi  store pointer, starts at rlp + 8*(s2n + qxn) - 8 and moves
#            down by 8 after every stored limb
#       rsi  s2p            rbx  s2n, counts down as limbs are read
#       r12  qxn, counts down in the Lftop loop
#       r8   s3limb (shifted left by cl on the unnormalized path)
#       r9   value returned by __gmpn_invert_limb
#       cl   shift count (0 on the normalized path)
#
# Lnent and Luent are also jumped to from __gmpn_preinv_divrem_1_x86_64,
# which builds the same six-register frame, so Lret serves both.
#
# NOTE: the unwind directives cover the six prologue pushes only.  RSP
# is changed again around the two calls at L8 and L44 with no matching
# directive; as the report text says, unwinding from inside
# __gmpn_invert_limb would need a frame pointer set up in the prologue.
# ----------------------------------------------------------------------
.align 16, 0x90
.globl __gmpn_divrem_1_x86_64
.seh_proc __gmpn_divrem_1_x86_64
__gmpn_divrem_1_x86_64:
# rdi and rsi are callee-saved in the Microsoft x64 ABI.
push %rdi
.seh_pushreg %rdi
push %rsi
.seh_pushreg %rsi
# Shuffle the Win64 argument registers into rdi, rsi, rdx, rcx, r8.
mov %rcx, %rdi
mov %rdx, %rsi
mov %r8, %rdx
mov %r9, %rcx
mov 56(%rsp), %r8
xor %eax, %eax
push %r13
.seh_pushreg %r13
push %r12
.seh_pushreg %r12
push %rbp
.seh_pushreg %rbp
push %rbx
.seh_pushreg %rbx
.seh_endprologue
# r12 = qxn, rbx = s2n, rcx = s2n + qxn, rsi = s2p.
mov %rsi, %r12
mov %rcx, %rbx
add %rsi, %rcx
mov %rdx, %rsi
# ZF still comes from the add (mov leaves flags alone): when
# s2n + qxn == 0 there is nothing to store and rax is already 0.
je Lret
# rdi -> highest limb of the (s2n + qxn)-limb result area.
lea -8(%rdi,%rcx,8), %rdi
xor %ebp, %ebp
# The top bit of s3limb selects the normalized or unnormalized path.
test %r8, %r8
jns Lunnormalized
Lnormalized:
# With no source limbs, go straight to computing the inverse.
test %rbx, %rbx
je L8
# Take the top source limb and subtract r8 from it once, undoing the
# subtraction if it borrowed.  sbb/inc turn the borrow into 0 (borrow)
# or 1 (no borrow), which is stored as the top result limb.
mov -8(%rsi,%rbx,8), %rbp
dec %rbx
mov %rbp, %rax
sub %r8, %rbp
cmovc %rax, %rbp
sbb %eax, %eax
inc %eax
mov %rax, (%rdi)
lea -8(%rdi), %rdi
L8:
# r9 = __gmpn_invert_limb(r8).  r8 is saved across the call; the push
# (8) plus 32 bytes of shadow space, on top of the six prologue pushes
# and the return address, keep rsp 16-byte aligned at the call.
push %r8
sub $32, %rsp
mov %r8, %rcx
call __gmpn_invert_limb
add $32, %rsp
pop %r8
mov %rax, %r9
mov %rbp, %rax
jmp Lnent
.align 16, 0x90
# Normalized loop: each pass reads one source limb into r10 and stores
# one result limb (r13) at (rdi).  rax carries the running value into
# the next pass, where Lnent turns it into rbp = rax + 1.
Lntop:mov (%rsi,%rbx,8), %r10
mul %r9
add %r10, %rax
adc %rbp, %rdx
mov %rax, %rbp
mov %rdx, %r13
imul %r8, %rdx
sub %rdx, %r10
mov %r8, %rax
add %r10, %rax
cmp %rbp, %r10
cmovc %r10, %rax
adc $-1, %r13
# If rax is still >= r8, Lnfx subtracts r8 once more and bumps r13.
cmp %r8, %rax
jae Lnfx
Lnok: mov %r13, (%rdi)
sub $8, %rdi
# Loop entry; also the target of `js Lnent` in the preinv entry point.
Lnent:lea 1(%rax), %rbp
dec %rbx
jns Lntop
# No shift was applied on this path, so the final `shr %cl` uses 0.
xor %ecx, %ecx
jmp Lfrac
Lnfx: sub %r8, %rax
inc %r13
jmp Lnok
Lunnormalized:
test %rbx, %rbx
je L44
# If the top source limb is below r8, store rbp (still 0 here) as the
# top result limb and keep the limb itself in rbp.
mov -8(%rsi,%rbx,8), %rax
cmp %r8, %rax
jae L44
mov %rbp, (%rdi)
mov %rax, %rbp
lea -8(%rdi), %rdi
# NOTE(review): mov and lea do not alter flags, so ZF here is still the
# result of the cmp above, which compared "below" on this path; this
# branch therefore looks unreachable -- confirm before relying on it.
je Lret
dec %rbx
L44:
# cl = number of leading zero bits in r8: bsr yields the index of the
# highest set bit, and the low six bits of its complement are
# 63 - index (64-bit shifts use only the low six bits of cl).
# Assumes r8 != 0; bsr leaves its destination undefined for 0.
bsr %r8, %rcx
not %ecx
shl %cl, %r8
shl %cl, %rbp
# r9 = __gmpn_invert_limb(shifted r8).  rcx and r8 are saved across the
# call; two pushes (16) plus 40 bytes (32 of shadow space and 8 of
# padding) keep rsp 16-byte aligned at the call.
push %rcx
push %r8
sub $40, %rsp
mov %r8, %rcx
call __gmpn_invert_limb
add $40, %rsp
pop %r8
pop %rcx
mov %rax, %r9
mov %rbp, %rax
test %rbx, %rbx
je Lfrac
# Also the target of `jmp Luent` in the preinv entry point.
# The neg/shr/neg sequence shifts right by 64 - cl and then restores
# cl, so the high bits of the next source limb are merged into rax.
Luent:dec %rbx
mov (%rsi,%rbx,8), %rbp
neg %ecx
shr %cl, %rbp
neg %ecx
or %rbp, %rax
jmp Lent
.align 16, 0x90
# Unnormalized loop: rbp is rebuilt from two adjacent source limbs
# (previous limb << cl, next limb >> (64 - cl)) before the same
# multiply-and-correct step as in Lntop, with r11 = rax + 1 taking the
# role rbp has there.
Lutop:mov (%rsi,%rbx,8), %r10
shl %cl, %rbp
neg %ecx
shr %cl, %r10
neg %ecx
or %r10, %rbp
mul %r9
add %rbp, %rax
adc %r11, %rdx
mov %rax, %r11
mov %rdx, %r13
imul %r8, %rdx
sub %rdx, %rbp
mov %r8, %rax
add %rbp, %rax
cmp %r11, %rbp
cmovc %rbp, %rax
adc $-1, %r13
cmp %r8, %rax
jae Lufx
Luok: mov %r13, (%rdi)
sub $8, %rdi
Lent: mov (%rsi,%rbx,8), %rbp
dec %rbx
lea 1(%rax), %r11
jns Lutop
# Last source limb: there is no lower limb to merge in, so only the
# left shift is applied before the final step.
Luend:shl %cl, %rbp
mul %r9
add %rbp, %rax
adc %r11, %rdx
mov %rax, %r11
mov %rdx, %r13
imul %r8, %rdx
sub %rdx, %rbp
mov %r8, %rax
add %rbp, %rax
cmp %r11, %rbp
cmovc %rbp, %rax
adc $-1, %r13
cmp %r8, %rax
jae Lefx
Leok: mov %r13, (%rdi)
sub $8, %rdi
jmp Lfrac
Lufx: sub %r8, %rax
inc %r13
jmp Luok
Lefx: sub %r8, %rax
inc %r13
jmp Leok
# Trailing loop: runs qxn times (r12 counts down) and stores one more
# result limb per pass without reading any source limb.  rbp holds -r8
# for the imul below.
Lfrac:mov %r8, %rbp
neg %rbp
jmp Lfent
.align 16, 0x90
Lftop:mul %r9
add %r11, %rdx
mov %rax, %r11
mov %rdx, %r13
imul %rbp, %rdx
mov %r8, %rax
add %rdx, %rax
cmp %r11, %rdx
cmovc %rdx, %rax
adc $-1, %r13
mov %r13, (%rdi)
sub $8, %rdi
Lfent:lea 1(%rax), %r11
dec %r12
jns Lftop
# Undo the shift applied at L44 (cl is 0 on the normalized path).
shr %cl, %rax
# Shared epilogue: restore the six saved registers in reverse push
# order and return with the result in rax.
Lret: pop %rbx
pop %rbp
pop %r12
pop %r13
pop %rsi
pop %rdi
ret
.seh_endproc
More information about the gmp-bugs
mailing list