Skip to main content
Tweeted twitter.com/StackCodeReview/status/1493510407042043911
Became Hot Network Question
edited tags
Link
200_success
  • 145.7k
  • 22
  • 191
  • 481
added 85 characters in body
Source Link
xiver77
  • 419
  • 3
  • 9
    section .bss

; Cached Win32 handle for standard output; written by _initstdout and
; read by _printf.
stdout:
    resd 1

    section .text

    extern _WriteFile@20
    extern _GetStdHandle@4

; Shared failure sink: every error path jumps here and ud2 raises an
; invalid-opcode exception.
err:
    ud2

    global _initstdout
%define STD_OUTPUT_HANDLE -11
%define INVALID_HANDLE_VALUE -1

; void initstdout(void)
; Asks GetStdHandle for the standard-output handle and caches it in
; [stdout]. Traps through err when INVALID_HANDLE_VALUE comes back.
; Out:   eax = the handle that was stored
_initstdout:
    push STD_OUTPUT_HANDLE          ; nStdHandle; the stdcall callee pops it
    call _GetStdHandle@4
    cmp eax, INVALID_HANDLE_VALUE
    je err
    mov [stdout], eax
    ret

; divq10: unsigned 64-bit divide by ten, done as two chained 32-bit divisions.
; In:    edx:eax = dividend
; Out:   edx:eax = quotient, ecx = remainder (0..9)
; Saves: ebx
divq10:
    push ebx
    mov ebx, 10             ; divisor for both steps
    mov ecx, eax            ; park the low dword
    mov eax, edx
    xor edx, edx
    div ebx                 ; eax = high quotient, edx = high remainder
    xchg eax, ecx           ; eax = low dword, ecx = high quotient
    div ebx                 ; eax = low quotient, edx = final remainder
    xchg ecx, edx           ; ecx = remainder, edx = high quotient
    pop ebx
    ret

; llu2str: write an unsigned 64-bit value as decimal ASCII, no terminator.
; In:    edx:eax = value, ecx = destination buffer
; Out:   eax = number of digits written (1..20)
; Saves: ebx, esi, edi, ebp
; Runs the divq10 chain twice: the first pass only counts the digits so the
; second pass can store them back-to-front at their final positions.
llu2str:
    push ebp
    push ebx
    push esi
    push edi
    mov ebp, ecx            ; ebp = destination
    mov esi, eax            ; edi:esi = copy of the value for pass two
    mov edi, edx
    xor ebx, ebx            ; ebx = digit count
.count:
    inc ebx
    call divq10
    mov ecx, edx
    or ecx, eax             ; stop once the quotient reaches zero
    jnz .count
    mov eax, esi            ; reload the value
    mov edx, edi
    mov esi, ebx            ; esi = length to return
.emit:
    call divq10             ; ecx = next least-significant digit
    add cl, '0'
    dec ebx
    mov [ebp + ebx], cl     ; mov leaves the flags from dec untouched
    jnz .emit
    mov eax, esi
    pop edi
    pop esi
    pop ebx
    pop ebp
    ret

    global _printf
; PRINTF_BUFSZ:  bytes of staging buffer kept on the stack.
; PRINTF_NUMMAX: longest single conversion, '-' plus 20 digits.
%define PRINTF_BUFSZ 1024
%define PRINTF_NUMMAX 21

; void printf(const char *fmt, ...)   -- arguments on the stack, caller pops
; Formats into a stack buffer and writes it to [stdout] with WriteFile.
; Conversions: %u %d (32-bit), %llu %lld (64-bit), %% (literal '%').
; Any other character after '%', or a failed WriteFile, traps through err.
; The buffer is flushed before it can overflow, so output longer than
; PRINTF_BUFSZ is emitted in several WriteFile calls instead of overrunning
; the saved registers and return address.
; Regs:  esi = format cursor, edi = output cursor, ebp = next vararg,
;        bl = current format character
; Frame: [esp] = dword that receives WriteFile's byte count,
;        [esp + 4 ...] = PRINTF_BUFSZ-byte buffer
_printf:
    push ebx
    push edi
    push esi
    push ebp
    lea ebp, [esp + 24]             ; past 4 saved regs, return address, fmt
    mov esi, [ebp - 4]              ; esi = fmt
    sub esp, PRINTF_BUFSZ + 4
    lea edi, [esp + 4]
.start:
    mov bl, [esi]
    test bl, bl
    jz .end
    cmp bl, '%'
    jne .copy
    lea eax, [esp + 4 + PRINTF_BUFSZ - PRINTF_NUMMAX]
    cmp edi, eax                    ; room for the longest conversion?
    jbe .conv
    call .flush
.conv:
    inc esi
    mov bl, [esi]
    cmp bl, '%'
    je .copy                        ; "%%" prints a single '%'
    cmp bl, 'u'
    jne .d0
    mov eax, [ebp]
    add ebp, 4
    xor edx, edx                    ; zero-extend into edx:eax
.u1:
    mov ecx, edi
    call llu2str
    add edi, eax
    jmp .next
.d0:
    cmp bl, 'd'
    jne .ll
    mov eax, [ebp]
    add ebp, 4
    cdq                             ; sign-extend into edx:eax
.d1:
    test edx, edx
    jns .u1
    mov byte [edi], '-'
    inc edi
    neg eax                         ; edx:eax = -(edx:eax)
    adc edx, 0
    neg edx
    jmp .u1
.ll:
    cmp bl, 'l'
    jne err
    inc esi
    cmp byte [esi], 'l'
    jne err
    inc esi
    mov eax, [ebp]
    mov edx, [ebp + 4]
    add ebp, 8
    mov bl, [esi]
    cmp bl, 'u'
    je .u1
    cmp bl, 'd'
    je .d1
    jmp err
.copy:
    lea eax, [esp + 4 + PRINTF_BUFSZ]
    cmp edi, eax                    ; buffer full?
    jb .store
    call .flush
.store:
    mov [edi], bl
    inc edi
.next:
    inc esi
    jmp .start
.end:
    call .flush
    add esp, PRINTF_BUFSZ + 4
    pop ebp
    pop esi
    pop edi
    pop ebx
    ret

; .flush: write the bytes between the buffer start and edi, then rewind edi.
; Reached only by call from _printf, so the frame sits 4 bytes higher here.
; Keeps ebx, esi and ebp; leaves WriteFile's result in eax.
.flush:
    lea eax, [esp + 8]              ; buffer start
    lea ecx, [esp + 4]              ; byte-count slot
    sub edi, eax                    ; edi = number of pending bytes
    push 0                          ; lpOverlapped
    push ecx                        ; lpNumberOfBytesWritten
    push edi                        ; nNumberOfBytesToWrite
    push eax                        ; lpBuffer
    push dword [stdout]             ; hFile
    call _WriteFile@20
    test eax, eax
    jz err
    lea edi, [esp + 8]              ; rewind the cursor to the buffer start
    ret
    section .bss

; Cached Win32 handle for standard output; written by _initstdout and
; read by _printf.
stdout:
    resd 1

    section .text

    extern _WriteFile@20
    extern _GetStdHandle@4

; Shared failure sink: every error path jumps here and ud2 raises an
; invalid-opcode exception.
err:
    ud2

    global _initstdout
%define STD_OUTPUT_HANDLE -11
%define INVALID_HANDLE_VALUE -1

; void initstdout(void)
; Asks GetStdHandle for the standard-output handle and caches it in
; [stdout]. Traps through err when INVALID_HANDLE_VALUE comes back.
; Out:   eax = the handle that was stored
_initstdout:
    push STD_OUTPUT_HANDLE          ; nStdHandle; the stdcall callee pops it
    call _GetStdHandle@4
    cmp eax, INVALID_HANDLE_VALUE
    je err
    mov [stdout], eax
    ret

; divq10: unsigned 64-bit divide by ten, done as two chained 32-bit divisions.
; In:    edx:eax = dividend
; Out:   edx:eax = quotient, ecx = remainder (0..9)
; Saves: ebx
divq10:
    push ebx
    mov ebx, 10             ; divisor for both steps
    mov ecx, eax            ; park the low dword
    mov eax, edx
    xor edx, edx
    div ebx                 ; eax = high quotient, edx = high remainder
    xchg eax, ecx           ; eax = low dword, ecx = high quotient
    div ebx                 ; eax = low quotient, edx = final remainder
    xchg ecx, edx           ; ecx = remainder, edx = high quotient
    pop ebx
    ret

; llu2str: write an unsigned 64-bit value as decimal ASCII, no terminator.
; In:    edx:eax = value, ecx = destination buffer
; Out:   eax = number of digits written (1..20)
; Saves: ebx, esi, edi, ebp
; Runs the divq10 chain twice: the first pass only counts the digits so the
; second pass can store them back-to-front at their final positions.
llu2str:
    push ebp
    push ebx
    push esi
    push edi
    mov ebp, ecx            ; ebp = destination
    mov esi, eax            ; edi:esi = copy of the value for pass two
    mov edi, edx
    xor ebx, ebx            ; ebx = digit count
.count:
    inc ebx
    call divq10
    mov ecx, edx
    or ecx, eax             ; stop once the quotient reaches zero
    jnz .count
    mov eax, esi            ; reload the value
    mov edx, edi
    mov esi, ebx            ; esi = length to return
.emit:
    call divq10             ; ecx = next least-significant digit
    add cl, '0'
    dec ebx
    mov [ebp + ebx], cl     ; mov leaves the flags from dec untouched
    jnz .emit
    mov eax, esi
    pop edi
    pop esi
    pop ebx
    pop ebp
    ret

    global _printf
; PRINTF_BUFSZ:  bytes of staging buffer kept on the stack.
; PRINTF_NUMMAX: longest single conversion, '-' plus 20 digits.
%define PRINTF_BUFSZ 1024
%define PRINTF_NUMMAX 21

; void printf(const char *fmt, ...)   -- arguments on the stack, caller pops
; Formats into a stack buffer and writes it to [stdout] with WriteFile.
; Conversions: %u %d (32-bit), %llu %lld (64-bit), %% (literal '%').
; Any other character after '%', or a failed WriteFile, traps through err.
; The buffer is flushed before it can overflow, so output longer than
; PRINTF_BUFSZ is emitted in several WriteFile calls instead of overrunning
; the saved registers and return address.
; Regs:  esi = format cursor, edi = output cursor, ebp = next vararg,
;        bl = current format character
; Frame: [esp] = dword that receives WriteFile's byte count,
;        [esp + 4 ...] = PRINTF_BUFSZ-byte buffer
_printf:
    push ebx
    push edi
    push esi
    push ebp
    lea ebp, [esp + 24]             ; past 4 saved regs, return address, fmt
    mov esi, [ebp - 4]              ; esi = fmt
    sub esp, PRINTF_BUFSZ + 4
    lea edi, [esp + 4]
.start:
    mov bl, [esi]
    test bl, bl
    jz .end
    cmp bl, '%'
    jne .copy
    lea eax, [esp + 4 + PRINTF_BUFSZ - PRINTF_NUMMAX]
    cmp edi, eax                    ; room for the longest conversion?
    jbe .conv
    call .flush
.conv:
    inc esi
    mov bl, [esi]
    cmp bl, '%'
    je .copy                        ; "%%" prints a single '%'
    cmp bl, 'u'
    jne .d0
    mov eax, [ebp]
    add ebp, 4
    xor edx, edx                    ; zero-extend into edx:eax
.u1:
    mov ecx, edi
    call llu2str
    add edi, eax
    jmp .next
.d0:
    cmp bl, 'd'
    jne .ll
    mov eax, [ebp]
    add ebp, 4
    cdq                             ; sign-extend into edx:eax
.d1:
    test edx, edx
    jns .u1
    mov byte [edi], '-'
    inc edi
    neg eax                         ; edx:eax = -(edx:eax)
    adc edx, 0
    neg edx
    jmp .u1
.ll:
    cmp bl, 'l'
    jne err
    inc esi
    cmp byte [esi], 'l'
    jne err
    inc esi
    mov eax, [ebp]
    mov edx, [ebp + 4]
    add ebp, 8
    mov bl, [esi]
    cmp bl, 'u'
    je .u1
    cmp bl, 'd'
    je .d1
    jmp err
.copy:
    lea eax, [esp + 4 + PRINTF_BUFSZ]
    cmp edi, eax                    ; buffer full?
    jb .store
    call .flush
.store:
    mov [edi], bl
    inc edi
.next:
    inc esi
    jmp .start
.end:
    call .flush
    add esp, PRINTF_BUFSZ + 4
    pop ebp
    pop esi
    pop edi
    pop ebx
    ret

; .flush: write the bytes between the buffer start and edi, then rewind edi.
; Reached only by call from _printf, so the frame sits 4 bytes higher here.
; Keeps ebx, esi and ebp; leaves WriteFile's result in eax.
.flush:
    lea eax, [esp + 8]              ; buffer start
    lea ecx, [esp + 4]              ; byte-count slot
    sub edi, eax                    ; edi = number of pending bytes
    push 0                          ; lpOverlapped
    push ecx                        ; lpNumberOfBytesWritten
    push edi                        ; nNumberOfBytesToWrite
    push eax                        ; lpBuffer
    push dword [stdout]             ; hFile
    call _WriteFile@20
    test eax, eax
    jz err
    lea edi, [esp + 8]              ; rewind the cursor to the buffer start
    ret
    section .bss

; Cached Win32 handle for standard output; written by _initstdout and
; read by _printf.
stdout:
    resd 1

    section .text

    extern _WriteFile@20
    extern _GetStdHandle@4

; Shared failure sink: every error path jumps here and ud2 raises an
; invalid-opcode exception.
err:
    ud2

    global _initstdout
%define STD_OUTPUT_HANDLE -11
%define INVALID_HANDLE_VALUE -1

; void initstdout(void)
; Asks GetStdHandle for the standard-output handle and caches it in
; [stdout]. Traps through err when INVALID_HANDLE_VALUE comes back.
; Out:   eax = the handle that was stored
_initstdout:
    push STD_OUTPUT_HANDLE          ; nStdHandle; the stdcall callee pops it
    call _GetStdHandle@4
    cmp eax, INVALID_HANDLE_VALUE
    je err
    mov [stdout], eax
    ret

; divq10: unsigned 64-bit divide by ten, done as two chained 32-bit divisions.
; In:    edx:eax = dividend
; Out:   edx:eax = quotient, ecx = remainder (0..9)
; Saves: ebx
divq10:
    push ebx
    mov ebx, 10             ; divisor for both steps
    mov ecx, eax            ; park the low dword
    mov eax, edx
    xor edx, edx
    div ebx                 ; eax = high quotient, edx = high remainder
    xchg eax, ecx           ; eax = low dword, ecx = high quotient
    div ebx                 ; eax = low quotient, edx = final remainder
    xchg ecx, edx           ; ecx = remainder, edx = high quotient
    pop ebx
    ret

; llu2str: write an unsigned 64-bit value as decimal ASCII, no terminator.
; In:    edx:eax = value, ecx = destination buffer
; Out:   eax = number of digits written (1..20)
; Saves: ebx, esi, edi, ebp
; Runs the divq10 chain twice: the first pass only counts the digits so the
; second pass can store them back-to-front at their final positions.
llu2str:
    push ebp
    push ebx
    push esi
    push edi
    mov ebp, ecx            ; ebp = destination
    mov esi, eax            ; edi:esi = copy of the value for pass two
    mov edi, edx
    xor ebx, ebx            ; ebx = digit count
.count:
    inc ebx
    call divq10
    mov ecx, edx
    or ecx, eax             ; stop once the quotient reaches zero
    jnz .count
    mov eax, esi            ; reload the value
    mov edx, edi
    mov esi, ebx            ; esi = length to return
.emit:
    call divq10             ; ecx = next least-significant digit
    add cl, '0'
    dec ebx
    mov [ebp + ebx], cl     ; mov leaves the flags from dec untouched
    jnz .emit
    mov eax, esi
    pop edi
    pop esi
    pop ebx
    pop ebp
    ret

    global _printf
; PRINTF_BUFSZ:  bytes of staging buffer kept on the stack.
; PRINTF_NUMMAX: longest single conversion, '-' plus 20 digits.
%define PRINTF_BUFSZ 1024
%define PRINTF_NUMMAX 21

; void printf(const char *fmt, ...)   -- arguments on the stack, caller pops
; Formats into a stack buffer and writes it to [stdout] with WriteFile.
; Conversions: %u %d (32-bit), %llu %lld (64-bit), %% (literal '%').
; Any other character after '%', or a failed WriteFile, traps through err.
; The buffer is flushed before it can overflow, so output longer than
; PRINTF_BUFSZ is emitted in several WriteFile calls instead of overrunning
; the saved registers and return address.
; Regs:  esi = format cursor, edi = output cursor, ebp = next vararg,
;        bl = current format character
; Frame: [esp] = dword that receives WriteFile's byte count,
;        [esp + 4 ...] = PRINTF_BUFSZ-byte buffer
_printf:
    push ebx
    push edi
    push esi
    push ebp
    lea ebp, [esp + 24]             ; past 4 saved regs, return address, fmt
    mov esi, [ebp - 4]              ; esi = fmt
    sub esp, PRINTF_BUFSZ + 4
    lea edi, [esp + 4]
.start:
    mov bl, [esi]
    test bl, bl
    jz .end
    cmp bl, '%'
    jne .copy
    lea eax, [esp + 4 + PRINTF_BUFSZ - PRINTF_NUMMAX]
    cmp edi, eax                    ; room for the longest conversion?
    jbe .conv
    call .flush
.conv:
    inc esi
    mov bl, [esi]
    cmp bl, '%'
    je .copy                        ; "%%" prints a single '%'
    cmp bl, 'u'
    jne .d0
    mov eax, [ebp]
    add ebp, 4
    xor edx, edx                    ; zero-extend into edx:eax
.u1:
    mov ecx, edi
    call llu2str
    add edi, eax
    jmp .next
.d0:
    cmp bl, 'd'
    jne .ll
    mov eax, [ebp]
    add ebp, 4
    cdq                             ; sign-extend into edx:eax
.d1:
    test edx, edx
    jns .u1
    mov byte [edi], '-'
    inc edi
    neg eax                         ; edx:eax = -(edx:eax)
    adc edx, 0
    neg edx
    jmp .u1
.ll:
    cmp bl, 'l'
    jne err
    inc esi
    cmp byte [esi], 'l'
    jne err
    inc esi
    mov eax, [ebp]
    mov edx, [ebp + 4]
    add ebp, 8
    mov bl, [esi]
    cmp bl, 'u'
    je .u1
    cmp bl, 'd'
    je .d1
    jmp err
.copy:
    lea eax, [esp + 4 + PRINTF_BUFSZ]
    cmp edi, eax                    ; buffer full?
    jb .store
    call .flush
.store:
    mov [edi], bl
    inc edi
.next:
    inc esi
    jmp .start
.end:
    call .flush
    add esp, PRINTF_BUFSZ + 4
    pop ebp
    pop esi
    pop edi
    pop ebx
    ret

; .flush: write the bytes between the buffer start and edi, then rewind edi.
; Reached only by call from _printf, so the frame sits 4 bytes higher here.
; Keeps ebx, esi and ebp; leaves WriteFile's result in eax.
.flush:
    lea eax, [esp + 8]              ; buffer start
    lea ecx, [esp + 4]              ; byte-count slot
    sub edi, eax                    ; edi = number of pending bytes
    push 0                          ; lpOverlapped
    push ecx                        ; lpNumberOfBytesWritten
    push edi                        ; nNumberOfBytesToWrite
    push eax                        ; lpBuffer
    push dword [stdout]             ; hFile
    call _WriteFile@20
    test eax, eax
    jz err
    lea edi, [esp + 8]              ; rewind the cursor to the buffer start
    ret
Source Link
xiver77
  • 419
  • 3
  • 9

Minimal `printf` for integer types in x86 assembly

I'm writing a minimal C runtime targeting an old 32-bit Windows XP machine as a personal project. The C runtime provided by compilers is quite bloated. I wouldn't mind a library that adds up to several megabytes of bloat if this were a paid project, since even a very old PC would load it very quickly anyway, but as a personal project, I'm just doing whatever makes me comfortable. (1)

This printf can currently only handle %d, %lld, %u, and %llu.

The routine is optimized for size, not for speed. I/O doesn't happen in the middle of a hot loop - if it does, it is not a hot loop - so it makes more sense to minimize the space the routine takes up in the executable.

  • div with a constant divisor is preferred over multiply and shift with the multiplicative inverse.
  • mov xl, byte [] instead of movsx exx, byte []; saves a byte
  • packed code, unaligned jump targets
  • Code duplication is avoided whenever possible.

Non-variadic functions follow the regparm(3) calling convention. The arguments are passed in eax, edx, and ecx, in that order, and the return value is stored in eax and edx. The local function divq10 breaks this rule by also returning a value (the remainder) in ecx.

printf.s

    section .bss

; Cached Win32 handle for standard output; written by _initstdout and
; read by _printf.
stdout:
    resd 1

    section .text

    extern _WriteFile@20
    extern _GetStdHandle@4

; Shared failure sink: every error path jumps here and ud2 raises an
; invalid-opcode exception.
err:
    ud2

    global _initstdout
%define STD_OUTPUT_HANDLE -11
%define INVALID_HANDLE_VALUE -1

; void initstdout(void)
; Asks GetStdHandle for the standard-output handle and caches it in
; [stdout]. Traps through err when INVALID_HANDLE_VALUE comes back.
; Out:   eax = the handle that was stored
_initstdout:
    push STD_OUTPUT_HANDLE          ; nStdHandle; the stdcall callee pops it
    call _GetStdHandle@4
    cmp eax, INVALID_HANDLE_VALUE
    je err
    mov [stdout], eax
    ret

; divq10: unsigned 64-bit divide by ten, done as two chained 32-bit divisions.
; In:    edx:eax = dividend
; Out:   edx:eax = quotient, ecx = remainder (0..9)
; Saves: ebx
divq10:
    push ebx
    mov ebx, 10             ; divisor for both steps
    mov ecx, eax            ; park the low dword
    mov eax, edx
    xor edx, edx
    div ebx                 ; eax = high quotient, edx = high remainder
    xchg eax, ecx           ; eax = low dword, ecx = high quotient
    div ebx                 ; eax = low quotient, edx = final remainder
    xchg ecx, edx           ; ecx = remainder, edx = high quotient
    pop ebx
    ret

; llu2str: write an unsigned 64-bit value as decimal ASCII, no terminator.
; In:    edx:eax = value, ecx = destination buffer
; Out:   eax = number of digits written (1..20)
; Saves: ebx, esi, edi, ebp
; Runs the divq10 chain twice: the first pass only counts the digits so the
; second pass can store them back-to-front at their final positions.
llu2str:
    push ebp
    push ebx
    push esi
    push edi
    mov ebp, ecx            ; ebp = destination
    mov esi, eax            ; edi:esi = copy of the value for pass two
    mov edi, edx
    xor ebx, ebx            ; ebx = digit count
.count:
    inc ebx
    call divq10
    mov ecx, edx
    or ecx, eax             ; stop once the quotient reaches zero
    jnz .count
    mov eax, esi            ; reload the value
    mov edx, edi
    mov esi, ebx            ; esi = length to return
.emit:
    call divq10             ; ecx = next least-significant digit
    add cl, '0'
    dec ebx
    mov [ebp + ebx], cl     ; mov leaves the flags from dec untouched
    jnz .emit
    mov eax, esi
    pop edi
    pop esi
    pop ebx
    pop ebp
    ret

    global _printf
; PRINTF_BUFSZ:  bytes of staging buffer kept on the stack.
; PRINTF_NUMMAX: longest single conversion, '-' plus 20 digits.
%define PRINTF_BUFSZ 1024
%define PRINTF_NUMMAX 21

; void printf(const char *fmt, ...)   -- arguments on the stack, caller pops
; Formats into a stack buffer and writes it to [stdout] with WriteFile.
; Conversions: %u %d (32-bit), %llu %lld (64-bit), %% (literal '%').
; Any other character after '%', or a failed WriteFile, traps through err.
; The buffer is flushed before it can overflow, so output longer than
; PRINTF_BUFSZ is emitted in several WriteFile calls instead of overrunning
; the saved registers and return address.
; Regs:  esi = format cursor, edi = output cursor, ebp = next vararg,
;        bl = current format character
; Frame: [esp] = dword that receives WriteFile's byte count,
;        [esp + 4 ...] = PRINTF_BUFSZ-byte buffer
_printf:
    push ebx
    push edi
    push esi
    push ebp
    lea ebp, [esp + 24]             ; past 4 saved regs, return address, fmt
    mov esi, [ebp - 4]              ; esi = fmt
    sub esp, PRINTF_BUFSZ + 4
    lea edi, [esp + 4]
.start:
    mov bl, [esi]
    test bl, bl
    jz .end
    cmp bl, '%'
    jne .copy
    lea eax, [esp + 4 + PRINTF_BUFSZ - PRINTF_NUMMAX]
    cmp edi, eax                    ; room for the longest conversion?
    jbe .conv
    call .flush
.conv:
    inc esi
    mov bl, [esi]
    cmp bl, '%'
    je .copy                        ; "%%" prints a single '%'
    cmp bl, 'u'
    jne .d0
    mov eax, [ebp]
    add ebp, 4
    xor edx, edx                    ; zero-extend into edx:eax
.u1:
    mov ecx, edi
    call llu2str
    add edi, eax
    jmp .next
.d0:
    cmp bl, 'd'
    jne .ll
    mov eax, [ebp]
    add ebp, 4
    cdq                             ; sign-extend into edx:eax
.d1:
    test edx, edx
    jns .u1
    mov byte [edi], '-'
    inc edi
    neg eax                         ; edx:eax = -(edx:eax)
    adc edx, 0
    neg edx
    jmp .u1
.ll:
    cmp bl, 'l'
    jne err
    inc esi
    cmp byte [esi], 'l'
    jne err
    inc esi
    mov eax, [ebp]
    mov edx, [ebp + 4]
    add ebp, 8
    mov bl, [esi]
    cmp bl, 'u'
    je .u1
    cmp bl, 'd'
    je .d1
    jmp err
.copy:
    lea eax, [esp + 4 + PRINTF_BUFSZ]
    cmp edi, eax                    ; buffer full?
    jb .store
    call .flush
.store:
    mov [edi], bl
    inc edi
.next:
    inc esi
    jmp .start
.end:
    call .flush
    add esp, PRINTF_BUFSZ + 4
    pop ebp
    pop esi
    pop edi
    pop ebx
    ret

; .flush: write the bytes between the buffer start and edi, then rewind edi.
; Reached only by call from _printf, so the frame sits 4 bytes higher here.
; Keeps ebx, esi and ebp; leaves WriteFile's result in eax.
.flush:
    lea eax, [esp + 8]              ; buffer start
    lea ecx, [esp + 4]              ; byte-count slot
    sub edi, eax                    ; edi = number of pending bytes
    push 0                          ; lpOverlapped
    push ecx                        ; lpNumberOfBytesWritten
    push edi                        ; nNumberOfBytesToWrite
    push eax                        ; lpBuffer
    push dword [stdout]             ; hFile
    call _WriteFile@20
    test eax, eax
    jz err
    lea edi, [esp + 8]              ; rewind the cursor to the buffer start
    ret

test.c

void initstdout(void);
void printf(const char *fmt, ...);

/*
 * Program entry point (the build links with --entry=_start).
 * Prints zero, the extreme values and -1 through each supported conversion.
 */
void start(void) {
    initstdout();
    printf("Hello, world!\n");
    /*
     * Each argument must be as wide as its conversion expects: %lld and
     * %llu consume 8 bytes, so a plain int 0 would make printf read past
     * the arguments that were actually pushed.
     */
    printf("%d %u %lld %llu\n", 0, 0u, 0ll, 0llu);
    int dm = 1u << 31;
    int dx = (1u << 31) - 1;
    int ux = -1;
    long long lldm = 1llu << 63;
    long long lldx = (1llu << 63) - 1;
    long long llux = -1;
    printf("%d %d %d\n%lld %lld %lld\n", dm, dx, ux, lldm, lldx, llux);
    printf("%u %u %u\n%llu %llu %llu\n", dm, dx, ux, lldm, lldx, llux);
}

build.sh

#!/bin/sh
# Builds r.exe from test.c and printf.s, linking directly against kernel32.dll.
# Stop at the first failing step so ld never links stale object files.
set -e

# $F and $LF are expanded unquoted on purpose: each holds several options.
O="-O3 -msse2 -fno-builtin -fno-asynchronous-unwind-tables"
S="-std=c11 -pedantic -masm=intel"
F="$O $S"
LF="--entry=_start --subsystem=console --enable-stdcall-fixup"
SYS="/c/Windows/SysWOW64"

gcc -c $F test.c
nasm -fwin32 printf.s
ld -o r.exe $LF *.o *.obj "$SYS/kernel32.dll"

I used gcc and ld, but MSVC's cl and link should also work fine. I had to put --enable-stdcall-fixup to shut up warnings, but I currently don't know why those warnings are happening. AFAIK Windows API functions have _@ decorations on 32-bit, but the linker is complaining that I shouldn't have put those decorations.

output

Hello, world!
0 0 0 0
-2147483648 2147483647 -1
-9223372036854775808 9223372036854775807 -1
2147483648 2147483647 4294967295
9223372036854775808 9223372036854775807 18446744073709551615

(1) MinGW GCC creates a 100KB executable for a single call to printf, including all the initialization code, and its own fix-up code to patch the default Windows C runtime. MSVC provides a several-hundred-KB DLL runtime, which should always be provided as-is to distribute freely.