I'm trying to learn x86-64 inline assembly and decided to implement this very simple swap method that simply orders a and b in ascending order:
#include <stdio.h>
void swap(int* a, int* b)
{
    asm(".intel_syntax noprefix");
    asm("mov    eax, DWORD PTR [rdi]");
    asm("mov    ebx, DWORD PTR [rsi]");
    asm("cmp    eax, ebx");
    asm("jle    .L1");
    asm("mov    DWORD PTR [rdi], ebx");
    asm("mov    DWORD PTR [rsi], eax");
    asm(".L1:");
    asm(".att_syntax noprefix");
}
int main()
{
    int input[3];
    scanf("%d%d%d", &input[0], &input[1], &input[2]);
    swap(&input[0], &input[1]);
    swap(&input[1], &input[2]);
    swap(&input[0], &input[1]);
    printf("%d %d %d\n", input[0], input[1], input[2]);
    return 0;
}
The above code works as expected when I run it with this command:
> gcc main.c
> ./a.out
> 3 2 1
> 1 2 3
However, as soon as I turn optimazation on I get the following error messages:
> gcc -O2 main.c
> main.c: Assembler messages:
> main.c:12: Error: symbol `.L1' is already defined
> main.c:12: Error: symbol `.L1' is already defined
> main.c:12: Error: symbol `.L1' is already defined
If I've understood it correctly, this is because gcc tries to inline my swap function when optimization is turned on, causing the label .L1 to be defined multiple times in the assembly file.
I've tried to find an answer to this problem, but nothing seems to work. In this previusly asked question it's suggested to use local labels instead, and I've tried that aswell:
#include <stdio.h>
void swap(int* a, int* b)
{
    asm(".intel_syntax noprefix");
    asm("mov    eax, DWORD PTR [rdi]");
    asm("mov    ebx, DWORD PTR [rsi]");
    asm("cmp    eax, ebx");
    asm("jle    1f");
    asm("mov    DWORD PTR [rdi], ebx");
    asm("mov    DWORD PTR [rsi], eax");
    asm("1:");
    asm(".att_syntax noprefix");
}
But when trying to run the program I now get a segmentation fault instead:
> gcc -O2 main.c
> ./a.out
> 3 2 1
> Segmentation fault
I also tried the suggested solution to this previusly asked question and changed the name .L1 to CustomLabel1 in case there would be a name collision, but it still gives me the old error:
> gcc -O2 main.c
> main.c: Assembler messages:
> main.c:12: Error: symbol `CustomLabel1' is already defined
> main.c:12: Error: symbol `CustomLabel1' is already defined
> main.c:12: Error: symbol `CustomLabel1' is already defined
Finally I also tried this suggestion:
void swap(int* a, int* b)
{
    asm(".intel_syntax noprefix");
    asm("mov    eax, DWORD PTR [rdi]");
    asm("mov    ebx, DWORD PTR [rsi]");
    asm("cmp    eax, ebx");
    asm("jle    label%=");
    asm("mov    DWORD PTR [rdi], ebx");
    asm("mov    DWORD PTR [rsi], eax");
    asm("label%=:");
    asm(".att_syntax noprefix");
}
But then I get these errors instead:
main.c: Assembler messages:
main.c:9: Error: invalid character '=' in operand 1
main.c:12: Error: invalid character '%' in mnemonic
main.c:9: Error: invalid character '=' in operand 1
main.c:12: Error: invalid character '%' in mnemonic
main.c:9: Error: invalid character '=' in operand 1
main.c:12: Error: invalid character '%' in mnemonic
main.c:9: Error: invalid character '=' in operand 1
main.c:12: Error: invalid character '%' in mnemonic
So, my question is:
How can I use labels in inline assembly?
This is the disassemble output for the optimized version:
> gcc -O2 -S main.c
    .file   "main.c"
    .section    .text.unlikely,"ax",@progbits
.LCOLDB0:
    .text
.LHOTB0:
    .p2align 4,,15
    .globl  swap
    .type   swap, @function
swap:
.LFB23:
    .cfi_startproc
#APP
# 5 "main.c" 1
    .intel_syntax noprefix
# 0 "" 2
# 6 "main.c" 1
    mov eax, DWORD PTR [rdi]
# 0 "" 2
# 7 "main.c" 1
    mov ebx, DWORD PTR [rsi]
# 0 "" 2
# 8 "main.c" 1
    cmp eax, ebx
# 0 "" 2
# 9 "main.c" 1
    jle 1f
# 0 "" 2
# 10 "main.c" 1
    mov DWORD PTR [rdi], ebx
# 0 "" 2
# 11 "main.c" 1
    mov DWORD PTR [rsi], eax
# 0 "" 2
# 12 "main.c" 1
    1:
# 0 "" 2
# 13 "main.c" 1
    .att_syntax noprefix
# 0 "" 2
#NO_APP
    ret
    .cfi_endproc
.LFE23:
    .size   swap, .-swap
    .section    .text.unlikely
.LCOLDE0:
    .text
.LHOTE0:
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC1:
    .string "%d%d%d"
.LC2:
    .string "%d %d %d\n"
    .section    .text.unlikely
.LCOLDB3:
    .section    .text.startup,"ax",@progbits
.LHOTB3:
    .p2align 4,,15
    .globl  main
    .type   main, @function
main:
.LFB24:
    .cfi_startproc
    subq    $40, %rsp
    .cfi_def_cfa_offset 48
    movl    $.LC1, %edi
    movq    %fs:40, %rax
    movq    %rax, 24(%rsp)
    xorl    %eax, %eax
    leaq    8(%rsp), %rcx
    leaq    4(%rsp), %rdx
    movq    %rsp, %rsi
    call    __isoc99_scanf
#APP
# 5 "main.c" 1
    .intel_syntax noprefix
# 0 "" 2
# 6 "main.c" 1
    mov eax, DWORD PTR [rdi]
# 0 "" 2
# 7 "main.c" 1
    mov ebx, DWORD PTR [rsi]
# 0 "" 2
# 8 "main.c" 1
    cmp eax, ebx
# 0 "" 2
# 9 "main.c" 1
    jle 1f
# 0 "" 2
# 10 "main.c" 1
    mov DWORD PTR [rdi], ebx
# 0 "" 2
# 11 "main.c" 1
    mov DWORD PTR [rsi], eax
# 0 "" 2
# 12 "main.c" 1
    1:
# 0 "" 2
# 13 "main.c" 1
    .att_syntax noprefix
# 0 "" 2
# 5 "main.c" 1
    .intel_syntax noprefix
# 0 "" 2
# 6 "main.c" 1
    mov eax, DWORD PTR [rdi]
# 0 "" 2
# 7 "main.c" 1
    mov ebx, DWORD PTR [rsi]
# 0 "" 2
# 8 "main.c" 1
    cmp eax, ebx
# 0 "" 2
# 9 "main.c" 1
    jle 1f
# 0 "" 2
# 10 "main.c" 1
    mov DWORD PTR [rdi], ebx
# 0 "" 2
# 11 "main.c" 1
    mov DWORD PTR [rsi], eax
# 0 "" 2
# 12 "main.c" 1
    1:
# 0 "" 2
# 13 "main.c" 1
    .att_syntax noprefix
# 0 "" 2
# 5 "main.c" 1
    .intel_syntax noprefix
# 0 "" 2
# 6 "main.c" 1
    mov eax, DWORD PTR [rdi]
# 0 "" 2
# 7 "main.c" 1
    mov ebx, DWORD PTR [rsi]
# 0 "" 2
# 8 "main.c" 1
    cmp eax, ebx
# 0 "" 2
# 9 "main.c" 1
    jle 1f
# 0 "" 2
# 10 "main.c" 1
    mov DWORD PTR [rdi], ebx
# 0 "" 2
# 11 "main.c" 1
    mov DWORD PTR [rsi], eax
# 0 "" 2
# 12 "main.c" 1
    1:
# 0 "" 2
# 13 "main.c" 1
    .att_syntax noprefix
# 0 "" 2
#NO_APP
    movl    8(%rsp), %r8d
    movl    4(%rsp), %ecx
    movl    $.LC2, %esi
    movl    (%rsp), %edx
    xorl    %eax, %eax
    movl    $1, %edi
    call    __printf_chk
    movq    24(%rsp), %rsi
    xorq    %fs:40, %rsi
    jne .L6
    xorl    %eax, %eax
    addq    $40, %rsp
    .cfi_remember_state
    .cfi_def_cfa_offset 8
    ret
.L6:
    .cfi_restore_state
    call    __stack_chk_fail
    .cfi_endproc
.LFE24:
    .size   main, .-main
    .section    .text.unlikely
.LCOLDE3:
    .section    .text.startup
.LHOTE3:
    .ident  "GCC: (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609"
    .section    .note.GNU-stack,"",@progbits
 
     
     
     
     
    