Assembly included. This weekend I tried to get my own small library running without any C libs, and the thread-local stuff is giving me problems. Below you can see I created a struct called Try1 (because it's my first attempt!). If I set the thread-local variable and use it, the code seems to execute fine. If I call a const method on Try1 with a global variable, it seems to run fine. Now if I do both, it's not fine: it segfaults, despite me being able to access members and run the function with a global variable. The code will print Hello and Hello2 but not Hello3.
I suspect the problem is the address of the variable. I tried using an if statement to print the first hello: `if ((s64)&t1 > (s64)buf+1024*16)`. It was true, so the pointer isn't where I thought it was. Also, it isn't -8 as gdb suggests (it's a signed compare, and I tried 0 instead of buf).
The assembly is listed under the C++ code. Its first line is the first call to write.
//test.cpp
//clang++ or g++ -std=c++20 -g -fno-rtti -fno-exceptions -fno-stack-protector -fno-asynchronous-unwind-tables -static -nostdlib test.cpp -march=native && ./a.out
#include <immintrin.h>
typedef unsigned long long int u64;
// Raw `syscall` wrapper using system call number 1 (write on x86-64 Linux),
// so no libc is needed.
//   fd   - file descriptor to write to
//   buf  - start of the bytes to write
//   size - number of bytes to write
// Returns whatever the kernel leaves in rax: the byte count on success or a
// negated errno value on failure (no errno translation is done here).
ssize_t my_write(int fd, const void *buf, size_t size) {
    // rax carries the syscall number in and the result out.
    long result = 1;
    __asm__ __volatile__ (
        "syscall"
        : "+a" (result)
        : "D" (fd), "S" (buf), "d" (size)
        // `syscall` itself overwrites rcx and r11; the kernel reads memory.
        : "cc", "rcx", "r11", "memory"
    );
    return result;
}
// Raw `syscall` wrapper using system call number 60 (exit on x86-64 Linux),
// so no libc is needed.
//   exit_status - value handed to the kernel as the process exit status
// The kernel is not expected to return from this call.
void my_exit(int exit_status) {
    // rax carries the syscall number in (and would carry a result out).
    long nr = 60;
    __asm__ __volatile__ (
        "syscall"
        : "+a" (nr)
        : "D" (exit_status)
        // `syscall` itself overwrites rcx and r11.
        : "cc", "rcx", "r11", "memory"
    );
}
struct Try1
{
    u64 val;
    constexpr Try1() { val=0; }
    u64 Get() const { return val; }
};
// Backing storage for the hand-made thread-local area: _start sets the FS
// base to buf+4096, so the thread-local variables below are reached through
// FS-relative addresses.
static char buf[1024*8]; // originally obtained with mmap; a static array keeps the example short
// Thread-local value written in _start and used as the length of the first write.
static __thread u64 sanity_check;
// Thread-local object under test; the disassembly reads t1.val at fs:-8.
static __thread Try1 t1;
// Ordinary (non-thread-local) instance used as the control case.
static Try1 global;
extern "C"
int _start()
{
    auto tls_size = 4096*2;
    auto originalFS = _readfsbase_u64();
    _writefsbase_u64((u64)(buf+4096));
    global.val = 1;
    global.Get(); //Executes fine
    sanity_check=6;
    t1.val = 7;
    my_write(1, "Hello\n", sanity_check);
    my_write(1, "Hello2\n", t1.val); //Still fine
    my_write(1, "Hello3\n", t1.Get()); //crash! :/
    my_exit(0);
    return 0;
}
Asm:
4010b4:       e8 47 ff ff ff          call   401000 <_Z8my_writeiPKvm>
4010b9:       64 48 8b 04 25 f8 ff    mov    rax,QWORD PTR fs:0xfffffffffffffff8
4010c0:       ff ff 
4010c2:       48 89 c2                mov    rdx,rax
4010c5:       48 8d 05 3b 0f 00 00    lea    rax,[rip+0xf3b]        # 402007 <_ZNK4Try13GetEv+0xeef>
4010cc:       48 89 c6                mov    rsi,rax
4010cf:       bf 01 00 00 00          mov    edi,0x1
4010d4:       e8 27 ff ff ff          call   401000 <_Z8my_writeiPKvm>
4010d9:       64 48 8b 04 25 00 00    mov    rax,QWORD PTR fs:0x0
4010e0:       00 00 
4010e2:       48 05 f8 ff ff ff       add    rax,0xfffffffffffffff8
4010e8:       48 89 c7                mov    rdi,rax
4010eb:       e8 28 00 00 00          call   401118 <_ZNK4Try13GetEv>
4010f0:       48 89 c2                mov    rdx,rax
4010f3:       48 8d 05 15 0f 00 00    lea    rax,[rip+0xf15]        # 40200f <_ZNK4Try13GetEv+0xef7>
4010fa:       48 89 c6                mov    rsi,rax
4010fd:       bf 01 00 00 00          mov    edi,0x1
401102:       e8 f9 fe ff ff          call   401000 <_Z8my_writeiPKvm>
401107:       bf 00 00 00 00          mov    edi,0x0
40110c:       e8 12 ff ff ff          call   401023 <_Z7my_exiti>
401111:       b8 00 00 00 00          mov    eax,0x0
401116:       c9                      leave  
401117:       c3                      ret    
 
     
    