I have a multi-threaded application where multiple threads access the same variable - a pointer. volatile would not be used to ensure atomic access (which it does not provide anyway), but rather to ensure the compiler doesn't apply aggressive optimizations because it assumes one thread cannot affect the other and therefore concludes the value can never change.
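For context, this is the classic pattern I have in mind (a minimal, made-up example, not taken from the real code):
#include <stdint.h>

/* Shared flag written by another thread / an ISR */
volatile uint8_t flag;

void
wait_for_flag(void) {
    /* volatile forces a fresh load of flag on every iteration,
     * otherwise the compiler may read it once and spin forever */
    while (flag != 3) {}
}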
I've seen several "volatile is useless in multi-threaded applications" posts. I have a linked list that is accessed from 2 threads. Effectively it does the following:
Thread 1
- Allocates memory for structure
- Puts the entry into the linked list
- Processes the linked list and checks when some value is set to xyz
Thread 2
- Reads the linked list
- Does some processing
- Sets the value to xyz
All operations on the linked list are protected by a system mutex (FreeRTOS), but this doesn't ensure that thread 2 sees the same value of the list start pointer as thread 1.
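For completeness, here is a minimal sketch of what thread 2 roughly does, using the FreeRTOS mutex API (the mutex handle name is made up, the processing is simplified, and mytype_t / list_starts are the ones from the reference code below - this is not the production code):
#include "FreeRTOS.h"
#include "semphr.h"

extern SemaphoreHandle_t list_mutex;     /* created elsewhere with xSemaphoreCreateMutex() */

void
thread2(void) {
    while (1) {
        xSemaphoreTake(list_mutex, portMAX_DELAY);
        for (mytype_t* e = list_starts; e != NULL; e = e->next) {
            /* Does some processing on the entry... */
            e->data = 3;                 /* Sets the value thread 1 is waiting for */
        }
        xSemaphoreGive(list_mutex);
    }
}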
There are, in my opinion, 2 critical parts in the code:
- What threads see from the list_starts pointer
- What threads see in the e->data value inside the entry
This is the code that is done wrongly - it is reference code, not used in production.
#include <stdint.h>
#include <stdlib.h>
#include <stdatomic.h>
typedef struct mytype {
    struct mytype* next;
    int data;
} mytype_t;
typedef _Atomic mytype_t mytype_atomic_t;
mytype_t* list_starts;
//mytype_atomic_t* list_starts;
void
thread1() {
    //Start thread2 here...
    //start_thread(thread2);
    //Run the thread here...
    while (1) {
        //mutex_lock();
        /* Create the entry */
        mytype_t* entry = malloc(sizeof(*entry));
        if (entry != NULL) {
            entry->next = list_starts;
            list_starts = entry;
        }
        //mutex_unlock();
        
        //mutex_lock();
start_over:
        for (mytype_t* e = list_starts, *prev = NULL; e != NULL; prev = e, e = e->next) {
            //mutex_unlock();
            //Simply wait to become 3
            while (e->data != 3) {}
            //mutex_lock();
            //Remove from the list
            if (prev == NULL) {
                list_starts = e->next;
            } else {
                prev->next = e->next;
            }
            free(e);
            goto start_over;
        }
        //mutex_unlock();
    }
}
Godbolt with -mcpu=cortex-m4 -O2 produces: https://godbolt.org/z/T84K3x3G4
thread1:
        push    {r4, lr}
        ldr     r4, .L12
.L4:
        movs    r0, #8
        bl      malloc
        cbz     r0, .L2
        ldr     r3, [r4]
        str     r3, [r0]
.L6:
        //Critical part - loaded once
        ldr     r3, [r0, #4]
.L5:
        //Compare and branch back to itself -> the value is never reloaded
        cmp     r3, #3
        bne     .L5
        ldr     r3, [r0]
        str     r3, [r4]
        bl      free
        ldr     r0, [r4]
        cmp     r0, #0
        bne     .L6
        b       .L4
.L2:
        ldr     r0, [r4]
        cmp     r0, #0
        beq     .L4
        b       .L6
.L12:
        .word   .LANCHOR0
list_starts:
If we change the original code to use the mytype_atomic_t* type in the for loop, like so:
for (mytype_atomic_t* e = list_starts, *prev = NULL; e != NULL; prev = e, e = e->next) {
    while (e->data != 3) {}
}
Then godbolt produces: https://godbolt.org/z/94an17x1G
thread1:
        push    {r3, r4, r5, lr}
        ldr     r4, .L13
        ldr     r5, [r4]
.L4:
        movs    r0, #8
        bl      malloc
        cbz     r0, .L2
        str     r5, [r0]
        str     r0, [r4]
.L6:
        adds    r2, r0, #4
.L5:
        //Barrier
        dmb     ish
        //Load first...
        ldr     r3, [r2]
        dmb     ish
        cmp     r3, #3
        //Jump back to pre-load
        bne     .L5
        dmb     ish
        ldr     r3, [r0]
        dmb     ish
        str     r3, [r4]
        bl      free
        ldr     r0, [r4]
        cmp     r0, #0
        bne     .L6
.L7:
        movs    r5, #0
        b       .L4
.L2:
        cmp     r5, #0
        beq     .L7
        mov     r0, r5
        b       .L6
.L13:
        .word   .LANCHOR0
list_starts:
The ultimate question - is it enough to simply:
- declare a custom type with the _Atomic keyword?
- not use volatile at all in such a case (see the sketch below for what I mean)?
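To make the second point concrete, the alternative I am weighing this against would be to not make the whole structure atomic, but only the members the 2 threads actually communicate through (just a sketch with made-up names, not the final code):
#include <stdatomic.h>

typedef struct mytype_v2 {
    struct mytype_v2* next;              /* only manipulated under the mutex */
    _Atomic int data;                    /* thread 2 stores 3, thread 1 spins on it */
} mytype_v2_t;

mytype_v2_t* _Atomic list_starts_v2;     /* shared head pointer, no volatile anywhere */

/* The wait in thread 1 would stay written as
 *     while (e->data != 3) {}
 * and every iteration would then be a (sequentially consistent) atomic load. */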