Solution
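Apparently, according to the question "What is the difference between free(a) and memset(a, 0, malloced_size)?" (its actual title is "Free function in C", but I feel that is not specific enough), I need to memset the data to 0 before/after I free it in order to actually free it as if it had never been assigned any data. (Note: if you do zero the data, it has to happen before the call to free(); writing to a block after it has been freed is undefined behavior.)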
UPDATE: fixed
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <stdio.h>
/*
 * Round `value` up to the next multiple of `size`.
 *
 * A `value` that is already a multiple of `size` is returned unchanged.
 * A `value` of 0 yields `size` rather than 0.
 * The result is also traced to stderr, formatted as a pointer.
 */
uintptr_t round_up(uintptr_t value, uintptr_t size)
{
    uintptr_t rounded;

    if (value == 0) {
        rounded = size;
    } else {
        /* Add size - 1 before the integer division so the quotient is rounded up. */
        rounded = ((value + (size - 1)) / size) * size;
    }

    fprintf(stderr, "called round_up\nreturning %p\n", (void *) rounded);
    return rounded;
}
/*
 * Copy `src` into a freshly allocated buffer whose usable region starts on a
 * 4096-byte boundary.
 *
 *   src            bytes to copy; may be NULL only if nothing is to be copied
 *   len_of_source  number of valid bytes at src
 *   dest           out: receives the 4096-aligned address inside the
 *                  allocation. This is NOT the address returned by malloc(),
 *                  so it must never be passed to free().
 *   a              out: receives the pointer returned by malloc(); this is
 *                  the pointer the caller must eventually free().
 *   requested_len  number of bytes the caller wants available at *dest
 *
 * At most min(len_of_source, requested_len) bytes are read from src; any
 * remaining bytes of the requested_len region at *dest are zero-filled.
 *
 * Returns requested_len on success. Returns -1 if an argument is invalid or
 * the allocation fails; when `dest` and `a` are non-NULL they are set to NULL
 * in that case.
 */
int read_fast_verify(const char *src, int len_of_source, char **dest, char ** a, int requested_len) {
    if (dest == NULL || a == NULL) {
        return -1;
    }
    if (len_of_source < 0 || requested_len < 0) {
        *dest = NULL;
        *a = NULL;
        return -1;
    }

    size_t want = (size_t)requested_len;
    size_t copy_len = (size_t)(len_of_source < requested_len ? len_of_source : requested_len);

    if (src == NULL && copy_len > 0) {
        *dest = NULL;
        *a = NULL;
        return -1;
    }

    /*
     * Over-allocate by one alignment unit: the aligned address is less than
     * 4096 bytes past the start, so aligned + requested_len always fits.
     * The sum is done in size_t to avoid signed overflow.
     */
    *a = malloc(want + 4096);
    if (*a == NULL) {
        *dest = NULL;
        return -1;
    }

    char *aligned = (char *)round_up((uintptr_t)*a, 4096);

    if (copy_len > 0) {
        /* Kept from the original: the data is also placed at the start of the allocation. */
        memcpy(*a, src, copy_len);
        /* Only the bytes that src really has are read; the buffer is new, so it cannot overlap src. */
        memcpy(aligned, src, copy_len);
    }
    /* Give the rest of the requested region a defined value instead of reading past src. */
    memset(aligned + copy_len, 0, want - copy_len);

    *dest = aligned;
    return requested_len;
}
/*
 * Shift buffer contents toward the front: copies `len` bytes starting at
 * `*src + offset` to `*src`. The ranges may overlap. The pointer stored in
 * `*src` itself is not changed.
 *
 * The caller must make sure that `offset + len` bytes are accessible at *src.
 */
void __lseek_string__(char **src, int len, int offset) {
    char *base = *src;
    const char *from = base + offset;

    memmove(base, from, len);
}
/* Text that is copied into the aligned buffer. */
char *string1 = "hello";
/* Set by read_fast_verify() through its `dest` argument. */
char *string2;
/* Set by read_fast_verify() through its `a` argument; this is what gets freed. */
char *s;

/*
 * Test driver: build the buffer from string1, shift its contents forward by
 * five bytes, then release the allocation through `s`.
 */
int main(void) {
    size_t source_len = strlen(string1);

    read_fast_verify(string1, source_len, &string2, &s, source_len + 5);
    __lseek_string__(&string2, source_len, 5);
    free(s);
    return 0;
}
Problem
How do I free a malloc'ed pointer after I memmove its contents? memmove seems to segfault if I use a NULL `dest` to receive its return value (e.g. `char *dest = memmove(...);`).
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <stdio.h>
/*
 * Round `value` up to the next multiple of `size`.
 *
 * A `value` that is already a multiple of `size` is returned unchanged.
 * A `value` of 0 yields `size` rather than 0.
 * The result is also traced to stderr, formatted as a pointer.
 */
uintptr_t round_up(uintptr_t value, uintptr_t size)
{
    uintptr_t rounded;

    if (value == 0) {
        rounded = size;
    } else {
        /* Add size - 1 before the integer division so the quotient is rounded up. */
        rounded = ((value + (size - 1)) / size) * size;
    }

    fprintf(stderr, "called round_up\nreturning %p\n", (void *) rounded);
    return rounded;
}
/*
 * Allocates requested_len + 4096 bytes, stores the malloc() result in *a,
 * copies up to requested_len bytes of src to the start of that allocation,
 * then calls memmove() with the 4096-aligned address inside the allocation as
 * the destination and stores memmove()'s return value in *dest.
 *
 * Returns requested_len.
 *
 * NOTE(review): the result of malloc() is not checked before it is used.
 */
int read_fast_verify(const char *src, int len_of_source, char **dest, char ** a, int requested_len) {
    /* Over-allocate by 4096 bytes so the aligned address plus requested_len still lies inside the block. */
    *a = malloc(requested_len+4096);
    /* Copy min(len_of_source, requested_len) bytes of src to the start of the allocation. */
    if (len_of_source < requested_len) memcpy(*a, src, len_of_source);
    else memcpy(*a, src, requested_len);
    /*
     * NOTE(review): the source operand of this memmove() is *dest, which this
     * function has not assigned at this point. It holds whatever the caller
     * left there. When the caller passes the address of a file-scope
     * `char *` that has no initializer, *dest is NULL, so memmove() is asked
     * to read requested_len bytes from a null pointer, which is undefined
     * behavior and is consistent with a crash inside memmove().
     */
    *dest = memmove((void *)round_up((uintptr_t)*a, 4096), *dest, requested_len);
    return requested_len;
}
/*
 * Shift buffer contents toward the front: copies `len` bytes starting at
 * `*src + offset` to `*src`. The ranges may overlap. The pointer stored in
 * `*src` itself is not changed.
 *
 * The caller must make sure that `offset + len` bytes are accessible at *src.
 */
void __lseek_string__(char **src, int len, int offset) {
    char *base = *src;
    const char *from = base + offset;

    memmove(base, from, len);
}
/* Text that is handed to read_fast_verify() as the source. */
char *string1 = "hello";
/* Passed to read_fast_verify() as `dest`; it has no initializer, so it starts out NULL. */
char *string2;
/* Passed to read_fast_verify() as `a`; this is what gets freed. */
char *s;

/*
 * Test driver: build the buffer from string1, shift its contents forward by
 * five bytes, then release the allocation through `s`.
 */
int main(void) {
    size_t source_len = strlen(string1);

    read_fast_verify(string1, source_len, &string2, &s, source_len + 5);
    __lseek_string__(&string2, source_len, 5);
    free(s);
    return 0;
}
the output
    Starting program: /home/arch/universal-dynamic-loader/loader/test_case 
    called round_up
    returning 0x55555555a000
    Program received signal SIGSEGV, Segmentation fault.
    0x00007ffff7f2d3b0 in __memmove_ssse3_back () from /usr/lib/libc.so.6
However, if I just do it the normal way (allocating directly into `*dest` and freeing `string2`):
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <stdio.h>
/*
 * Round `value` up to the next multiple of `size`.
 *
 * A `value` that is already a multiple of `size` is returned unchanged.
 * A `value` of 0 yields `size` rather than 0.
 * The result is also traced to stderr, formatted as a pointer.
 */
uintptr_t round_up(uintptr_t value, uintptr_t size)
{
    uintptr_t rounded;

    if (value == 0) {
        rounded = size;
    } else {
        /* Add size - 1 before the integer division so the quotient is rounded up. */
        rounded = ((value + (size - 1)) / size) * size;
    }

    fprintf(stderr, "called round_up\nreturning %p\n", (void *) rounded);
    return rounded;
}
/*
 * Allocates requested_len + 4096 bytes into *dest, copies up to requested_len
 * bytes of src to the start of the allocation, then moves requested_len bytes
 * from the start of the allocation to the 4096-aligned address inside it and
 * stores that aligned address in *dest.
 *
 * Returns requested_len. The parameter `a` is not used in this variant.
 *
 * NOTE(review): the result of malloc() is not checked before it is used.
 */
int read_fast_verify(const char *src, int len_of_source, char **dest, char ** a, int requested_len) {
    /* Over-allocate by 4096 bytes so the aligned address plus requested_len still lies inside the block. */
    *dest = malloc(requested_len+4096);
    /* Copy min(len_of_source, requested_len) bytes of src to the start of the allocation. */
    if (len_of_source < requested_len) memcpy(*dest, src, len_of_source);
    else memcpy(*dest, src, requested_len);
    /*
     * NOTE(review): this overwrites *dest with the rounded-up address. Unless
     * malloc() happened to return a 4096-aligned pointer, the value now in
     * *dest points into the middle of the block, and the pointer malloc()
     * returned is not stored anywhere else. free() only accepts the pointer
     * that malloc() returned, so the caller can no longer free this block
     * through *dest.
     *
     * NOTE(review): when len_of_source < requested_len, only len_of_source
     * bytes were initialized above, yet requested_len bytes are moved here.
     */
    *dest = memmove((void *)round_up((uintptr_t)*dest, 4096), *dest, requested_len);
    return requested_len;
}
/*
 * Shift buffer contents toward the front: copies `len` bytes starting at
 * `*src + offset` to `*src`. The ranges may overlap. The pointer stored in
 * `*src` itself is not changed.
 *
 * The caller must make sure that `offset + len` bytes are accessible at *src.
 */
void __lseek_string__(char **src, int len, int offset) {
    char *base = *src;
    const char *from = base + offset;

    memmove(base, from, len);
}
/* Text that is handed to read_fast_verify() as the source. */
char * string1 = "hello";
/* Passed to read_fast_verify() as `dest`; receives the 4096-aligned address. */
char * string2;
/* Passed to read_fast_verify() as `a`; this variant of read_fast_verify() never writes through it. */
char * s;
/*
 * Test driver: build the buffer from string1, shift its contents forward by
 * five bytes, then attempt to release the buffer.
 */
int main(void) {
    read_fast_verify(string1, strlen(string1), &string2, &s, (strlen(string1) + 5));
    /* Copies strlen(string1) bytes from string2 + 5 to string2. */
    __lseek_string__(&string2, strlen(string1), 5);
    /*
     * NOTE(review): string2 holds the rounded-up address that
     * read_fast_verify() stored in *dest, not the pointer malloc() returned.
     * Passing an address inside a block to free() is invalid, which matches
     * the "free(): invalid pointer" abort and the Valgrind "Invalid free()"
     * report. The original malloc() pointer has to be kept and freed instead.
     */
    free(string2);
}
then I get this
called round_up
returning 0x564c86f54000
free(): invalid pointer
Aborted (core dumped)
and from valgrind
==19175== HEAP SUMMARY:
==19175==     in use at exit: 4,106 bytes in 1 blocks
==19175==   total heap usage: 1 allocs, 1 frees, 4,106 bytes allocated
==19175== 
==19175== Searching for pointers to 1 not-freed blocks
==19175== Checked 68,008 bytes
==19175== 
==19175== 4,106 bytes in 1 blocks are possibly lost in loss record 1 of 1
==19175==    at 0x4837757: malloc (vg_replace_malloc.c:299)
==19175==    by 0x10923C: read_fast_verify (test_case.c:13)
==19175==    by 0x109357: main (test_case.c:28)
==19175== 
==19175== LEAK SUMMARY:
==19175==    definitely lost: 0 bytes in 0 blocks
==19175==    indirectly lost: 0 bytes in 0 blocks
==19175==      possibly lost: 4,106 bytes in 1 blocks
==19175==    still reachable: 0 bytes in 0 blocks
==19175==         suppressed: 0 bytes in 0 blocks
==19175== 
==19175== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
==19175== 
==19175== 1 errors in context 1 of 2:
==19175== Invalid free() / delete / delete[] / realloc()
==19175==    at 0x4838904: free (vg_replace_malloc.c:530)
==19175==    by 0x109388: main (test_case.c:30)
==19175==  Address 0x4a30000 is 4,032 bytes inside a block of size 4,106 alloc'd
==19175==    at 0x4837757: malloc (vg_replace_malloc.c:299)
==19175==    by 0x10923C: read_fast_verify (test_case.c:13)
==19175==    by 0x109357: main (test_case.c:28)
==19175== 
==19175== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
I am asking because I am trying to make this memory-safe: my dynamic linker currently uses over 2.7 GB (and over 4 GB of shared memory) when reading a .so (such as libc 2.28), which it should not do at all.
 
    