I recently wrote a program in C++ and assembly (MASM). Can anyone help me make this code more efficient, in the ASM part or in both? I would really appreciate it, because I don't know every ASM instruction and I am probably using far too many. By the way, the program sums two integer vectors of any size. The code I have is shown below:
C++:
// Element-wise vector addition implemented in assembly (C linkage, so the
// name is not C++-mangled). Arguments, in order: first source array, second
// source array, destination array, number of int elements.
// The caller in this file ignores the return value; do not rely on it.
extern "C" int add_vtr_asm(int* a, int* b, int* dst, int n);
// Demo driver: fills two vectors with 0..kCount-1, adds them element-wise via
// the assembly routine add_vtr_asm, prints every "a + b = sum" line, and then
// terminates the process with exit code 0 (it never returns to its caller).
void add_vtr() {
    // Single source of truth for the vector length (was a repeated literal).
    const int kCount = 1024;

    // 16-byte alignment lets the assembly routine use aligned 128-bit SSE
    // accesses on these buffers.
    __declspec(align(16))
        int vetor1[kCount];
    __declspec(align(16))
        int vetor2[kCount];
    __declspec(align(16))
        int soma[kCount];

    // The loop index is declared locally; the original used an undeclared `i`.
    for (int i = 0; i < kCount; i++) {
        vetor1[i] = i;
        vetor2[i] = i;
    }

    add_vtr_asm(vetor1, vetor2, soma, kCount);

    for (int i = 0; i < kCount; i++) {
        printf("% d + % d = % d \n",vetor1[i] ,vetor2[i], soma[i]);
    }

    // Ends the whole program here, so control does not return to main().
    exit(0);
}
// Program entry point: prints the two banner lines, then runs the vector-sum
// demo.
int main()
{
    const int total_elementos = 1024;  // element count shown in the banner

    printf("Programa para somar vetores de inteiros: \n");
    printf("Soma de vetores com % d elementos \n", total_elementos);

    add_vtr();
    return 0;
}
ASM:
 
; Syntax: MASM (Intel operand order). Target: 32-bit x86, cdecl, SSE2.
; Processor/SSE directives come before .MODEL so the SSE2 mnemonics below
; (movdqu, paddd, movd with xmm operands) are accepted by the assembler.
.686
.XMM
.MODEL FLAT, C
.CODE

;-----------------------------------------------------------------------
; int add_vtr_asm(int *a, int *b, int *dst, int n)
;
; Computes dst[i] = a[i] + b[i] for i in [0, n).
;
; ABI:    32-bit cdecl (arguments on the stack, caller cleans up)
; In:     [ebp+8]  = a    first source array
;         [ebp+12] = b    second source array
;         [ebp+16] = dst  destination array
;         [ebp+20] = n    number of 32-bit elements; n <= 0 does nothing
; Out:    eax = 0
; Saves:  ebp, esi, edi (callee-saved registers that are used here)
; Clobb:  eax, ecx, edx, xmm0, xmm1, flags (all volatile under cdecl)
; Align:  none required; unaligned loads/stores (movdqu) are used, so
;         16-byte aligned buffers are accepted but not mandatory.
;
; Register roles inside the loops:
;         esi = a, edx = b, edi = dst, eax = element index i,
;         ecx = loop limit (n rounded down to 4, then n itself)
;-----------------------------------------------------------------------
add_vtr_asm PROC
    push    ebp
    mov     ebp, esp
    push    esi
    push    edi

    mov     esi, [ebp+8]            ; esi = a
    mov     edx, [ebp+12]           ; edx = b (volatile reg, so callee-saved ebx stays untouched)
    mov     edi, [ebp+16]           ; edi = dst
    mov     ecx, [ebp+20]           ; ecx = n

    test    ecx, ecx
    jle     finished                ; n <= 0: nothing to add

    xor     eax, eax                ; i = 0
    and     ecx, -4                 ; ecx = n rounded down to a multiple of 4
    jz      scalar_tail             ; fewer than 4 elements: skip the SIMD loop

    ; Invariant: eax < ecx, and both are multiples of 4.
    ; One shared scaled index replaces three separate pointer increments.
vector_loop:
    movdqu  xmm0, [esi+eax*4]       ; xmm0 = a[i..i+3]
    movdqu  xmm1, [edx+eax*4]       ; xmm1 = b[i..i+3]
    paddd   xmm0, xmm1              ; four 32-bit adds at once
    movdqu  [edi+eax*4], xmm0       ; dst[i..i+3] = sums
    add     eax, 4
    cmp     eax, ecx
    jb      vector_loop

scalar_tail:
    mov     ecx, [ebp+20]           ; ecx = n again (full count)
    cmp     eax, ecx
    jae     finished                ; n was a multiple of 4: no leftovers

    ; Handles the remaining 1..3 elements one at a time. movd keeps the work
    ; in xmm registers so no extra general-purpose register is needed.
scalar_loop:
    movd    xmm0, DWORD PTR [esi+eax*4]
    movd    xmm1, DWORD PTR [edx+eax*4]
    paddd   xmm0, xmm1
    movd    DWORD PTR [edi+eax*4], xmm0
    inc     eax
    cmp     eax, ecx
    jb      scalar_loop

finished:
    xor     eax, eax                ; defined return value for the declared int result
    pop     edi
    pop     esi
    pop     ebp
    ret
add_vtr_asm ENDP
END
 
    