I'm trying to figure out the best optimization (maybe AVX?) for this code:
/* Pair of single-precision components. */
typedef struct {
  float x;  /* first component */
  float y;  /* second component */
} vector;

/*
 * Component-wise sum of two vectors.
 *
 * Both operands are taken by value and the result is returned by value:
 * result.x = u.x + v.x and result.y = u.y + v.y.
 */
vector add(vector u, vector v){
  vector sum;

  sum.x = u.x + v.x;
  sum.y = u.y + v.y;
  return sum;
}
Running gcc -S code.c gives quite long assembly code:
    .file   "code.c"
    .text
    .globl  add
    .type   add, @function
#-----------------------------------------------------------------------
# add -- compiler-generated listing (AT&T / GAS syntax, x86-64).
#
# What the instructions below do:
#   * set up an %rbp frame and spill %rdi to -8(%rbp);
#   * load four floats from the caller's side of the frame
#     (16, 32, 48 and 64 bytes above %rbp) and add them pairwise;
#   * store the two sums through the saved pointer at offsets 0 and 16;
#   * return that pointer in %rax.
#
# NOTE(review): %rdi is used only as a destination pointer and is handed
# back in %rax, so it looks like a hidden "return slot" pointer supplied
# by the caller -- confirm against the ABI in use.
# NOTE(review): the operands sit 16 bytes apart on the stack and the
# second result is stored at 16(%rax). That implies a struct whose two
# members are 16 bytes apart, which does not match a plain
# { float x; float y; } layout -- this listing may come from a
# different struct definition than the one shown; verify.
# The gcc command shown passes no -O option, and the listing keeps a
# frame pointer, spills %rdi and reloads the same slot three times.
#-----------------------------------------------------------------------
add:
.LFB0:
    .cfi_startproc
    pushq   %rbp                    # save caller's frame pointer
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp              # %rbp = frame base; stack args start at 16(%rbp)
    .cfi_def_cfa_register 6
    movq    %rdi, -8(%rbp)          # spill destination pointer below %rbp (no %rsp adjustment is made)
    movss   16(%rbp), %xmm1         # xmm1 = first float of first stack operand
    movss   48(%rbp), %xmm0         # xmm0 = first float of second stack operand
    addss   %xmm0, %xmm1            # xmm1 = sum of the two first floats
    movss   32(%rbp), %xmm2         # xmm2 = second float of first stack operand
    movss   64(%rbp), %xmm0         # xmm0 = second float of second stack operand
    addss   %xmm2, %xmm0            # xmm0 = sum of the two second floats
    movq    -8(%rbp), %rax          # reload destination pointer
    movss   %xmm1, (%rax)           # dest[+0]  = first sum
    movq    -8(%rbp), %rax          # (same slot reloaded again)
    movss   %xmm0, 16(%rax)         # dest[+16] = second sum
    movq    -8(%rbp), %rax          # return value = destination pointer
    popq    %rbp                    # restore caller's frame pointer
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE0:
    .size   add, .-add
    .ident  "GCC: (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609"
    .section    .note.GNU-stack,"",@progbits
I expected very few instructions for such a simple task. Could someone help me optimize this kind of code while keeping the float types?
Thanks.
 
    