Consider the following two snippets:
#define ALIGN_BYTES 32
/* __builtin_assume_aligned returns the (aligned) pointer, so assign it back */
#define ASSUME_ALIGNED(x) (x) = __builtin_assume_aligned((x), ALIGN_BYTES)
void fn0(const float *restrict a0, const float *restrict a1,
         float *restrict b, int n)
{
    ASSUME_ALIGNED(a0); ASSUME_ALIGNED(a1); ASSUME_ALIGNED(b);
    for (int i = 0; i < n; ++i)
        b[i] = a0[i] + a1[i];
}
void fn1(const float *restrict *restrict a, float *restrict b, int n)
{
    ASSUME_ALIGNED(a[0]); ASSUME_ALIGNED(a[1]); ASSUME_ALIGNED(b);
    for (int i = 0; i < n; ++i)
        b[i] = a[0][i] + a[1][i];
}
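For context, here is roughly the kind of caller I have in mind. This is only an illustrative sketch (the names alloc_aligned, main, and N are mine, not part of test.c); the one real requirement is that every buffer is 32-byte aligned so the ASSUME_ALIGNED promise actually holds, which I get here via posix_memalign:
#define _POSIX_C_SOURCE 200112L  /* expose posix_memalign under -std=c99; must precede any #include */
#include <stdlib.h>

/* Allocate n floats aligned to ALIGN_BYTES (the alignment ASSUME_ALIGNED promises). */
static float *alloc_aligned(size_t n)
{
    void *p = NULL;
    return posix_memalign(&p, ALIGN_BYTES, n * sizeof(float)) ? NULL : p;
}

int main(void)
{
    enum { N = 1024 };
    float *a0 = alloc_aligned(N), *a1 = alloc_aligned(N), *b = alloc_aligned(N);
    if (!a0 || !a1 || !b)
        return 1;

    for (int i = 0; i < N; ++i) { a0[i] = (float)i; a1[i] = 1.0f; }

    fn0(a0, a1, b, N);                  /* separate-argument version */

    const float *a[2] = { a0, a1 };     /* pointer table for the array-of-pointers version */
    fn1(a, b, N);

    free(a0); free(a1); free(b);
    return 0;
}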
When I compile this with gcc-4.7.2 -Ofast -march=native -std=c99 -ftree-vectorizer-verbose=5 -S test.c -Wall, I find that GCC inserts aliasing checks for the second function.
How can I prevent this, so that the resulting assembly for fn1 is the same as that for fn0? (When the number of parameters grows from three to, say, 30, the argument-passing approach of fn0 becomes cumbersome, and the number of aliasing checks in the fn1 approach becomes ridiculous.)
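For reference, the only rewrite I can think of that keeps the array-of-pointers interface is hoisting the element pointers into local restrict-qualified variables, as in the sketch below (fn2 is just a name for the sketch); whether that is actually enough to make GCC 4.7 drop the runtime checks is part of what I'm unsure about.
/* Sketch: same interface as fn1, but the element pointers are copied into
 * local restrict-qualified pointers before the loop. */
void fn2(const float *restrict *restrict a, float *restrict b, int n)
{
    const float *restrict a0 = a[0];
    const float *restrict a1 = a[1];
    ASSUME_ALIGNED(a0); ASSUME_ALIGNED(a1); ASSUME_ALIGNED(b);
    for (int i = 0; i < n; ++i)
        b[i] = a0[i] + a1[i];
}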
Assembly (x86-64, AVX-capable chip); the aliasing cruft starts at .LFB10:
fn0:
.LFB9:
    .cfi_startproc
    testl   %ecx, %ecx
    jle .L1
    movl    %ecx, %r10d
    shrl    $3, %r10d
    leal    0(,%r10,8), %r9d
    testl   %r9d, %r9d
    je  .L8
    cmpl    $7, %ecx
    jbe .L8
    xorl    %eax, %eax
    xorl    %r8d, %r8d
    .p2align 4,,10
    .p2align 3
.L4:
    vmovaps (%rsi,%rax), %ymm0
    addl    $1, %r8d
    vaddps  (%rdi,%rax), %ymm0, %ymm0
    vmovaps %ymm0, (%rdx,%rax)
    addq    $32, %rax
    cmpl    %r8d, %r10d
    ja  .L4
    cmpl    %r9d, %ecx
    je  .L1
.L3:
    movslq  %r9d, %rax
    salq    $2, %rax
    addq    %rax, %rdi
    addq    %rax, %rsi
    addq    %rax, %rdx
    xorl    %eax, %eax
    .p2align 4,,10
    .p2align 3
.L6:
    vmovss  (%rsi,%rax,4), %xmm0
    vaddss  (%rdi,%rax,4), %xmm0, %xmm0
    vmovss  %xmm0, (%rdx,%rax,4)
    addq    $1, %rax
    leal    (%r9,%rax), %r8d
    cmpl    %r8d, %ecx
    jg  .L6
.L1:
    vzeroupper
    ret
.L8:
    xorl    %r9d, %r9d
    jmp .L3
    .cfi_endproc
.LFE9:
    .size   fn0, .-fn0
    .p2align 4,,15
    .globl  fn1
    .type   fn1, @function
fn1:
.LFB10:
    .cfi_startproc
    testq   %rdx, %rdx
    movq    (%rdi), %r8
    movq    8(%rdi), %r9
    je  .L12
    leaq    32(%rsi), %rdi
    movq    %rdx, %r10
    leaq    32(%r8), %r11
    shrq    $3, %r10
    cmpq    %rdi, %r8
    leaq    0(,%r10,8), %rax
    setae   %cl
    cmpq    %r11, %rsi
    setae   %r11b
    orl %r11d, %ecx
    cmpq    %rdi, %r9
    leaq    32(%r9), %r11
    setae   %dil
    cmpq    %r11, %rsi
    setae   %r11b
    orl %r11d, %edi
    andl    %edi, %ecx
    cmpq    $7, %rdx
    seta    %dil
    testb   %dil, %cl
    je  .L19
    testq   %rax, %rax
    je  .L19
    xorl    %ecx, %ecx
    xorl    %edi, %edi
    .p2align 4,,10
    .p2align 3
.L15:
    vmovaps (%r9,%rcx), %ymm0
    addq    $1, %rdi
    vaddps  (%r8,%rcx), %ymm0, %ymm0
    vmovaps %ymm0, (%rsi,%rcx)
    addq    $32, %rcx
    cmpq    %rdi, %r10
    ja  .L15
    cmpq    %rax, %rdx
    je  .L12
    .p2align 4,,10
    .p2align 3
.L20:
    vmovss  (%r9,%rax,4), %xmm0
    vaddss  (%r8,%rax,4), %xmm0, %xmm0
    vmovss  %xmm0, (%rsi,%rax,4)
    addq    $1, %rax
    cmpq    %rax, %rdx
    ja  .L20
.L12:
    vzeroupper
    ret
.L19:
    xorl    %eax, %eax
    jmp .L20
    .cfi_endproc