EDIT: I edited both the question and its title to be more precise.
Considering the following source code:
#include <vector>
// Minimal element type for the experiment: one int member, with the default
// constructor, copy constructor and copy assignment all written by hand.
struct xyz {
    xyz() { } // empty constructor, but the compiler doesn't care
    xyz(const xyz& o): v(o.v) { } 
    xyz& operator=(const xyz& o) { v=o.v; return *this; }
    int v; // NOT initialized by xyz(): the empty constructor leaves v indeterminate (the 0 stored in the listings below is the compiler's choice, not a guarantee)
};
// Builds and returns, by value, a vector holding 1024 xyz elements.
std::vector<xyz> test() {
    return std::vector<xyz>(1024); // every element gets written: the listings below show a per-element store loop (not a memset() call) :-(
}
...how can I prevent the memory allocated by the vector<> from being initialized with copies of its first element? This is an O(n) operation I'd rather skip for the sake of speed, since my default constructor does nothing.
A g++-specific solution will do if no generic one exists (but I couldn't find any attribute to do that).
EDIT: generated code follows (command line: arm-elf-g++-4.5 -O3 -S -fno-verbose-asm -o - test.cpp | arm-elf-c++filt | grep -vE '^[[:space:]]+[.@].*$' )
@ ARM output for test().
@ On entry r0 holds the address of a three-word object that this code fills in
@ and hands back in r0 (presumably the returned std::vector's three pointers --
@ confirm against the libstdc++ layout).  r4 keeps that address across the call.
test():
    mov r3, #0
    stmfd   sp!, {r4, lr}
    mov r4, r0
    @ Zero all three words of the result object before allocating.
    str r3, [r0, #0]
    str r3, [r0, #4]
    str r3, [r0, #8]
    @ Request 4096 bytes (1024 elements of 4 bytes each).
    mov r0, #4096
    bl  operator new(unsigned long)
    @ r0 = start of the buffer, r1 = buffer + 4096 (one past the end).
    add r1, r0, #4096
    add r2, r0, #4080
    str r0, [r4, #0]
    @ Words 1 and 2 of the result object receive r0 and r1 respectively.
    stmib   r4, {r0, r1}
    @ r2 = buffer + 4080 + 12 = buffer + 4092, the address of the last word.
    add r2, r2, #12
    @ The loop below writes 0 to each word from buffer through buffer + 4092
    @ inclusive (1024 stores), advancing r0 by 4 each time.  Each iteration
    @ also tests r0 against 0 and skips the store when it is null.
    b       .L4          @
.L8:                     @
    add     r0, r0, #4   @
.L4:                     @
    cmp     r0, #0       @  fill the memory
    movne   r3, #0       @
    strne   r3, [r0, #0] @
    cmp     r0, r2       @
    bne     .L8          @
    @ Word 1 of the result object is finally set to buffer + 4096.
    str r1, [r4, #4]
    @ Return the result object's address in r0 and restore r4 / return via pc.
    mov r0, r4
    ldmfd   sp!, {r4, pc}
EDIT: For the sake of completeness, here is the assembly for x86_64:
# x86-64 (AT&T syntax) output for test().
# On entry %rdi holds the address of a three-quadword object that this code
# fills in and hands back in %rax (presumably the returned std::vector's three
# pointers -- confirm against the libstdc++ layout).  %rbx keeps that address
# across the call.
.globl test()
test():
LFB450:
    pushq   %rbp
LCFI0:
    movq    %rsp, %rbp
LCFI1:
    pushq   %rbx
LCFI2:
    movq    %rdi, %rbx
    subq    $8, %rsp
LCFI3:
    # Zero all three quadwords of the result object before allocating.
    movq    $0, (%rdi)
    movq    $0, 8(%rdi)
    movq    $0, 16(%rdi)
    # Request 4096 bytes (1024 elements of 4 bytes each).
    movl    $4096, %edi
    call    operator new(unsigned long)
    # %rax = start of the buffer, %rcx = buffer + 4096 (one past the end),
    # %rdx = buffer + 4092 (address of the last 4-byte element).
    leaq    4096(%rax), %rcx
    movq    %rax, (%rbx)
    movq    %rax, 8(%rbx)
    leaq    4092(%rax), %rdx
    movq    %rcx, 16(%rbx)
    # The loop below writes a 32-bit 0 to each element from buffer through
    # buffer + 4092 inclusive (1024 stores), advancing %rax by 4 each time.
    # Each iteration also tests %rax against 0 and skips the store when null.
    jmp     L4          @
L8:                     @
    addq    $4, %rax    @
L4:                     @
    testq   %rax, %rax  @ memory-filling loop
    je      L2          @
    movl    $0, (%rax)  @
L2:                     @
    cmpq    %rdx, %rax  @
    jne     L8          @
    # Quadword 1 of the result object is finally set to buffer + 4096.
    movq    %rcx, 8(%rbx)
    # Return the result object's address in %rax, then unwind the frame.
    movq    %rbx, %rax
    addq    $8, %rsp
    popq    %rbx
    leave
LCFI4:
    ret
LFE450:
EH_frame1:
LSCIE1:
LECIE1:
LSFDE1:
LASFDE1:
LEFDE1:
EDIT: I think the conclusion is to not use std::vector<> when you want to avoid unneeded initialization. I ended up rolling my own templated container, which performs better (and has specialized versions for NEON and ARMv7).
 
     
     
     
     
     
     
     
     
    